File size: 2,861 Bytes
287a0bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
// proto3 schema for the `chroma` package: shared data types plus the
// VectorReader service.
syntax = "proto3";

package chroma;

// Go import path for the code generated from this file.
option go_package = "github.com/chroma/chroma-coordinator/internal/proto/coordinatorpb";

// Status pairs a textual reason with a numeric code.
message Status {
  // Text accompanying the code.
  string reason = 1;
  // Numeric status code. The set of valid values is not defined in this file.
  int32 code = 2; // TODO: What is the enum of this code?
}

// ChromaResponse is a message whose only field is a Status. It is not
// referenced by the VectorReader service declared in this file.
message ChromaResponse {
  // The Status carried by this response.
  Status status = 1;
}

// Types here should mirror chromadb/types.py

// Operation is the value type of SubmitEmbeddingRecord.operation.
// ADD has the value 0 and is therefore the proto3 default when the field is
// left unset.
enum Operation {
  ADD = 0;
  UPDATE = 1;
  UPSERT = 2;
  DELETE = 3;
}

// ScalarEncoding is the value type of Vector.encoding. FLOAT32 has the value 0
// and is therefore the proto3 default when the field is left unset.
// NOTE(review): presumably this names the element type packed into
// Vector.vector; confirm against chromadb/types.py.
enum ScalarEncoding {
  FLOAT32 = 0;
  INT32 = 1;
}

// Vector carries a vector as raw bytes together with a declared dimension and
// a scalar encoding.
message Vector {
  // Declared dimension of the vector.
  // NOTE(review): presumably the number of scalar elements in `vector`; confirm.
  int32 dimension = 1;
  // Serialized vector payload. The byte layout is not specified in this file.
  bytes vector = 2;
  // Scalar encoding associated with the payload.
  ScalarEncoding encoding = 3;
}

// SegmentScope is the value type of Segment.scope. VECTOR has the value 0 and
// is therefore the proto3 default when the field is left unset.
enum SegmentScope {
  VECTOR = 0;
  METADATA = 1;
}

// Segment describes a segment: its id, type and scope, plus optional bindings
// to a topic and a collection and optional metadata.
message Segment {
  string id = 1;
  // Segment type, carried as a free-form string.
  string type = 2;
  SegmentScope scope = 3;
  // TODO should channel <> segment binding exist here?
  optional string topic = 4;
  // If a segment has a collection, it implies that this segment implements the
  // full collection and can be used to service queries (for its given scope).
  optional string collection = 5;
  // Optional metadata attached to the segment.
  optional UpdateMetadata metadata = 6;
}

// Collection describes a collection: its id, name and topic, optional
// metadata and dimension, and the tenant and database it is associated with.
message Collection {
  string id = 1;
  string name = 2;
  string topic = 3;
  // Optional metadata attached to the collection.
  optional UpdateMetadata metadata = 4;
  // Optional dimension. `optional` gives the field explicit presence, so an
  // unset value can be told apart from 0.
  optional int32 dimension = 5;
  // NOTE(review): presumably a Tenant.name; confirm against the callers.
  string tenant = 6;
  // NOTE(review): whether this holds a Database.id or Database.name is not
  // visible in this file; confirm against the callers.
  string database = 7;
}

// Database describes a database by id and name, together with the tenant it
// is associated with.
message Database {
  string id = 1;
  string name = 2;
  // NOTE(review): presumably a Tenant.name; confirm against the callers.
  string tenant = 3;
}

// Tenant describes a tenant; its name is the only field.
message Tenant {
  string name = 1;
}

// UpdateMetadataValue holds a single metadata value. Because the fields live
// in a oneof, at most one of them is set at a time.
message UpdateMetadataValue {
  oneof value {
    // String-typed value.
    string string_value = 1;
    // 64-bit signed integer value.
    int64 int_value = 2;
    // Double-precision floating point value.
    double float_value = 3;
  }
}

// UpdateMetadata is a set of metadata entries keyed by string.
message UpdateMetadata {
  // Maps each metadata key to its value. As with any proto map, iteration
  // order is undefined.
  map<string, UpdateMetadataValue> metadata = 1;
}

// SubmitEmbeddingRecord carries one record together with the Operation to
// apply and the id of the collection it targets.
message SubmitEmbeddingRecord {
  string id = 1;
  // Optional vector for the record.
  optional Vector vector = 2;
  // Optional metadata for the record.
  optional UpdateMetadata metadata = 3;
  // Operation for this record; defaults to ADD (value 0) when unset.
  Operation operation = 4;
  string collection_id = 5;
}

// VectorEmbeddingRecord is the record type returned in
// GetVectorsResponse.records: an id, a sequence id and a vector.
message VectorEmbeddingRecord {
  string id = 1;
  // Sequence id, carried as opaque bytes; its encoding is not specified in
  // this file.
  bytes seq_id = 2;
  // TODO: we need to rethink source of truth for vector dimensionality and
  // encoding
  Vector vector = 3;
}

// VectorQueryResult is a single entry of VectorQueryResults.results.
message VectorQueryResult {
  string id = 1;
  // Sequence id, carried as opaque bytes; its encoding is not specified in
  // this file.
  bytes seq_id = 2;
  // Distance reported for this result. The metric is not specified in this
  // file.
  float distance = 3;
  // Optional vector for this result.
  // NOTE(review): presumably populated only when
  // QueryVectorsRequest.include_embeddings is true; confirm.
  optional Vector vector = 4;
}

// VectorQueryResults is a list of VectorQueryResult entries. It is the element
// type of QueryVectorsResponse.results.
message VectorQueryResults {
  repeated VectorQueryResult results = 1;
}

/* Vector Reader Interface */

// VectorReader declares two unary RPCs: one that fetches vectors and one that
// queries them.
service VectorReader {
  // GetVectors takes a GetVectorsRequest and returns a GetVectorsResponse.
  rpc GetVectors(GetVectorsRequest) returns (GetVectorsResponse);
  // QueryVectors takes a QueryVectorsRequest and returns a
  // QueryVectorsResponse.
  rpc QueryVectors(QueryVectorsRequest) returns (QueryVectorsResponse);
}

// GetVectorsRequest is the request message of VectorReader.GetVectors.
message GetVectorsRequest {
  // Ids named by the request.
  // NOTE(review): the meaning of an empty list is not specified in this file.
  repeated string ids = 1;
  // Id of the segment the request is addressed to.
  string segment_id = 2;
}

// GetVectorsResponse is the response message of VectorReader.GetVectors.
message GetVectorsResponse {
  // Records returned for the request.
  repeated VectorEmbeddingRecord records = 1;
}

// QueryVectorsRequest is the request message of VectorReader.QueryVectors.
message QueryVectorsRequest {
  // Query vectors.
  repeated Vector vectors = 1;
  // NOTE(review): presumably the number of results wanted per query vector;
  // confirm.
  int32 k = 2;
  // NOTE(review): presumably restricts results to these ids; the meaning of an
  // empty list is not specified in this file.
  repeated string allowed_ids = 3;
  // NOTE(review): presumably controls whether VectorQueryResult.vector is
  // populated; confirm.
  bool include_embeddings = 4;
  // Id of the segment the request is addressed to.
  string segment_id = 5;
  // TODO: options as in types.py, its currently unused so can add later
}

// QueryVectorsResponse is the response message of VectorReader.QueryVectors.
message QueryVectorsResponse {
  // Result lists.
  // NOTE(review): presumably one VectorQueryResults per entry of
  // QueryVectorsRequest.vectors, in the same order; confirm.
  repeated VectorQueryResults results = 1;
}