Spaces:
Running
Running
File size: 2,861 Bytes
287a0bc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
// Protocol Buffers schema for the `chroma` package, written in proto3 syntax.
syntax = "proto3";
package chroma;
// Go import path of the package generated from this file.
option go_package = "github.com/chroma/chroma-coordinator/internal/proto/coordinatorpb";
// Outcome descriptor: a numeric code plus an explanatory reason string.
// Embedded in ChromaResponse.
message Status {
  // Reason text accompanying `code`.
  string reason = 1;

  // Numeric result code. The set of valid values is not defined in this file.
  // TODO: What is the enum of this code?
  int32 code = 2;
}
// Response envelope whose only payload is a Status.
message ChromaResponse {
  // Status reported by the responder.
  Status status = 1;
}
// Types here should mirror chromadb/types.py
// Operation carried by a SubmitEmbeddingRecord (see its `operation` field).
//
// NOTE(review): ADD occupies the zero value, so a record whose `operation`
// was never set is indistinguishable from an explicit ADD. The value names
// are also unprefixed. Both are kept as-is because renaming or renumbering
// would break existing generated code and serialized data.
enum Operation {
  ADD = 0;
  UPDATE = 1;
  UPSERT = 2;
  DELETE = 3;
}
// Scalar element encoding referenced by Vector.encoding.
//
// NOTE(review): FLOAT32 is the zero value and therefore the implicit default
// when `encoding` is left unset.
enum ScalarEncoding {
  FLOAT32 = 0;
  INT32 = 1;
}
// A vector transported as a raw byte payload together with its declared
// dimension and scalar encoding.
message Vector {
  // Declared dimensionality of the vector.
  int32 dimension = 1;

  // Serialized vector payload.
  // NOTE(review): presumably `dimension` scalars laid out according to
  // `encoding`; byte order and layout are not specified here — confirm with
  // the producer.
  bytes vector = 2;

  // Scalar encoding that applies to `vector`.
  ScalarEncoding encoding = 3;
}
// Scope of a Segment (see Segment.scope).
//
// NOTE(review): VECTOR is the zero value and therefore the implicit default
// when `scope` is left unset.
enum SegmentScope {
  VECTOR = 0;
  METADATA = 1;
}
// Description of a segment: identity, type, scope, and optional bindings to
// a topic and a collection.
message Segment {
  // Segment identifier.
  string id = 1;

  // Segment type, carried as a string. The set of valid values is not
  // defined in this file.
  string type = 2;

  // Scope this segment covers (VECTOR or METADATA).
  SegmentScope scope = 3;

  // Optional topic associated with the segment.
  // TODO should channel <> segment binding exist here?
  optional string topic = 4;

  // If a segment has a collection, it implies that this segment implements
  // the full collection and can be used to service queries (for its given
  // scope).
  optional string collection = 5;

  // Optional metadata attached to the segment.
  optional UpdateMetadata metadata = 6;
}
// Description of a collection, including the tenant and database it is
// associated with.
message Collection {
  // Collection identifier.
  string id = 1;

  // Collection name.
  string name = 2;

  // Topic associated with the collection.
  string topic = 3;

  // Optional metadata attached to the collection.
  optional UpdateMetadata metadata = 4;

  // Optional dimension; explicit presence lets "unset" be told apart from 0.
  // NOTE(review): presumably the dimensionality of vectors stored in the
  // collection — confirm.
  optional int32 dimension = 5;

  // Tenant name/identifier for this collection.
  string tenant = 6;

  // Database name/identifier for this collection.
  string database = 7;
}
// Description of a database and the tenant it is associated with.
message Database {
  // Database identifier.
  string id = 1;

  // Database name.
  string name = 2;

  // Tenant for this database.
  string tenant = 3;
}
// Description of a tenant; currently carries only its name.
message Tenant {
  // Tenant name.
  string name = 1;
}
// A single metadata value. At most one of the alternatives in `value` is
// set; a message with none set is also representable.
message UpdateMetadataValue {
  oneof value {
    // String-typed value.
    string string_value = 1;

    // 64-bit signed integer value.
    int64 int_value = 2;

    // Double-precision floating point value.
    double float_value = 3;
  }
}
// A set of metadata entries keyed by string.
message UpdateMetadata {
  // Metadata key -> value. Map iteration/wire order is unspecified.
  map<string, UpdateMetadataValue> metadata = 1;
}
// A submitted record: an id, an operation, the target collection, and an
// optional vector and metadata payload.
message SubmitEmbeddingRecord {
  // Record identifier.
  string id = 1;

  // Optional vector payload for the record.
  optional Vector vector = 2;

  // Optional metadata payload for the record.
  optional UpdateMetadata metadata = 3;

  // Operation to apply. Defaults to ADD (the zero value) when unset.
  Operation operation = 4;

  // Identifier of the collection this record targets.
  string collection_id = 5;
}
// A stored record returned by VectorReader.GetVectors: id, sequence id, and
// vector.
message VectorEmbeddingRecord {
  // Record identifier.
  string id = 1;

  // Sequence identifier as opaque bytes; its encoding is not defined in this
  // file.
  bytes seq_id = 2;

  // Vector for the record.
  // TODO: we need to rethink source of truth for vector dimensionality and
  // encoding
  Vector vector = 3;
}
// One result entry of a vector query.
message VectorQueryResult {
  // Identifier of the matched record.
  string id = 1;

  // Sequence identifier as opaque bytes; its encoding is not defined in this
  // file.
  bytes seq_id = 2;

  // Distance reported for this result. The metric is not specified in this
  // file.
  float distance = 3;

  // Optional vector of the matched record.
  // NOTE(review): presumably populated only when the request sets
  // `include_embeddings` — confirm against the server implementation.
  optional Vector vector = 4;
}
// A list of VectorQueryResult entries, used as the element type of
// QueryVectorsResponse.results.
message VectorQueryResults {
  // Result entries in this list.
  repeated VectorQueryResult results = 1;
}
/* Vector Reader Interface */
// Read-side RPCs over vectors held by a segment: fetch by id and query.
service VectorReader {
  // Returns the records matching the request's ids within a segment.
  rpc GetVectors(GetVectorsRequest) returns (GetVectorsResponse);

  // Runs a vector query against a segment and returns one result list per
  // entry of the response's `results` field.
  rpc QueryVectors(QueryVectorsRequest) returns (QueryVectorsResponse);
}
// Request message for VectorReader.GetVectors.
message GetVectorsRequest {
  // Identifiers of the records to fetch.
  // NOTE(review): behavior for an empty list is not specified here — confirm
  // with the server implementation.
  repeated string ids = 1;

  // Identifier of the segment to read from.
  string segment_id = 2;
}
// Response message for VectorReader.GetVectors.
message GetVectorsResponse {
  // Records returned for the request.
  repeated VectorEmbeddingRecord records = 1;
}
// Request message for VectorReader.QueryVectors.
message QueryVectorsRequest {
  // Query vectors.
  repeated Vector vectors = 1;

  // NOTE(review): presumably the number of results to return per query
  // vector — confirm.
  int32 k = 2;

  // NOTE(review): presumably restricts results to these record ids; the
  // meaning of an empty list is not specified here — confirm.
  repeated string allowed_ids = 3;

  // Whether embeddings should be included in the results.
  bool include_embeddings = 4;

  // Identifier of the segment to query.
  string segment_id = 5;

  // TODO: options as in types.py, its currently unused so can add later
}
// Response message for VectorReader.QueryVectors.
message QueryVectorsResponse {
  // Result lists.
  // NOTE(review): presumably one VectorQueryResults per query vector, in
  // request order — confirm.
  repeated VectorQueryResults results = 1;
}
|