chroma / idl /chromadb /proto /chroma.proto
badalsahani's picture
feat: chroma initial deploy
287a0bc
syntax = "proto3";
package chroma;
option go_package = "github.com/chroma/chroma-coordinator/internal/proto/coordinatorpb";
// Status information attached to a response (see ChromaResponse).
message Status {
  // Free-form text accompanying the code; presumably a human-readable
  // explanation of the outcome -- confirm against the producers.
  string reason = 1;

  // Numeric status code.
  // TODO: What is the enum of this code?
  int32 code = 2;
}
// Generic response envelope whose only payload is a Status.
message ChromaResponse {
  // Outcome of the request.
  Status status = 1;
}
// Types here should mirror chromadb/types.py

// Kind of write carried by a SubmitEmbeddingRecord.
//
// NOTE: ADD is the zero value, so under proto3 rules a record whose
// `operation` field is left unset is read back as ADD.
enum Operation {
  ADD = 0;
  UPDATE = 1;
  UPSERT = 2;
  DELETE = 3;
}
// Scalar element type declared by a Vector (see Vector.encoding).
//
// NOTE: FLOAT32 is the zero value, so under proto3 rules a Vector whose
// `encoding` field is left unset is read back as FLOAT32.
enum ScalarEncoding {
  FLOAT32 = 0;
  INT32 = 1;
}
// A vector shipped as an opaque byte payload together with the information
// needed to interpret it.
message Vector {
  // Declared dimensionality of the vector.
  int32 dimension = 1;

  // Serialized vector data. Presumably `dimension` scalars laid out
  // according to `encoding`; byte order is not specified in this file --
  // TODO confirm against the readers/writers.
  bytes vector = 2;

  // Scalar type of the elements stored in `vector`.
  ScalarEncoding encoding = 3;
}
// Scope assigned to a Segment (see Segment.scope).
//
// NOTE: VECTOR is the zero value, so under proto3 rules a Segment whose
// `scope` field is left unset is read back as VECTOR.
enum SegmentScope {
  VECTOR = 0;
  METADATA = 1;
}
// Description of a segment: its identity, type, scope and optional bindings
// to a topic and a collection.
message Segment {
  // Identifier of the segment.
  string id = 1;

  // Segment type, carried as a free-form string. The set of valid values is
  // not defined in this file.
  string type = 2;

  // Scope this segment covers.
  SegmentScope scope = 3;

  // Topic associated with the segment, if any.
  // TODO should channel <> segment binding exist here?
  optional string topic = 4;

  // If a segment has a collection, it implies that this segment implements
  // the full collection and can be used to service queries (for its given
  // scope.)
  optional string collection = 5;

  // Metadata attached to the segment, if any.
  optional UpdateMetadata metadata = 6;
}
// Description of a collection and the tenant/database it belongs to.
message Collection {
  // Identifier of the collection.
  string id = 1;

  // Name of the collection.
  string name = 2;

  // Topic associated with the collection.
  string topic = 3;

  // Metadata attached to the collection, if any.
  optional UpdateMetadata metadata = 4;

  // Dimensionality associated with the collection. Declared `optional`, so
  // "not set" can be told apart from an explicit 0.
  optional int32 dimension = 5;

  // Tenant the collection belongs to (presumably a Tenant.name -- confirm).
  string tenant = 6;

  // Database the collection belongs to (presumably a Database.name --
  // confirm).
  string database = 7;
}
// Description of a database and the tenant that owns it.
message Database {
  // Identifier of the database.
  string id = 1;

  // Name of the database.
  string name = 2;

  // Tenant the database belongs to (presumably a Tenant.name -- confirm).
  string tenant = 3;
}
// A tenant, described only by its name.
message Tenant {
  // Name of the tenant.
  string name = 1;
}
// A single metadata value: at most one of a string, an integer or a
// floating-point number is set.
message UpdateMetadataValue {
  // The concrete value. Setting one member clears the others; none set is
  // also representable on the wire.
  oneof value {
    // Text value.
    string string_value = 1;

    // 64-bit signed integer value.
    int64 int_value = 2;

    // Floating-point value. Note that, despite the field name, the wire type
    // is a 64-bit `double`.
    double float_value = 3;
  }
}
// A set of metadata entries keyed by string.
message UpdateMetadata {
  // Metadata key -> value. As with every protobuf map, iteration order is
  // undefined and must not be relied upon.
  map<string, UpdateMetadataValue> metadata = 1;
}
// A write submitted against a collection: a record id, the operation to
// apply, and an optional vector and metadata payload.
message SubmitEmbeddingRecord {
  // Identifier of the record being written.
  string id = 1;

  // Vector payload, if any.
  optional Vector vector = 2;

  // Metadata payload, if any.
  optional UpdateMetadata metadata = 3;

  // Operation to apply. Reads as ADD (the zero value) when left unset.
  Operation operation = 4;

  // Identifier of the target collection (presumably a Collection.id --
  // confirm).
  string collection_id = 5;
}
// A stored vector as returned by VectorReader.GetVectors.
message VectorEmbeddingRecord {
  // Identifier of the record.
  string id = 1;

  // Sequence identifier carried as opaque bytes; its encoding is not defined
  // in this file.
  bytes seq_id = 2;

  // The vector itself.
  // TODO: we need to rethink source of truth for vector dimensionality and
  // encoding
  Vector vector = 3;
}
// One match produced by a vector query.
message VectorQueryResult {
  // Identifier of the matched record.
  string id = 1;

  // Sequence identifier carried as opaque bytes; its encoding is not defined
  // in this file.
  bytes seq_id = 2;

  // Distance associated with this match. The metric is not specified in
  // this file.
  float distance = 3;

  // The matched vector, if included. Presumably populated only when the
  // request sets `include_embeddings` -- confirm against the server.
  optional Vector vector = 4;
}
// A list of query matches, wrapped in a message so that it can itself be
// repeated (see QueryVectorsResponse).
message VectorQueryResults {
  // The individual matches.
  repeated VectorQueryResult results = 1;
}
// Vector Reader Interface

// Read-side RPCs over the vectors held by a segment.
service VectorReader {
  // Fetches vector records by id from the segment named in the request.
  rpc GetVectors(GetVectorsRequest) returns (GetVectorsResponse);

  // Runs a query for each supplied vector against the segment named in the
  // request.
  rpc QueryVectors(QueryVectorsRequest) returns (QueryVectorsResponse);
}
// Request for VectorReader.GetVectors.
message GetVectorsRequest {
  // Identifiers of the records to fetch.
  repeated string ids = 1;

  // Identifier of the segment to read from (presumably a Segment.id --
  // confirm).
  string segment_id = 2;
}
// Response for VectorReader.GetVectors.
message GetVectorsResponse {
  // The vector records returned for the request.
  repeated VectorEmbeddingRecord records = 1;
}
// Request for VectorReader.QueryVectors.
message QueryVectorsRequest {
  // Query vectors.
  repeated Vector vectors = 1;

  // Result-count parameter; presumably the number of matches to return per
  // query vector -- confirm against the server.
  int32 k = 2;

  // Record ids the query is restricted to. How an empty list is treated
  // is not defined in this file.
  repeated string allowed_ids = 3;

  // Whether embeddings should be included in the results (presumably
  // controls VectorQueryResult.vector -- confirm).
  bool include_embeddings = 4;

  // Identifier of the segment to query (presumably a Segment.id -- confirm).
  string segment_id = 5;

  // TODO: options as in types.py, its currently unused so can add later
}
// Response for VectorReader.QueryVectors.
message QueryVectorsResponse {
  // Result lists. Presumably one VectorQueryResults per query vector, in
  // request order -- confirm against the server.
  repeated VectorQueryResults results = 1;
}