syntax = "proto3";

package chroma;

option go_package = "github.com/chroma/chroma-coordinator/internal/proto/coordinatorpb";

// Status pairs a free-form reason string with a numeric code.
message Status {
  string reason = 1;
  int32 code = 2;  // TODO: What is the enum of this code?
}

// ChromaResponse is a response envelope that carries only a Status.
message ChromaResponse {
  Status status = 1;
}

// Types here should mirror chromadb/types.py

// Operation selects the kind of write carried by a SubmitEmbeddingRecord.
// NOTE: ADD is the zero value, so in proto3 an unset operation reads as ADD.
enum Operation {
  ADD = 0;
  UPDATE = 1;
  UPSERT = 2;
  DELETE = 3;
}

// ScalarEncoding names the element type of a Vector.
// NOTE: FLOAT32 is the zero value, so an unset encoding reads as FLOAT32.
enum ScalarEncoding {
  FLOAT32 = 0;
  INT32 = 1;
}

// Vector is an embedding transported as raw bytes plus the information needed
// to interpret them.
message Vector {
  // Declared number of elements in the vector.
  int32 dimension = 1;
  // Raw vector payload; presumably `dimension` elements laid out per
  // `encoding` (byte order is not specified here -- confirm with producers).
  bytes vector = 2;
  ScalarEncoding encoding = 3;
}

// SegmentScope classifies what kind of data a Segment holds.
// NOTE: VECTOR is the zero value, so an unset scope reads as VECTOR.
enum SegmentScope {
  VECTOR = 0;
  METADATA = 1;
}

// Segment describes a single segment and, optionally, the topic and
// collection it is bound to.
message Segment {
  string id = 1;
  string type = 2;
  SegmentScope scope = 3;
  // TODO should channel <> segment binding exist here?
  optional string topic = 4;
  // If a segment has a collection, it implies that this segment implements
  // the full collection and can be used to service queries (for its given
  // scope.)
  optional string collection = 5;
  optional UpdateMetadata metadata = 6;
}

// Collection describes a collection together with the tenant and database
// that it belongs to.
message Collection {
  string id = 1;
  string name = 2;
  string topic = 3;
  optional UpdateMetadata metadata = 4;
  // Explicit presence: an absent dimension is distinguishable from 0.
  optional int32 dimension = 5;
  string tenant = 6;
  string database = 7;
}

// Database identifies a database by id and name within a tenant.
message Database {
  string id = 1;
  string name = 2;
  string tenant = 3;
}

// Tenant identifies a tenant by name.
message Tenant {
  string name = 1;
}

// UpdateMetadataValue holds exactly one scalar metadata value.
message UpdateMetadataValue {
  oneof value {
    string string_value = 1;
    int64 int_value = 2;
    double float_value = 3;
  }
}

// UpdateMetadata is a set of metadata entries keyed by string.
// Map fields have no presence and no defined iteration order; an empty map is
// indistinguishable from an unset one.
message UpdateMetadata {
  map<string, UpdateMetadataValue> metadata = 1;
}

// SubmitEmbeddingRecord is a single write against a collection: the record
// id, the operation to apply, and the optional vector/metadata payload.
message SubmitEmbeddingRecord {
  string id = 1;
  optional Vector vector = 2;
  optional UpdateMetadata metadata = 3;
  Operation operation = 4;
  string collection_id = 5;
}

// VectorEmbeddingRecord is a stored vector together with its id and sequence
// id.
message VectorEmbeddingRecord {
  string id = 1;
  // Opaque sequence identifier; its byte layout is not defined in this file.
  bytes seq_id = 2;
  // TODO: we need to rethink source of truth for vector dimensionality and
  // encoding
  Vector vector = 3;
}

// VectorQueryResult is a single match produced by a vector query.
message VectorQueryResult {
  string id = 1;
  // Opaque sequence identifier; its byte layout is not defined in this file.
  bytes seq_id = 2;
  // Distance of this match from the query vector (metric is not specified
  // here).
  float distance = 3;
  // Presumably populated only when the request set include_embeddings --
  // confirm against the server implementation.
  optional Vector vector = 4;
}

// VectorQueryResults groups a list of VectorQueryResult entries.
message VectorQueryResults {
  repeated VectorQueryResult results = 1;
}

/* Vector Reader Interface */

// VectorReader exposes read access to the vectors of a segment.
service VectorReader {
  // Fetches vector records from a segment by id.
  rpc GetVectors(GetVectorsRequest) returns (GetVectorsResponse) {}
  // Runs a query for each supplied vector against a segment.
  rpc QueryVectors(QueryVectorsRequest) returns (QueryVectorsResponse) {}
}

// GetVectorsRequest names the segment to read and the record ids to fetch.
message GetVectorsRequest {
  repeated string ids = 1;
  string segment_id = 2;
}

// GetVectorsResponse carries the records returned by GetVectors.
message GetVectorsResponse {
  repeated VectorEmbeddingRecord records = 1;
}

// QueryVectorsRequest carries one or more query vectors and the search
// parameters to apply to a segment.
message QueryVectorsRequest {
  repeated Vector vectors = 1;
  // Presumably the number of results requested per query vector -- confirm.
  int32 k = 2;
  // Ids the results may be drawn from; the meaning of an empty list is not
  // defined in this file.
  repeated string allowed_ids = 3;
  bool include_embeddings = 4;
  string segment_id = 5;
  // TODO: options as in types.py, it's currently unused so can add later
}

// QueryVectorsResponse carries the result groups returned by QueryVectors;
// presumably one VectorQueryResults per query vector, in request order --
// confirm.
message QueryVectorsResponse {
  repeated VectorQueryResults results = 1;
}