Spaces:
Runtime error
Runtime error
import uuid

import chromadb
# Module-level Chroma client, created once at import time with default
# settings; the collection setup below goes through this instance.
chroma_client = chromadb.Client()
# Lazily-populated embedding model; stays None until get_model() succeeds.
model = None


def get_model():
    """Return the shared SentenceTransformer, loading it on first use.

    The ``sentence_transformers`` import is deferred to the first call so
    that importing this module does not require the package to load.

    Returns:
        The cached ``SentenceTransformer('all-MiniLM-L6-v2')`` instance, or
        ``None`` if the import or model construction failed (the error is
        printed). A failure is not cached: the next call tries again.
    """
    global model
    if model is not None:
        return model

    try:
        from sentence_transformers import SentenceTransformer
        model = SentenceTransformer('all-MiniLM-L6-v2')
    except Exception as e:
        # The global is still None here, so nothing needs resetting.
        print(f"[ERROR] Could not load SentenceTransformer: {e}")
    return model
COLLECTION_NAME = "documents"

# Fetch the collection, creating it if it does not exist yet.
# get_or_create_collection does this in one call and does not depend on the
# element type returned by list_collections(), which has differed between
# Chroma releases (Collection objects vs. plain names); the previous
# `c.name` membership check could therefore fail at import time.
collection = chroma_client.get_or_create_collection(COLLECTION_NAME)
def add_to_vector_store(chunks, metadatas=None):
    """Embed text chunks and add them to the module-level collection.

    This is best-effort: every failure is printed and swallowed so that a
    vector-store problem does not abort the caller's upload.

    Args:
        chunks: Texts to store. They are passed to the embedding model's
            ``encode()`` and saved as the collection's documents.
        metadatas: Optional metadata, forwarded to ``collection.add``
            unchanged.

    Returns:
        None in all cases.
    """
    try:
        if not chunks:
            print("[WARNING] No chunks provided to vector store")
            return

        model_instance = get_model()
        if model_instance is None:
            print("[ERROR] Embedding model not available.")
            return

        embeddings = model_instance.encode(chunks).tolist()
        # IDs must be unique across the whole collection, not just within
        # this call. Positional IDs (chunk_0, chunk_1, ...) repeat on every
        # call, so a second upload would collide with the first one's
        # entries. A random UUID per chunk avoids that.
        ids = [f"chunk_{uuid.uuid4().hex}" for _ in chunks]
        collection.add(documents=chunks, embeddings=embeddings, ids=ids, metadatas=metadatas)
        print(f"[INFO] Added {len(chunks)} chunks to vector store")
    except Exception as e:
        print(f"[ERROR] Failed to add chunks to vector store: {e}")
        # Don't raise the exception to prevent the entire upload from failing
def similarity_search(query, top_k=5):
    """Query the module-level collection for the chunks closest to *query*.

    Args:
        query: Search text. Empty, whitespace-only, or falsy input
            short-circuits to an empty result.
        top_k: Number of results requested from the collection.

    Returns:
        Whatever ``collection.query`` returns on success. Otherwise, when
        the query is blank, the embedding model is unavailable, or any
        exception occurs, a dict with empty ``documents``, ``metadatas``
        and ``distances`` lists. Errors are printed, never raised.
    """
    # Built per call so callers never share (or mutate) a common dict.
    no_hits = {"documents": [[]], "metadatas": [[]], "distances": [[]]}
    try:
        if not (query and query.strip()):
            return no_hits

        encoder = get_model()
        if encoder is None:
            print("[ERROR] Embedding model not available.")
            return no_hits

        # encode() takes a batch; unwrap the single resulting vector.
        query_vector = encoder.encode([query]).tolist()[0]
        return collection.query(query_embeddings=[query_vector], n_results=top_k)
    except Exception as e:
        print(f"[ERROR] Similarity search failed: {e}")
        # Return empty results instead of failing
        return no_hits