# Source: Hugging Face Spaces file view (Space status: runtime error); file size 2,214 bytes; revision d82600f.
import chromadb
# Module-level Chroma client; the collection used by the functions below is
# looked up / created through it.
# NOTE(review): Client() with no arguments is presumably an in-memory,
# non-persistent store — confirm against the installed chromadb version.
chroma_client = chromadb.Client()
# Cached embedding model; stays None until get_model() loads it successfully.
model = None


def get_model():
    """Return the shared SentenceTransformer instance, loading it on first use.

    The import and construction happen lazily inside this function. If either
    step raises, the error is printed, the cache is left as ``None`` and
    ``None`` is returned, so a later call will attempt the load again.
    """
    global model

    # Fast path: a previous call already loaded the model.
    if model is not None:
        return model

    try:
        from sentence_transformers import SentenceTransformer
        loaded = SentenceTransformer('all-MiniLM-L6-v2')
    except Exception as exc:
        print(f"[ERROR] Could not load SentenceTransformer: {exc}")
        loaded = None

    model = loaded
    return model
# Name of the single Chroma collection this module reads from and writes to.
COLLECTION_NAME = "documents"

# Fetch the collection, creating it on first run. A single
# get_or_create_collection call replaces the earlier "list, check names,
# create, then get" sequence, which queried the server twice and relied on
# list_collections() returning objects with a `.name` attribute.
# NOTE(review): some chromadb releases return plain names from
# list_collections(), which would break the `.name` access — confirm against
# the installed version.
collection = chroma_client.get_or_create_collection(COLLECTION_NAME)
def add_to_vector_store(chunks, metadatas=None):
    """Embed text chunks and add them to the module-level collection.

    This is a best-effort operation: every failure is printed and swallowed
    so that a problem in the vector store does not abort the caller's upload.

    Args:
        chunks: Sequence of text chunks to embed and store. An empty or
            ``None`` value is reported with a warning and ignored.
        metadatas: Optional per-chunk metadata passed straight through to
            ``collection.add``.

    Returns:
        None in all cases.
    """
    # Local import keeps this block self-contained without touching the
    # file's top-level import section.
    import uuid

    try:
        if not chunks:
            print("[WARNING] No chunks provided to vector store")
            return
        model_instance = get_model()
        if model_instance is None:
            print("[ERROR] Embedding model not available.")
            return
        embeddings = model_instance.encode(chunks).tolist()
        # IDs must be unique across *calls*, not just within one call.
        # Positional IDs ("chunk_0", "chunk_1", ...) repeat on every upload,
        # so a second document would collide with the first one's entries.
        ids = [f"chunk_{uuid.uuid4().hex}" for _ in chunks]
        collection.add(documents=chunks, embeddings=embeddings, ids=ids, metadatas=metadatas)
        print(f"[INFO] Added {len(chunks)} chunks to vector store")
    except Exception as e:
        # Deliberately not re-raised: the upload should continue even if
        # indexing fails.
        print(f"[ERROR] Failed to add chunks to vector store: {e}")
def similarity_search(query, top_k=5):
    """Query the module-level collection for the chunks closest to ``query``.

    Args:
        query: Text to search for.
        top_k: Number of results requested from the collection.

    Returns:
        Whatever ``collection.query`` returns on success. If the query is
        empty or whitespace-only, the embedding model is unavailable, or any
        exception is raised, a dict with empty nested lists under the
        ``documents``, ``metadatas`` and ``distances`` keys is returned
        instead.
    """
    # Fallback payload handed back on every non-success path.
    no_hits = {"documents": [[]], "metadatas": [[]], "distances": [[]]}

    try:
        if query and query.strip():
            encoder = get_model()
            if encoder is None:
                print("[ERROR] Embedding model not available.")
            else:
                query_vector = encoder.encode([query]).tolist()[0]
                return collection.query(query_embeddings=[query_vector], n_results=top_k)
    except Exception as err:
        # Swallow the failure and fall through to the empty result.
        print(f"[ERROR] Similarity search failed: {err}")

    return no_hits