from sentence_transformers import SentenceTransformer import faiss import numpy as np # Load model only once embedder = SentenceTransformer('all-MiniLM-L6-v2') DIMENSION = 384 # Fixed dimension for all-MiniLM-L6-v2 class VectorStore: def __init__(self): self.texts = [] self.index = None self.embeddings = None def add_texts(self, texts): """Add list of texts to the store.""" if not texts: return new_embeds = embedder.encode(texts) # Initialize index if needed if self.index is None: self.index = faiss.IndexFlatL2(DIMENSION) self.embeddings = new_embeds else: self.embeddings = np.vstack([self.embeddings, new_embeds]) # Rebuild index with all embeddings self.index.reset() self.index.add(self.embeddings.astype('float32')) self.texts.extend(texts) def retrieve(self, query, top_k=3): """Return top-k relevant texts for the query.""" if not self.has_data(): return [] query_embed = embedder.encode([query]) _, I = self.index.search(query_embed.astype('float32'), top_k) return [self.texts[i] for i in I[0] if i < len(self.texts)] def has_data(self): """Check if we have any data stored""" return self.index is not None and self.index.ntotal > 0