File size: 1,433 Bytes
69374eb
 
 
 
9fb5174
69374eb
9fb5174
db7ceef
 
 
69374eb
 
9fb5174
db7ceef
 
69374eb
9fb5174
 
 
69374eb
9fb5174
 
 
 
 
 
 
 
 
 
 
69374eb
db7ceef
 
69374eb
9fb5174
db7ceef
9fb5174
69374eb
9fb5174
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Load model only once: the SentenceTransformer is instantiated when this
# module is imported and shared by every VectorStore in this file (both
# add_texts and retrieve call embedder.encode).
embedder = SentenceTransformer('all-MiniLM-L6-v2')
# Width handed to faiss.IndexFlatL2 when a store creates its index; it must
# match the length of the vectors produced by embedder.encode.
DIMENSION = 384  # Fixed dimension for all-MiniLM-L6-v2

class VectorStore:
    """In-memory semantic search over a growing list of texts.

    Texts are embedded with the module-level ``embedder`` and indexed in a
    FAISS ``IndexFlatL2`` (exact L2 search). Row ``i`` of the index, row ``i``
    of ``embeddings`` and ``texts[i]`` always describe the same document.
    """

    def __init__(self):
        # Original strings, in insertion order.
        self.texts = []
        # FAISS index; created lazily on the first non-empty add_texts call.
        self.index = None
        # float32 matrix of every embedding added so far (one row per text),
        # or None while the store is empty.
        self.embeddings = None

    def add_texts(self, texts):
        """Embed *texts* and append them to the store.

        Args:
            texts: An iterable of strings. A single bare string is treated as
                one document rather than as an iterable of characters.

        Returns:
            None. An empty input leaves the store untouched.
        """
        if isinstance(texts, str):
            texts = [texts]
        else:
            # Materialise so generators/tuples can be both encoded and stored.
            texts = list(texts)
        if not texts:
            return

        # FAISS requires C-contiguous float32 input.
        new_embeds = np.ascontiguousarray(
            embedder.encode(texts), dtype=np.float32
        )

        if self.index is None:
            self.index = faiss.IndexFlatL2(DIMENSION)

        # A flat index appends vectors in order, so adding only the new rows
        # yields the same index as resetting and re-adding everything, without
        # the per-call full rebuild.
        self.index.add(new_embeds)

        if self.embeddings is None:
            self.embeddings = new_embeds
        else:
            self.embeddings = np.vstack([self.embeddings, new_embeds])
        self.texts.extend(texts)

    def retrieve(self, query, top_k: int = 3) -> list:
        """Return up to *top_k* stored texts closest to *query*.

        Args:
            query: The search string; it is embedded with the same model as
                the stored texts.
            top_k: Maximum number of results. Values below 1 yield ``[]``.

        Returns:
            The matching texts, nearest first. The list is shorter than
            *top_k* when the store holds fewer texts, and empty when the
            store has no data.
        """
        if top_k <= 0 or not self.has_data():
            return []

        # Never ask for more neighbours than exist: FAISS pads the missing
        # slots with label -1.
        k = min(top_k, self.index.ntotal)
        query_vec = np.ascontiguousarray(
            embedder.encode([query]), dtype=np.float32
        )
        _, labels = self.index.search(query_vec, k)

        # Guard both ends: a -1 label would otherwise silently index the
        # last text via Python's negative indexing.
        limit = len(self.texts)
        return [self.texts[i] for i in labels[0] if 0 <= i < limit]

    def has_data(self) -> bool:
        """Return True when at least one vector has been indexed."""
        return self.index is not None and self.index.ntotal > 0