File size: 1,143 Bytes
5d969f7
 
 
db7ceef
 
 
5d969f7
 
 
 
 
 
db7ceef
 
5d969f7
 
 
 
 
 
 
 
 
 
 
 
 
db7ceef
 
5d969f7
db7ceef
5d969f7
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

class VectorStore:
    """Small semantic-search index over text chunks, backed by FAISS.

    Incoming texts are joined, split into overlapping chunks with a
    ``RecursiveCharacterTextSplitter`` (500 characters, 50 overlap), embedded
    with the ``all-MiniLM-L6-v2`` HuggingFace model and stored in a FAISS
    index that is created on the first non-empty insert.
    """

    def __init__(self):
        # Built lazily by add_texts(): the index is only created once there
        # is at least one chunk to embed.
        self.vectorstore = None
        self.embedder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=50,
        )

    def add_texts(self, texts):
        """Split ``texts`` into chunks and add them to the index.

        Args:
            texts: An iterable of strings. A single bare string is accepted
                and treated as one document.

        Returns:
            None. The call is a no-op when ``texts`` is empty or when
            splitting produces no chunks (e.g. only empty strings).
        """
        if not texts:
            return

        # A bare string is itself an iterable of characters; joining it
        # directly would interleave "\n\n" between every character.
        if isinstance(texts, str):
            texts = [texts]

        chunks = self.text_splitter.split_text("\n\n".join(texts))
        if not chunks:
            # Nothing to embed. Bail out before touching FAISS, which cannot
            # build an index from an empty list of texts.
            return

        if self.vectorstore is None:
            self.vectorstore = FAISS.from_texts(chunks, self.embedder)
        else:
            self.vectorstore.add_texts(chunks)

    def retrieve(self, query, top_k=3):
        """Return the text of the chunks most similar to ``query``.

        Args:
            query: The search string.
            top_k: Maximum number of chunks to return (default 3).

        Returns:
            A list of chunk strings in the order produced by the store's
            ``similarity_search``; an empty list if nothing has been indexed
            yet or ``top_k`` is not positive.
        """
        if self.vectorstore is None or top_k < 1:
            return []
        matches = self.vectorstore.similarity_search(query, k=top_k)
        return [doc.page_content for doc in matches]