Scaper_search / rag.py
gaur3009's picture
Update rag.py
5d969f7 verified
raw
history blame
1.14 kB
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
class VectorStore:
    """Chunked-text store backed by a FAISS index with similarity retrieval.

    Incoming texts are split with a ``RecursiveCharacterTextSplitter``
    (``chunk_size=500``, ``chunk_overlap=50``), embedded with the
    ``all-MiniLM-L6-v2`` HuggingFace embedding model and kept in a FAISS
    vector store. The index is created lazily on the first insert that
    actually yields chunks.
    """

    def __init__(self):
        """Create the embedder and splitter; the index starts out empty."""
        # Stays None until add_texts() produces at least one chunk.
        self.vectorstore = None
        self.embedder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=50,
        )

    def add_texts(self, texts) -> None:
        """Split *texts* into chunks and add them to the index.

        Args:
            texts: An iterable of strings, or a single string (treated as
                one document). The strings are joined with blank lines and
                split as one combined document, so a chunk may contain
                material from adjacent inputs.

        Falsy input (``None``, ``[]``, ``""``) and input that yields no
        chunks after splitting are ignored, leaving the index untouched.
        """
        if not texts:
            return

        # A bare string would otherwise be joined character by character.
        if isinstance(texts, str):
            texts = [texts]

        chunks = self.text_splitter.split_text("\n\n".join(texts))
        if not chunks:
            # Nothing to index (e.g. only empty/whitespace strings). An empty
            # chunk list gives FAISS.from_texts nothing to size the index
            # from, so skip instead of failing.
            return

        if self.vectorstore is None:
            self.vectorstore = FAISS.from_texts(chunks, self.embedder)
        else:
            self.vectorstore.add_texts(chunks)

    def retrieve(self, query: str, top_k: int = 3) -> list:
        """Return the text of the chunks most similar to *query*.

        Args:
            query: Search string passed to the index's similarity search.
            top_k: Maximum number of chunks to request from the index.

        Returns:
            A list of chunk texts (``page_content`` of each hit), or an
            empty list when nothing has been indexed yet.
        """
        if self.vectorstore is None:
            return []
        return [
            doc.page_content
            for doc in self.vectorstore.similarity_search(query, k=top_k)
        ]