Spaces:
Sleeping
Sleeping
from sentence_transformers import SentenceTransformer | |
import faiss | |
import numpy as np | |
import os | |
class SimpleRAG:
    """Minimal retrieval helper over a folder of ``.txt`` documents.

    Typical use is ``load_docs()`` -> ``build_index()`` -> ``search()``.
    Documents are embedded with a SentenceTransformer model and stored in a
    flat L2 FAISS index; row ``i`` of the index corresponds to
    ``self.docs[i]`` as of the most recent ``build_index()`` call.
    """

    def __init__(self, model_name="sentence-transformers/all-MiniLM-L6-v2"):
        """Load the embedding model and start with an empty corpus.

        Args:
            model_name: Identifier passed to ``SentenceTransformer``.
        """
        self.model = SentenceTransformer(model_name)
        self.index = None  # FAISS index; stays None until build_index() runs
        self.docs = []  # (filename, text) pairs
        self.embeddings = []  # replaced by the encoded array in build_index()

    def load_docs(self, folder_path):
        """Append every ``.txt`` file directly inside ``folder_path`` to ``self.docs``.

        Files are read as UTF-8 and stored as ``(filename, text)`` tuples.
        Repeated calls accumulate documents; nothing is cleared. The index is
        not updated here -- call ``build_index()`` afterwards.

        Args:
            folder_path: Directory to scan (not recursive).
        """
        # os.listdir() order is arbitrary; sorting keeps the doc <-> index row
        # mapping reproducible between runs.
        for name in sorted(os.listdir(folder_path)):
            if not name.endswith(".txt"):
                continue
            path = os.path.join(folder_path, name)
            # A directory that happens to be called "something.txt" cannot be
            # opened as a file; skip it instead of crashing.
            if not os.path.isfile(path):
                continue
            with open(path, "r", encoding="utf-8") as f:
                self.docs.append((name, f.read()))

    def build_index(self):
        """Encode all loaded documents and (re)build the FAISS index from scratch.

        Raises:
            ValueError: If no documents have been loaded yet.
        """
        if not self.docs:
            # Encoding an empty list yields an array with no second dimension,
            # which previously surfaced as an opaque IndexError.
            raise ValueError("no documents loaded; call load_docs() first")

        texts = [text for _name, text in self.docs]
        embeddings = self.model.encode(texts, convert_to_numpy=True)
        self.embeddings = embeddings
        self.index = faiss.IndexFlatL2(embeddings.shape[1])
        self.index.add(embeddings)

    def search(self, query, top_k=3):
        """Return up to ``top_k`` documents closest to ``query``.

        Args:
            query: Query string to embed and look up.
            top_k: Maximum number of results. Values <= 0 yield ``[]``.

        Returns:
            A list of ``(filename, text)`` tuples in the order FAISS returned
            them. The list is shorter than ``top_k`` when fewer documents are
            indexed.

        Raises:
            RuntimeError: If ``build_index()`` has not been called.
        """
        if self.index is None:
            raise RuntimeError("index not built; call build_index() first")
        if top_k <= 0:
            return []

        query_embedding = self.model.encode([query], convert_to_numpy=True)
        _distances, indices = self.index.search(query_embedding, top_k)

        hits = []
        for raw in indices[0]:
            i = int(raw)
            # FAISS pads missing neighbours with -1 when top_k exceeds the
            # number of indexed vectors; a negative index would silently wrap
            # around to the last document, so drop anything out of range.
            if 0 <= i < len(self.docs):
                hits.append((self.docs[i][0], self.docs[i][1]))
        return hits