Spaces:
Sleeping
Sleeping
File size: 1,156 Bytes
4c198b1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import os
class SimpleRAG:
    """Minimal retrieval helper: embed ``.txt`` documents and search them with FAISS.

    Intended call order is ``load_docs(folder)``, then ``build_index()``,
    then any number of ``search(query)`` calls.
    """

    def __init__(self, model_name="sentence-transformers/all-MiniLM-L6-v2"):
        """Load the embedding model and start with an empty corpus.

        Args:
            model_name: Model identifier passed straight to
                ``SentenceTransformer``.
        """
        self.model = SentenceTransformer(model_name)
        # FAISS index; stays None until build_index() has run.
        self.index = None
        # (filename, text) pairs, in the order they were loaded.
        self.docs = []
        # Embedding matrix produced by the most recent build_index() call.
        self.embeddings = []

    def load_docs(self, folder_path):
        """Append every ``.txt`` file directly inside *folder_path* to ``self.docs``.

        Files are read as UTF-8 and stored as ``(filename, text)`` tuples.
        Entries are visited in sorted filename order because ``os.listdir``
        returns them in arbitrary order, which would make index positions
        differ from run to run. Directories whose name happens to end in
        ``.txt`` are skipped.

        This only extends ``self.docs``; call ``build_index()`` afterwards so
        the index reflects the new documents.

        Args:
            folder_path: Directory to scan (not recursive).

        Raises:
            OSError: If the folder cannot be listed or a file cannot be read.
            UnicodeDecodeError: If a file is not valid UTF-8.
        """
        for name in sorted(os.listdir(folder_path)):
            if not name.endswith(".txt"):
                continue
            path = os.path.join(folder_path, name)
            if not os.path.isfile(path):
                continue
            with open(path, "r", encoding="utf-8") as handle:
                self.docs.append((name, handle.read()))

    def build_index(self):
        """Embed all loaded documents and rebuild the L2 index from scratch.

        Raises:
            ValueError: If no documents have been loaded yet; without at
                least one embedding the vector dimension is unknown.
        """
        if not self.docs:
            raise ValueError(
                "No documents loaded; call load_docs() before build_index()."
            )
        texts = [text for _, text in self.docs]
        # FAISS only accepts C-contiguous float32 matrices.
        embeddings = np.ascontiguousarray(
            self.model.encode(texts, convert_to_numpy=True), dtype=np.float32
        )
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(embeddings)
        # Publish both attributes only after the index was built successfully.
        self.embeddings = embeddings
        self.index = index

    def search(self, query, top_k=3):
        """Return the documents closest to *query*, nearest first.

        Args:
            query: Text to embed and look up.
            top_k: Maximum number of documents to return.

        Returns:
            A list of at most ``top_k`` ``(filename, text)`` tuples. Fewer are
            returned when the index holds fewer than ``top_k`` documents, and
            an empty list is returned when ``top_k`` is not positive.

        Raises:
            RuntimeError: If ``build_index()`` has not been called yet.
        """
        if self.index is None:
            raise RuntimeError(
                "Index has not been built; call build_index() before search()."
            )
        if top_k <= 0:
            return []
        query_embedding = np.ascontiguousarray(
            self.model.encode([query], convert_to_numpy=True), dtype=np.float32
        )
        _, indices = self.index.search(query_embedding, top_k)
        # FAISS pads the result with -1 when fewer than top_k vectors exist.
        # Indexing self.docs with -1 would silently return the last document,
        # so drop anything that is not a valid position.
        doc_count = len(self.docs)
        return [
            self.docs[pos]
            for pos in map(int, indices[0])
            if 0 <= pos < doc_count
        ]
|