File size: 1,156 Bytes
4c198b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import os

class SimpleRAG:
    """Minimal retrieval helper: embed ``.txt`` files and search them with FAISS.

    Intended call order is ``load_docs(folder)`` -> ``build_index()`` ->
    ``search(query)``.

    Attributes:
        model: ``SentenceTransformer`` used to embed both documents and queries.
        index: FAISS ``IndexFlatL2`` over the document embeddings, or ``None``
            until ``build_index`` has completed successfully.
        docs: List of ``(filename, text)`` tuples; position in this list is the
            row id used by the index.
        embeddings: Embedding matrix produced by the last successful
            ``build_index`` call (an empty list before that).
    """

    def __init__(self, model_name="sentence-transformers/all-MiniLM-L6-v2"):
        """Load the embedding model and start with an empty corpus.

        Args:
            model_name: Name or path passed to ``SentenceTransformer``.
        """
        self.model = SentenceTransformer(model_name)
        self.index = None
        self.docs = []
        self.embeddings = []

    def load_docs(self, folder_path):
        """Append every ``.txt`` file directly inside *folder_path* to ``docs``.

        Files are read as UTF-8 and stored as ``(filename, text)`` tuples.
        Calling this more than once accumulates documents; it does not
        rebuild the index.

        Args:
            folder_path: Directory to scan (not recursive).

        Raises:
            OSError: If the directory cannot be listed or a file cannot be read.
            UnicodeDecodeError: If a file is not valid UTF-8.
        """
        # Sort so document order (and therefore index row ids) is
        # deterministic; os.listdir returns entries in arbitrary order.
        for name in sorted(os.listdir(folder_path)):
            if not name.endswith(".txt"):
                continue
            path = os.path.join(folder_path, name)
            # A directory can be named "something.txt"; open() would fail on it.
            if not os.path.isfile(path):
                continue
            with open(path, "r", encoding="utf-8") as f:
                self.docs.append((name, f.read()))

    def build_index(self):
        """Embed all loaded documents and (re)build the FAISS L2 index.

        Raises:
            ValueError: If no documents have been loaded.
        """
        if not self.docs:
            raise ValueError(
                "No documents loaded; call load_docs() before build_index()."
            )

        texts = [text for _, text in self.docs]
        embeddings = self.model.encode(texts, convert_to_numpy=True)
        # FAISS only accepts C-contiguous float32 matrices.
        embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(embeddings)

        # Publish state only after everything succeeded, so a failed build
        # never leaves embeddings and index out of sync.
        self.embeddings = embeddings
        self.index = index

    def search(self, query, top_k=3):
        """Return up to *top_k* documents closest to *query*.

        Args:
            query: Query string to embed and look up.
            top_k: Maximum number of documents to return.

        Returns:
            List of ``(filename, text)`` tuples in the order the index
            returned them. May be shorter than *top_k* when fewer documents
            are indexed; empty when ``top_k <= 0``.

        Raises:
            RuntimeError: If ``build_index`` has not been called yet.
        """
        if self.index is None:
            raise RuntimeError(
                "Index has not been built; call build_index() before search()."
            )

        doc_count = len(self.docs)
        k = min(top_k, doc_count)
        if k <= 0:
            return []

        query_embedding = self.model.encode([query], convert_to_numpy=True)
        query_embedding = np.ascontiguousarray(query_embedding, dtype=np.float32)
        _distances, indices = self.index.search(query_embedding, k)

        # FAISS pads missing neighbours with -1; used as a list index that
        # would silently return the last document, so drop such labels.
        hits = []
        for raw_id in indices[0]:
            doc_id = int(raw_id)
            if 0 <= doc_id < doc_count:
                hits.append((self.docs[doc_id][0], self.docs[doc_id][1]))
        return hits