# amharic-srh-chatbot / chatbot_utils.py
# Uploaded by Walelign ("Upload 4 files"), commit 54c32b3 (verified).
# File size: 1.28 kB
import pandas as pd
import faiss
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
class AmharicChatbot:
    """Retrieval-based question answering over a CSV of question/answer pairs.

    The CSV is loaded with pandas and must provide a ``question`` and an
    ``answer`` column. Every stored question is embedded once with the
    ``intfloat/multilingual-e5-small`` sentence-transformer and placed in a
    flat (exact) FAISS L2 index. At query time the nearest stored question is
    looked up and its answer is returned when the cosine similarity between
    the query and that question reaches ``threshold``; otherwise the
    ``OUT_OF_SCOPE`` sentinel is returned.
    """

    # Sentinel returned by get_answer() when no stored question is close enough.
    OUT_OF_SCOPE = "__OUT_OF_SCOPE__"

    def __init__(self, csv_path, threshold=0.70):
        """Load the Q/A table, load the embedding model and build the index.

        Args:
            csv_path: Path (or anything ``pandas.read_csv`` accepts) of the
                CSV file holding the ``question`` and ``answer`` columns.
            threshold: Minimum cosine similarity for a match to be accepted.
        """
        self.df = pd.read_csv(csv_path)
        self.model = SentenceTransformer("intfloat/multilingual-e5-small")
        self.threshold = threshold
        self.build_index()

    def build_index(self):
        """Embed every stored question and (re)build the FAISS index.

        Sets ``self.embeddings`` (float32 matrix, one row per question) and
        ``self.index``. When the table has no rows there is nothing to embed,
        so ``self.index`` is set to ``None`` and ``get_answer`` will report
        every question as out of scope.
        """
        # The "passage: " / "query: " prefixes follow the E5 model family's
        # documented input convention for retrieval.
        passages = ["passage: " + q for q in self.df["question"].tolist()]

        if not passages:
            # An empty encode() result has no second dimension to size the
            # index with, so skip index construction entirely.
            self.embeddings = np.empty((0, 0), dtype="float32")
            self.index = None
            return

        # FAISS requires a C-contiguous float32 matrix.
        self.embeddings = np.ascontiguousarray(
            self.model.encode(passages, show_progress_bar=True),
            dtype="float32",
        )
        self.index = faiss.IndexFlatL2(self.embeddings.shape[1])
        self.index.add(self.embeddings)

    @staticmethod
    def _cosine(vec_a, vec_b):
        """Return the cosine similarity of two 1-D vectors (0.0 if either is zero)."""
        vec_a = np.asarray(vec_a, dtype="float64")
        vec_b = np.asarray(vec_b, dtype="float64")
        denom = float(np.linalg.norm(vec_a)) * float(np.linalg.norm(vec_b))
        if denom == 0.0:
            return 0.0
        return float(np.dot(vec_a, vec_b)) / denom

    def get_answer(self, user_question, k=1):
        """Return the stored answer that best matches ``user_question``.

        Args:
            user_question: The user's question text.
            k: Number of neighbours requested from the index. Only the closest
                one is used for the answer; values below 1 are treated as 1.

        Returns:
            The ``answer`` cell of the closest stored question, or
            ``OUT_OF_SCOPE`` (``"__OUT_OF_SCOPE__"``) when the question is
            blank, the index is empty, or the best cosine similarity is below
            ``self.threshold``.
        """
        # A blank question cannot match anything meaningful; skip the model.
        if user_question is None or not str(user_question).strip():
            return self.OUT_OF_SCOPE
        if self.index is None:
            return self.OUT_OF_SCOPE

        query_vec = np.asarray(
            self.model.encode([f"query: {user_question}"])[0], dtype="float32"
        )
        k = max(int(k), 1)
        _distances, neighbours = self.index.search(query_vec.reshape(1, -1), k)

        top_idx = int(neighbours[0][0])
        # FAISS pads missing neighbours with -1; without this guard iloc[-1]
        # would silently return the last row's answer.
        if top_idx < 0 or top_idx >= len(self.embeddings):
            return self.OUT_OF_SCOPE

        # Reuse the embedding computed in build_index() instead of running
        # the model a second time on the matched question.
        score = self._cosine(query_vec, self.embeddings[top_idx])
        if score < self.threshold:
            return self.OUT_OF_SCOPE
        return self.df.iloc[top_idx]["answer"]