Spaces:
Configuration error
Configuration error
from typing import List

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import pipeline

from config import MODEL_CONFIG
class ArabicRAGSystem:
    """Retrieval-augmented generation pipeline for Arabic question answering.

    Embeds document chunks with a SentenceTransformer, stores them in a FAISS
    inner-product index, and answers questions with a text-generation LLM
    conditioned on the retrieved chunks.
    """

    def __init__(self):
        # Model names come from the project-level MODEL_CONFIG mapping.
        self.embedder = SentenceTransformer(MODEL_CONFIG["embedding_model"])
        self.llm = pipeline("text-generation", model=MODEL_CONFIG["llm"])
        self.index = None   # FAISS index; built lazily by build_index()
        self.documents = []  # chunk texts, row-aligned with the index

    def build_index(self, chunks: List[str]) -> None:
        """Create a FAISS index from document chunks.

        Args:
            chunks: Document chunk texts to embed and index.
        """
        self.documents = chunks
        # normalize_embeddings=True makes the inner product equal cosine
        # similarity, which is the similarity IndexFlatIP is paired with here.
        embeddings = self.embedder.encode(
            chunks, show_progress_bar=True, normalize_embeddings=True
        )
        # FAISS requires contiguous float32 input.
        embeddings = np.ascontiguousarray(embeddings, dtype="float32")
        self.index = faiss.IndexFlatIP(embeddings.shape[1])
        self.index.add(embeddings)

    def retrieve(self, query: str, k: int = 3) -> List[str]:
        """Return the k document chunks most similar to *query*.

        Args:
            query: Natural-language query text.
            k: Number of chunks to return (default 3).

        Raises:
            RuntimeError: If build_index() has not been called yet.
        """
        if self.index is None:
            raise RuntimeError("Index not built; call build_index() first.")
        query_embedding = self.embedder.encode(
            [query], normalize_embeddings=True
        )
        query_embedding = np.ascontiguousarray(query_embedding, dtype="float32")
        _, indices = self.index.search(query_embedding, k)
        # FAISS pads results with -1 when fewer than k vectors are indexed;
        # without the filter, -1 would silently return documents[-1].
        return [self.documents[i] for i in indices[0] if i >= 0]

    def generate_answer(self, question: str, context: List[str]) -> str:
        """Generate an answer to *question* grounded in the *context* chunks.

        Args:
            question: The user question, in Arabic.
            context: Retrieved chunk texts to condition the LLM on.

        Returns:
            The generated answer with the echoed prompt stripped.
        """
        # Join outside the f-string: a backslash inside an f-string
        # expression is a SyntaxError on Python < 3.12.
        context_block = "\n".join(context)
        prompt = f"""استخدم المعلومات التالية للإجابة على السؤال:
السياق:
{context_block}
السؤال: {question}
الإجابة:"""
        result = self.llm(
            prompt,
            max_new_tokens=256,
            temperature=0.7,
            do_sample=True,
        )
        generated = result[0]["generated_text"]
        # Strip only the leading prompt echo; str.replace(prompt, "") could
        # also delete legitimate occurrences of prompt text in the answer.
        return generated[len(prompt):] if generated.startswith(prompt) else generated