# rag_pipeline.py
import time
import logging

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
import chromadb

logger = logging.getLogger("RAG")


class RAGPipeline:
    def __init__(self):
        logger.info("[RAG] Loading the embedding model and language model...")
        # Multilingual sentence-embedding model used for both chunks and queries.
        self.embedding_model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
        self.chunk_embeddings = []
        self.chunks = []
        # Ephemeral (in-memory) Chroma client; nothing is persisted to disk.
        self.client = chromadb.Client()
        self.collection = self.client.create_collection(name="rag_collection")
        # Arabic causal language model used for answer generation.
        self.tokenizer = AutoTokenizer.from_pretrained("aubmindlab/aragpt2-mega", trust_remote_code=True)
        self.lm = AutoModelForCausalLM.from_pretrained("aubmindlab/aragpt2-mega", trust_remote_code=True)
        logger.info("[RAG] Models loaded successfully.")
    def build_index(self, chunks):
        start_time = time.time()
        self.chunks = chunks
        self.chunk_embeddings = self.embedding_model.encode(chunks, show_progress_bar=True)
        # Add all chunks to the collection in a single batched call.
        self.collection.add(
            documents=list(chunks),
            ids=[str(i) for i in range(len(chunks))],
            embeddings=self.chunk_embeddings.tolist(),
        )
        logger.info(
            f"[RAG] Built index with embedding dimension {self.chunk_embeddings.shape[1]} "
            f"in {time.time() - start_time:.2f} seconds."
        )
    def retrieve(self, query, k=5):
        logger.info("[RAG] Retrieving the chunks most relevant to the question...")
        query_embedding = self.embedding_model.encode([query])[0].tolist()
        results = self.collection.query(query_embeddings=[query_embedding], n_results=k)
        return results["documents"][0], results["ids"][0]
    def generate_answer(self, query):
        docs, ids = self.retrieve(query)
        context = "\n\n".join(docs)
        # Arabic prompt, kept in Arabic for the Arabic LM: "Context: ...\n\nQuestion: ...\nAnswer:"
        prompt = f"السياق:\n{context}\n\nالسؤال: {query}\nالإجابة:"
        inputs = self.tokenizer(prompt, return_tensors="pt")
        with torch.no_grad():
            outputs = self.lm.generate(
                **inputs,
                max_new_tokens=200,
                pad_token_id=self.tokenizer.eos_token_id,
            )
        # Decode only the newly generated tokens so the prompt is not echoed back.
        answer = self.tokenizer.decode(
            outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
        )
        return answer, context
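

# Minimal usage sketch (assumption: this block is not part of the original file).
# The chunk texts and the question below are illustrative placeholders only.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    pipeline = RAGPipeline()
    pipeline.build_index([
        "Retrieval-augmented generation combines a retriever with a generator.",
        "ChromaDB stores document embeddings and supports nearest-neighbour queries.",
    ])
    answer, context = pipeline.generate_answer("What is retrieval-augmented generation?")
    print(answer)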