ramysaidagieb committed
Commit 286b392 · verified · 1 Parent(s): 848b322

Update rag_pipeline.py

Files changed (1)
  1. rag_pipeline.py +70 -35
rag_pipeline.py CHANGED
@@ -1,47 +1,82 @@
- # rag_pipeline.py
- import time
- import logging
- import numpy as np
import torch
- from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModel
- from sentence_transformers import SentenceTransformer
import chromadb
- from chromadb.config import Settings
-
- logger = logging.getLogger("RAG")

class RAGPipeline:
    def __init__(self):
-         logger.info("[RAG] Loading model and tokenizer...")
-         self.embedding_model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
        self.chunk_embeddings = []
        self.chunks = []
-         self.client = chromadb.Client(Settings(chroma_db_impl="memory", persist_directory=None))
-         self.collection = self.client.create_collection(name="rag_collection")
-         self.tokenizer = AutoTokenizer.from_pretrained("aubmindlab/aragpt2-mega", trust_remote_code=True)
-         self.lm = AutoModelForCausalLM.from_pretrained("aubmindlab/aragpt2-mega", trust_remote_code=True)
-         logger.info("[RAG] Loaded successfully.")

-     def build_index(self, chunks):
-         start_time = time.time()
        self.chunks = chunks
-         self.chunk_embeddings = self.embedding_model.encode(chunks, show_progress_bar=True)
-         logger.info(f"[RAG] Built index with dimension {self.chunk_embeddings.shape[1]} in {time.time() - start_time:.2f} seconds.")
        for i, chunk in enumerate(chunks):
-             self.collection.add(documents=[chunk], ids=[str(i)], embeddings=[self.chunk_embeddings[i].tolist()])
-
-     def retrieve(self, query, k=5):
-         logger.info("[RAG] Retrieving the passages most relevant to the question...")
-         query_embedding = self.embedding_model.encode([query])[0].tolist()
-         results = self.collection.query(query_embeddings=[query_embedding], n_results=k)
-         return results["documents"][0], results["ids"][0]
-
-     def generate_answer(self, query):
-         docs, ids = self.retrieve(query)
-         context = "\n\n".join(docs)
-         prompt = f"السياق:\n{context}\n\nالسؤال: {query}\nالإجابة:"  # Arabic "Context / Question / Answer" prompt
-         inputs = self.tokenizer(prompt, return_tensors="pt")
        with torch.no_grad():
-             outputs = self.lm.generate(**inputs, max_new_tokens=200)
        answer = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-         return answer, context

+ from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, pipeline
import torch
import chromadb
+ import time

class RAGPipeline:
    def __init__(self):
+         print("[RAG] Loading model and tokenizer...")
+         start = time.time()
+
+         self.tokenizer = AutoTokenizer.from_pretrained("aubmindlab/aragpt2-mega", trust_remote_code=True)
+         # Base model (no LM head) for the feature-extraction pipeline.
+         self.model = AutoModel.from_pretrained("aubmindlab/aragpt2-mega", trust_remote_code=True)
+         # AutoModel has no .generate(); load the LM-head variant separately for answer generation.
+         self.lm = AutoModelForCausalLM.from_pretrained("aubmindlab/aragpt2-mega", trust_remote_code=True)
+
+         # Inference only: switch both models to eval mode
+         self.model.eval()
+         self.lm.eval()
+
+         self.embeddings_pipeline = pipeline("feature-extraction", model=self.model, tokenizer=self.tokenizer)
+         self.chroma_client = chromadb.Client()
+         self.chroma_collection = self.chroma_client.get_or_create_collection(name="rag_arabic_docs")
+
        self.chunk_embeddings = []
        self.chunks = []
+         print(f"[RAG] Loaded successfully in {time.time() - start:.2f} seconds.")

+     def build_index(self, chunks, log_callback=None):
+         self.chunk_embeddings = []
        self.chunks = chunks
+         start_time = time.time()
+         total = len(chunks)
+         if total == 0:
+             return "No chunks to index."
+
        for i, chunk in enumerate(chunks):
+             if log_callback and i % 10 == 0:
+                 log_callback(f"[RAG] Processed {i}/{total} chunks.")
+
+             # Mean-pool the token-level hidden states into one vector per chunk
+             # (padding is unnecessary for single strings, and GPT-2 has no pad token).
+             embedding = self.embeddings_pipeline(chunk, truncation=True)
+             embedding_vector = torch.tensor(embedding[0]).mean(dim=0).tolist()
+             self.chunk_embeddings.append(embedding_vector)
+
+         dim = len(self.chunk_embeddings[0])
+         # Collection.delete() requires explicit ids; clear any previous index first.
+         existing = self.chroma_collection.get()
+         if existing["ids"]:
+             self.chroma_collection.delete(ids=existing["ids"])
+         for i, emb in enumerate(self.chunk_embeddings):
+             self.chroma_collection.add(
+                 documents=[self.chunks[i]],
+                 embeddings=[emb],
+                 ids=[str(i)]
+             )
+
+         if log_callback:
+             log_callback(f"[RAG] Built index with dimension {dim} in {time.time() - start_time:.2f} seconds.")
+         return "Index built."
+
+     def answer_question(self, question, log_callback=None):
+         if not self.chunk_embeddings:
+             return "⚠️ No files have been loaded or indexed yet."
+
+         if log_callback:
+             log_callback(f"[RAG] Retrieving the best passages for the question: {question}")
+
+         # Embed the question the same way the chunks were embedded
+         question_emb = self.embeddings_pipeline(question, truncation=True)
+         question_vector = torch.tensor(question_emb[0]).mean(dim=0).tolist()
+
+         # Retrieve the top 3 passages
+         results = self.chroma_collection.query(query_embeddings=[question_vector], n_results=3)
+         docs = results["documents"][0]
+         context = "\n".join(docs)
+
+         if log_callback:
+             log_callback("[RAG] Retrieved the following passages for the answer:\n" + context)
+
+         # Generate the answer; the prompt stays in Arabic since the model is an Arabic LM
+         # ("Question: ... / Reference passages: ... / Answer:").
+         full_prompt = f"السؤال: {question}\n\nالمقاطع المرجعية:\n{context}\n\nالإجابة:"
+         inputs = self.tokenizer(full_prompt, return_tensors="pt", truncation=True)
        with torch.no_grad():
+             outputs = self.lm.generate(
+                 **inputs,
+                 max_new_tokens=200,
+                 do_sample=True,
+                 temperature=0.7
+             )
        answer = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+         return answer
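
For reference, a minimal sketch of how the updated class might be driven from an app or a REPL. The chunk texts, the question, and the use of print as the log_callback are illustrative assumptions, not part of this commit:

# usage_example.py — hypothetical driver for RAGPipeline (not part of this commit)
from rag_pipeline import RAGPipeline

rag = RAGPipeline()

# Illustrative chunks; in the real app these come from uploaded documents.
chunks = [
    "الذكاء الاصطناعي هو فرع من علوم الحاسوب.",   # "AI is a branch of computer science."
    "التعلم العميق يعتمد على الشبكات العصبية.",   # "Deep learning relies on neural networks."
]

rag.build_index(chunks, log_callback=print)  # progress messages go to stdout
answer = rag.answer_question("ما هو الذكاء الاصطناعي؟", log_callback=print)  # "What is artificial intelligence?"
print(answer)

Since build_index embeds each chunk with a full forward pass of AraGPT2-mega, indexing large documents can be slow; batching the pipeline calls would be a natural next optimization.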