import time

import chromadb
import torch
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer, pipeline

class RAGPipeline:
    """Retrieval-augmented question answering over Arabic text chunks.

    Chunks are embedded with the language model's transformer body, stored in
    an in-memory Chroma collection, and retrieved by embedding similarity to
    build the prompt from which the same model generates an answer.
    """

    MODEL_NAME = "aubmindlab/aragpt2-mega"
    COLLECTION_NAME = "rag_arabic_docs"
    # Number of records sent to Chroma per add() call; batching avoids one
    # round-trip per chunk while keeping each request reasonably small.
    _ADD_BATCH_SIZE = 500

    def __init__(self):
        """Load the tokenizer and model and create an empty Chroma collection."""
        print("[RAG] جاري تحميل النموذج والمحول...")
        start = time.time()

        # NOTE(review): trust_remote_code=True executes Python code shipped in
        # the model repository; keep it only for sources you trust.
        self.tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME, trust_remote_code=True)
        # generate() needs a language-modelling head, which AutoModel does not
        # load, so the causal-LM class is required here.
        self.model = AutoModelForCausalLM.from_pretrained(self.MODEL_NAME, trust_remote_code=True)

        # Inference only: switch the model to evaluation mode.
        self.model.eval()

        # Feature extraction runs on the headless transformer (sharing weights
        # with self.model) so it yields hidden states rather than vocabulary
        # logits. Presumably base_model resolves to that body for this
        # architecture - TODO confirm for the remote-code model class.
        self.embeddings_pipeline = pipeline(
            "feature-extraction",
            model=self.model.base_model,
            tokenizer=self.tokenizer,
        )
        self.chroma_client = chromadb.Client()
        self.chroma_collection = self.chroma_client.get_or_create_collection(name=self.COLLECTION_NAME)

        self.chunk_embeddings = []
        self.chunks = []
        print(f"[RAG] تم التحميل بنجاح في {time.time() - start:.2f} ثانية.")

    def _embed(self, text):
        """Return the mean of the per-token feature vectors of *text* as a list."""
        # A single text needs no padding, so only truncation is requested.
        features = self.embeddings_pipeline(text, truncation=True)
        # features[0] is presumably (tokens, hidden); averaging over dim 0
        # collapses it to one fixed-size vector.
        return torch.tensor(features[0]).mean(dim=0).tolist()

    def _reset_collection(self):
        """Drop and recreate the Chroma collection so no stale records remain."""
        # Collection.delete() without ids/filters is rejected by recent
        # chromadb releases; deleting the collection itself empties it.
        self.chroma_client.delete_collection(name=self.COLLECTION_NAME)
        self.chroma_collection = self.chroma_client.get_or_create_collection(name=self.COLLECTION_NAME)

    def build_index(self, chunks, log_callback=None):
        """Embed *chunks* and replace the collection's contents with them.

        Args:
            chunks: Iterable of text chunks to index.
            log_callback: Optional callable receiving progress messages.

        Returns:
            A status message; a warning message when *chunks* is empty.
        """
        chunks = list(chunks)
        start_time = time.time()
        total = len(chunks)

        # Embed everything before touching shared state, so a failure while
        # embedding leaves the previous index intact.
        embeddings = []
        for i, chunk in enumerate(chunks):
            if log_callback and i % 10 == 0:
                log_callback(f"[RAG] تم معالجة {i}/{total} مقاطع.")
            embeddings.append(self._embed(chunk))

        self._reset_collection()
        self.chunks = chunks
        self.chunk_embeddings = embeddings

        if not chunks:
            # Nothing to add; the index is now empty and answer_question()
            # will report that no documents are indexed.
            if log_callback:
                log_callback("[RAG] لا توجد مقاطع لفهرستها.")
            return "⚠️ لا توجد مقاطع لفهرستها."

        ids = [str(i) for i in range(total)]
        for lo in range(0, total, self._ADD_BATCH_SIZE):
            hi = lo + self._ADD_BATCH_SIZE
            self.chroma_collection.add(
                documents=chunks[lo:hi],
                embeddings=embeddings[lo:hi],
                ids=ids[lo:hi],
            )

        dim = len(embeddings[0])
        if log_callback:
            log_callback(f"[RAG] تم بناء الفهرس بأبعاد {dim} في {time.time() - start_time:.2f} ثانية.")
        return "تم بناء الفهرس."

    def answer_question(self, question, log_callback=None, *, top_k=3, max_new_tokens=200):
        """Answer *question* using the most similar indexed chunks as context.

        Args:
            question: The question text.
            log_callback: Optional callable receiving progress messages.
            top_k: Maximum number of chunks to retrieve as context.
            max_new_tokens: Maximum number of tokens to generate.

        Returns:
            The generated answer text (without the prompt), or a warning
            message when nothing has been indexed yet.
        """
        if not self.chunk_embeddings:
            return "⚠️ لم يتم تحميل أو فهرسة أي ملفات بعد."

        if log_callback:
            log_callback(f"[RAG] جاري استخراج أفضل مقاطع للسؤال: {question}")

        # Embed the question the same way the chunks were embedded.
        question_vector = self._embed(question)

        # Retrieve the best matches, never asking for more than are indexed.
        n_results = max(1, min(top_k, len(self.chunk_embeddings)))
        results = self.chroma_collection.query(query_embeddings=[question_vector], n_results=n_results)
        docs = results["documents"][0]
        context = "\n".join(docs)

        if log_callback:
            log_callback("[RAG] تم استخراج المقاطع التالية للإجابة:\n" + context)

        # Generate the answer.
        full_prompt = f"السؤال: {question}\n\nالمقاطع المرجعية:\n{context}\n\nالإجابة:"
        # NOTE(review): truncation cuts the end of an over-long prompt, which
        # is where the answer cue sits - consider bounding the context length.
        inputs = self.tokenizer(full_prompt, return_tensors="pt", truncation=True)
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=True,
                temperature=0.7,
            )

        # The output sequence starts with the prompt tokens; decode only the
        # continuation so the caller receives the answer, not the prompt.
        prompt_length = inputs["input_ids"].shape[1]
        answer_ids = outputs[0][prompt_length:]
        return self.tokenizer.decode(answer_ids, skip_special_tokens=True).strip()