File size: 2,142 Bytes
99354e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from transformers import pipeline
from docx import Document

class ArabicRAGPipeline:
    """Retrieval-augmented generation pipeline that answers questions in Arabic.

    Chunk texts are embedded with a SentenceTransformer model and stored in a
    flat L2 FAISS index. Retrieved chunks are inserted into an Arabic prompt
    that is handed to a Hugging Face ``text-generation`` pipeline.

    A "chunk" / "passage" is any indexable sequence whose first element is the
    text to embed; the remaining elements are carried along untouched
    (presumably source/citation metadata - confirm against the caller).
    """

    DEFAULT_EMBEDDING_MODEL = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'
    DEFAULT_GENERATOR_MODEL = "NousResearch/Nous-Hermes-2-Mistral"

    # Label that closes the prompt; only used as a fallback separator when the
    # generated text does not start with the prompt verbatim.
    _ANSWER_MARKER = "الإجابة:"

    def __init__(
        self,
        embedding_model_name=DEFAULT_EMBEDDING_MODEL,
        generator_model_name=DEFAULT_GENERATOR_MODEL,
        max_new_tokens=256,
    ):
        """Load the embedding model and the text-generation pipeline.

        Args:
            embedding_model_name: SentenceTransformer model identifier.
            generator_model_name: Model identifier used for both the generator
                model and its tokenizer.
            max_new_tokens: Generation length limit passed to the pipeline.
        """
        self.embedding_model = SentenceTransformer(embedding_model_name)
        self.retriever_index = None
        self.text_chunks = []
        self.chunk_embeddings = None
        self.generator = pipeline(
            "text-generation",
            model=generator_model_name,
            tokenizer=generator_model_name,
            max_new_tokens=max_new_tokens,
        )

    def build_index(self, chunks):
        """Embed the chunk texts and (re)build the FAISS index over them.

        Args:
            chunks: Iterable of chunks; ``chunk[0]`` is the text to embed.

        Raises:
            ValueError: If ``chunks`` is empty (there is nothing to index and
                the embedding dimension cannot be determined).
        """
        # Materialise once so generators work and later caller-side mutation
        # cannot desynchronise the stored chunks from the index rows.
        chunk_list = list(chunks)
        if not chunk_list:
            raise ValueError("chunks must contain at least one entry")

        texts = [chunk[0] for chunk in chunk_list]
        # FAISS only accepts C-contiguous float32 matrices.
        embeddings = np.ascontiguousarray(
            self.embedding_model.encode(texts, convert_to_tensor=False),
            dtype=np.float32,
        )
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(embeddings)

        # Publish the new state only after every step succeeded, so a failure
        # above leaves the previous index and chunks intact and consistent.
        self.text_chunks = chunk_list
        self.chunk_embeddings = embeddings
        self.retriever_index = index

    def retrieve(self, query, top_k=3):
        """Return up to ``top_k`` indexed chunks nearest to ``query``.

        Args:
            query: Question text to embed and search for.
            top_k: Maximum number of chunks to return.

        Returns:
            A list of stored chunks in the order returned by the index search;
            it may be shorter than ``top_k`` when fewer chunks are indexed.

        Raises:
            RuntimeError: If ``build_index`` has not been called yet.
        """
        if self.retriever_index is None:
            raise RuntimeError("build_index() must be called before retrieve()")

        # Never ask for more neighbours than there are chunks.
        k = min(int(top_k), len(self.text_chunks))
        if k <= 0:
            return []

        query_matrix = np.ascontiguousarray(
            self.embedding_model.encode([query]), dtype=np.float32
        )
        _, indices = self.retriever_index.search(query_matrix, k)
        # FAISS pads missing results with -1; as a Python list index that
        # would silently select the last chunk, so drop those labels.
        return [self.text_chunks[int(i)] for i in indices[0] if i >= 0]

    def generate_answer(self, query, retrieved_passages):
        """Generate an Arabic answer grounded in the retrieved passages.

        Args:
            query: The user's question.
            retrieved_passages: Sequence of passages; ``passage[0]`` is the
                text placed in the prompt context.

        Returns:
            A ``(answer, retrieved_passages)`` tuple: the stripped generated
            answer and the passages argument passed through unchanged.
        """
        context = "\n\n".join(passage[0] for passage in retrieved_passages)
        prompt = f"""أجب باللغة العربية الفصحى على السؤال التالي، بالاعتماد فقط على النصوص التالية. قدم إجابة مدعومة من النص الأصلي، واذكر المرجع المستخدم:

النصوص:
{context}

السؤال: {query}
الإجابة:"""
        response = self.generator(prompt)[0]['generated_text']
        if response.startswith(prompt):
            # The generated text echoes the prompt; cut it off exactly so an
            # answer that itself contains the marker is not truncated.
            answer = response[len(prompt):]
        else:
            # Prompt was not echoed verbatim: fall back to the text after the
            # last answer marker.
            answer = response.split(self._ANSWER_MARKER)[-1]
        return answer.strip(), retrieved_passages

def save_to_doc(answer, citations, path="/tmp/faith_answer.docx"):
    """Write the answer and its citations to a Word document.

    The document contains a level-1 heading followed by the answer paragraph,
    then a level-2 heading followed by the citations paragraph.

    Args:
        answer: Text of the answer paragraph.
        citations: Content of the sources paragraph; passed straight to
            ``Document.add_paragraph`` (presumably a single pre-formatted
            string - confirm against the caller).
        path: Destination file. Defaults to the original fixed location;
            pass a distinct path when several documents may be produced, since
            the default is overwritten on every call.

    Returns:
        The path the document was saved to.
    """
    doc = Document()
    doc.add_heading("الإجابة", level=1)
    doc.add_paragraph(answer)
    doc.add_heading("المصادر", level=2)
    doc.add_paragraph(citations)
    doc.save(path)
    return path