"""Gradio app that answers (Arabic) questions about an uploaded document.

An uploaded PDF / DOCX / TXT file is split into chunks, embedded into a
Chroma vector store, and queried through a RetrievalQA chain backed by a
Mistral-7B-Instruct GGUF model loaded via CTransformers.
"""

import functools
import html
import os
import uuid

import gradio as gr
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.llms import CTransformers

# Read the Hugging Face token from the environment (configured as a Secret).
HF_TOKEN = os.getenv("HF_TOKEN")

# Load the model locally.
# NOTE(review): `hf_token` is passed through exactly as in the original code;
# confirm the installed LangChain `CTransformers` wrapper accepts this keyword
# (some versions reject unknown fields at construction time).
llm = CTransformers(
    model="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
    model_file="mistral-7b-instruct-v0.2.Q4_K_M.gguf",
    model_type="mistral",
    hf_token=HF_TOKEN,
    config={"max_new_tokens": 512, "temperature": 0.7},
)


# Document loading
def load_documents(file_path):
    """Load a document from disk with the loader matching its extension.

    Args:
        file_path: Path to a ``.pdf``, ``.txt`` or ``.docx`` file. The
            extension is matched case-insensitively.

    Returns:
        Whatever the selected loader's ``load()`` returns.

    Raises:
        ValueError: If the extension is not one of the supported types.
    """
    # Lower-case once so that e.g. "REPORT.PDF" is accepted as well.
    lowered = file_path.lower()
    if lowered.endswith(".pdf"):
        loader = PyPDFLoader(file_path)
    elif lowered.endswith(".txt"):
        loader = TextLoader(file_path, encoding="utf-8")
    elif lowered.endswith(".docx"):
        loader = Docx2txtLoader(file_path)
    else:
        raise ValueError("Unsupported file type.")
    return loader.load()


def _upload_path(file):
    """Return the filesystem path of a Gradio upload.

    Accepts either an object exposing ``.name`` or a plain path string, since
    the value handed to the callback differs between Gradio versions.
    """
    return getattr(file, "name", file)


@functools.lru_cache(maxsize=1)
def _get_embeddings():
    """Build the sentence-embedding model once and reuse it for every file."""
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )


# Document preparation
def process_file(file):
    """Index an uploaded file and return a RetrievalQA chain over it.

    Args:
        file: The uploaded file as provided by ``gr.File`` (an object with a
            ``.name`` path attribute, or a path string).

    Returns:
        A ``RetrievalQA`` chain whose retriever searches this file's chunks.

    Raises:
        ValueError: If the file type is unsupported (see ``load_documents``).
    """
    docs = load_documents(_upload_path(file))
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(docs)
    # A unique collection per file keeps chunks of previously indexed files
    # out of this index; with the default collection name, repeated calls in
    # one process may end up sharing a collection depending on the chromadb
    # version. Earlier collections are not deleted here.
    vectordb = Chroma.from_documents(
        chunks,
        _get_embeddings(),
        collection_name=f"doc-{uuid.uuid4().hex}",
    )
    retriever = vectordb.as_retriever()
    return RetrievalQA.from_chain_type(llm=llm, retriever=retriever)


# Chain for the most recently indexed file, and the path it was built from.
qa_chain = None
_qa_chain_source = None


# Gradio interface
def ask_question(file, question):
    """Answer ``question`` about the uploaded ``file``.

    The QA chain is cached between calls and rebuilt only when a different
    file is supplied, so follow-up questions on the same upload skip
    re-indexing while a new upload is never answered from the old index.

    Args:
        file: The value of the ``gr.File`` input; ``None`` if nothing was
            uploaded.
        question: The question text from the textbox.

    Returns:
        An HTML-safe string for the ``gr.HTML`` output: the answer, or a
        short prompt when the file or the question is missing.
    """
    global qa_chain, _qa_chain_source

    if file is None:
        return "يرجى تحميل ملف أولاً."
    if not question or not question.strip():
        return "يرجى إدخال سؤال."

    source = _upload_path(file)
    if qa_chain is None or source != _qa_chain_source:
        qa_chain = process_file(file)
        _qa_chain_source = source

    answer = qa_chain.run(question)
    # The output component renders raw HTML, and the answer is derived from
    # model output over user-supplied documents, so escape it.
    return f"\n{html.escape(answer)}\n"


with gr.Blocks(css="body {direction: rtl; text-align: right;}") as demo:
    gr.Markdown("## مساعد الوثائق الذكي - استعلام باللغة العربية من ملفاتك")
    file_input = gr.File(
        label="📄 حمّل ملفًا (PDF / DOCX / TXT)",
        file_types=[".pdf", ".txt", ".docx"],
    )
    question_input = gr.Textbox(
        label="❓ أدخل سؤالك بالعربية",
        placeholder="ما هو موضوع هذا الملف؟",
    )
    output = gr.HTML()
    submit_btn = gr.Button("🔍 استعلم")
    submit_btn.click(
        fn=ask_question,
        inputs=[file_input, question_input],
        outputs=output,
    )

demo.launch()