Spaces:

ramysaidagieb
/

brain247v1

Sleeping

App Files Files Community

ramysaidagieb committed on 29 days ago

Commit

a62dca0

verified ·

1 Parent(s): cb9f6af

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -78

app.py CHANGED Viewed

@@ -1,90 +1,79 @@
 import os
-import shutil
 import gradio as gr
 from langchain_community.document_loaders import PyPDFLoader
 from langchain_community.vectorstores import Chroma
 from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain_community.llms import HuggingFaceHub
-from langchain.chains import RetrievalQA
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-# إعداد مجلد التخزين للملفات والبيانات
-CHROMA_PATH = "chroma_db"
-os.makedirs("docs", exist_ok=True)
-# متغير عالمي للسلسلة
-qa_chain = None
-# الدالة لنسخ الملف إلى مجلد docs
-def save_pdf_to_docs(uploaded_file):
-    filename = os.path.basename(uploaded_file.name)
-    destination_path = os.path.join("docs", filename)
-    if os.path.abspath(uploaded_file.name) != os.path.abspath(destination_path):
-        shutil.copy(uploaded_file.name, destination_path)
-    return destination_path
-# الدالة لمعالجة الملف وبناء قاعدة البيانات
-def process_file(file):
-    global qa_chain
-    try:
-        path = save_pdf_to_docs(file)
-        loader = PyPDFLoader(path)
-        docs = loader.load()
-        # تقسيم النصوص إلى أجزاء صغيرة
-        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
-        split_docs = splitter.split_documents(docs)
-        # إعداد التضمينات
-        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
-        # إنشاء قاعدة بيانات شعاعية
-        vectordb = Chroma.from_documents(split_docs, embedding=embeddings, persist_directory=CHROMA_PATH)
-        retriever = vectordb.as_retriever(search_kwargs={"k": 3})
-        # استخدام نموذج مجاني من Hugging Face لا يتطلب مفتاح API
-        llm = HuggingFaceHub(
-            repo_id="mistralai/Mistral-7B-Instruct-v0.2",
-            huggingfacehub_api_token="",  # تركها فارغة على Hugging Face Spaces
-            model_kwargs={"temperature": 0.2, "max_new_tokens": 512}
-        )
-        qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True)
-        return "✅ تم تحميل الملف بنجاح، يمكنك الآن طرح الأسئلة."
-    except Exception as e:
-        return f"❌ حدث خطأ أثناء المعالجة: {e}"
-# الدالة للإجابة على السؤال
 def answer_question(question):
-    if qa_chain is None:
-        return "⚠️ الرجاء رفع ومعالجة ملف PDF أولاً."
-    result = qa_chain({"query": question})
-    answer = result["result"]
-    # عرض النتيجة من اليمين إلى اليسار
-    return f"<div dir='rtl'><b>🔍 الإجابة:</b><br>{answer}</div>"
-# بناء واجهة Gradio
-with gr.Blocks(title="مساعد عربي ذكي للملفات") as demo:
-    gr.Markdown("## 🤖 مساعد الملفات العربية باستخدام RAG", elem_id="title")
-    file_input = gr.File(label="📄 ارفع ملف PDF بالعربية", type="filepath")
-    upload_button = gr.Button("🚀 تحميل ومعالجة الملف")
-    status_box = gr.Textbox(label="📝 الحالة", interactive=False)
-    question_input = gr.Textbox(label="❓ اطرح سؤالك هنا", elem_id="question", rtl=True)
-    answer_output = gr.HTML(label="📘 الإجابة", elem_id="answer")
-    upload_button.click(fn=process_file, inputs=[file_input], outputs=[status_box])
-    question_input.submit(fn=answer_question, inputs=[question_input], outputs=[answer_output])
-# إطلاق التطبيق
 if __name__ == "__main__":
     demo.launch()

 import os
 import gradio as gr
+from langchain_community.llms import CTransformers
 from langchain_community.document_loaders import PyPDFLoader
 from langchain_community.vectorstores import Chroma
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.chains import RetrievalQA
# Local LLM setup. Make sure the GGUF weights file exists inside the
# "models" directory before starting the app.
_LLM_WEIGHTS_PATH = "models/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
_LLM_GENERATION_CONFIG = {"max_new_tokens": 512, "temperature": 0.5}
llm = CTransformers(
    model=_LLM_WEIGHTS_PATH,
    model_type="mistral",
    config=_LLM_GENERATION_CONFIG,
)

# Embedding model setup.
_EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
embedding_model = HuggingFaceEmbeddings(model_name=_EMBEDDING_MODEL_NAME)
# Load a PDF, split it into chunks and index the chunks in Chroma.
def process_pdf(pdf_file):
    """Build a Chroma vector store from a PDF.

    Args:
        pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) or an
            upload object that exposes the path through a ``name`` attribute.

    Returns:
        The ``Chroma`` store created from the PDF chunks, using ``chroma_db``
        as its persist directory.

    Raises:
        ValueError: If no usable file path was supplied, or if splitting the
            loaded PDF produced no chunks.
    """
    # Paths are checked first: ``pathlib.Path`` also has a ``name`` attribute,
    # but it holds only the final path component, not the full path.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = os.fspath(pdf_file)
    else:
        pdf_path = getattr(pdf_file, "name", None)
    if not pdf_path or not isinstance(pdf_path, (str, os.PathLike)):
        raise ValueError("No PDF file path was provided.")

    pages = PyPDFLoader(pdf_path).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(pages)
    if not chunks:
        # Fail with a clear message instead of handing Chroma an empty list.
        raise ValueError("No extractable text was found in the PDF.")

    store = Chroma.from_documents(chunks, embedding_model, persist_directory="chroma_db")
    # NOTE(review): newer Chroma releases are documented as persisting
    # automatically; confirm whether this explicit call is still needed.
    store.persist()
    return store
# Vector store for the most recently processed PDF; None until an upload
# has been processed successfully.
vectordb = None


def upload_file(file):
    """Process an uploaded PDF and return a status message for the UI.

    Args:
        file: The uploaded file value passed by the upload handler, or
            ``None`` when nothing was uploaded.

    Returns:
        A status string: a success message, a prompt to upload a file when
        ``file`` is ``None``, or an error message containing the exception
        text when processing fails.
    """
    global vectordb
    if file is None:
        return "❗ يرجى رفع ملف PDF أولًا."
    try:
        new_store = process_pdf(file)
    except Exception as exc:  # UI boundary: report the failure as a status.
        return f"❌ حدث خطأ أثناء المعالجة: {exc}"
    # Replace the active store only after processing succeeded, so a failed
    # upload does not discard a previously indexed PDF.
    vectordb = new_store
    return "📚 تم تحميل الملف بنجاح ويمكنك الآن طرح الأسئلة."
# Answer a question using the indexed PDF.
def answer_question(question):
    """Answer ``question`` from the currently indexed PDF.

    Args:
        question: The user's question text.

    Returns:
        The answer prefixed with the answer label, or a short message when
        the question is blank, no PDF has been processed yet, or the
        retrieval/generation step raises.
    """
    if not question or not question.strip():
        return "❗ يرجى كتابة سؤال أولًا."
    if vectordb is None:
        return "❗ يرجى رفع ملف PDF أولًا."
    try:
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=vectordb.as_retriever(search_kwargs={"k": 3}),
            return_source_documents=True,
        )
        # ``invoke`` replaces the deprecated direct call on the chain;
        # "query" is the input key RetrievalQA expects.
        result = qa_chain.invoke({"query": question})
    except Exception as exc:  # UI boundary: report the failure as text.
        return f"❌ حدث خطأ أثناء المعالجة: {exc}"
    answer = result["result"]
    return f"💬 الإجابة:\n\n{answer}"
# Gradio interface: one row for uploading/processing a PDF and one row for
# asking a question and showing the answer.
with gr.Blocks(title="Smart PDF Assistant", theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🤖 مساعد PDF الذكي - نظام عربي للإجابة من الملفات بدون API")
    with gr.Row():
        pdf_input = gr.File(label="📄 حمّل ملف PDF", file_types=[".pdf"])
        upload_btn = gr.Button("🔁 تحميل ومعالجة الملف")
        upload_output = gr.Textbox(label="الحالة", interactive=False)
    with gr.Row():
        # rtl=True so Arabic questions and answers render right-to-left.
        question_input = gr.Textbox(
            label="❓ اكتب سؤالك هنا",
            placeholder="ما هو موضوع الفصل الأول؟",
            lines=2,
            rtl=True,
        )
        # Explicit send button: with a multi-line box, plain Enter inserts a
        # newline (Gradio is understood to submit such boxes on Shift+Enter).
        ask_btn = gr.Button("💬 إرسال السؤال")
        answer_output = gr.Textbox(label="💡 الإجابة", lines=6, rtl=True)
    upload_btn.click(fn=upload_file, inputs=pdf_input, outputs=upload_output)
    question_input.submit(fn=answer_question, inputs=question_input, outputs=answer_output)
    ask_btn.click(fn=answer_question, inputs=question_input, outputs=answer_output)

# Launch Gradio only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()