Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,90 +1,79 @@
|
|
1 |
import os
|
2 |
-
import shutil
|
3 |
import gradio as gr
|
4 |
-
|
5 |
from langchain_community.document_loaders import PyPDFLoader
|
6 |
from langchain_community.vectorstores import Chroma
|
7 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
8 |
-
from langchain_community.llms import HuggingFaceHub
|
9 |
-
from langchain.chains import RetrievalQA
|
10 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
11 |
|
12 |
-
# إعداد
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
#
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
# استخدام نموذج مجاني من Hugging Face لا يتطلب مفتاح API
|
49 |
-
llm = HuggingFaceHub(
|
50 |
-
repo_id="mistralai/Mistral-7B-Instruct-v0.2",
|
51 |
-
huggingfacehub_api_token="", # تركها فارغة على Hugging Face Spaces
|
52 |
-
model_kwargs={"temperature": 0.2, "max_new_tokens": 512}
|
53 |
-
)
|
54 |
-
|
55 |
-
qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True)
|
56 |
-
|
57 |
-
return "✅ تم تحميل الملف بنجاح، يمكنك الآن طرح الأسئلة."
|
58 |
-
|
59 |
-
except Exception as e:
|
60 |
-
return f"❌ حدث خطأ أثناء المعالجة: {e}"
|
61 |
-
|
62 |
-
# الدالة للإجابة على السؤال
|
63 |
def answer_question(question):
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
gr.
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
question_input = gr.Textbox(label="❓ اطرح سؤالك هنا", elem_id="question", rtl=True)
|
82 |
-
answer_output = gr.HTML(label="📘 الإجابة", elem_id="answer")
|
83 |
-
|
84 |
-
upload_button.click(fn=process_file, inputs=[file_input], outputs=[status_box])
|
85 |
-
question_input.submit(fn=answer_question, inputs=[question_input], outputs=[answer_output])
|
86 |
|
87 |
-
#
|
88 |
if __name__ == "__main__":
|
89 |
demo.launch()
|
90 |
-
|
|
|
1 |
import os
|
|
|
2 |
import gradio as gr
|
3 |
+
from langchain_community.llms import CTransformers
|
4 |
from langchain_community.document_loaders import PyPDFLoader
|
5 |
from langchain_community.vectorstores import Chroma
|
6 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
|
|
|
|
7 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
8 |
+
from langchain.chains import RetrievalQA
|
9 |
|
10 |
+
# Local model setup: the quantized GGUF weights file must be present inside
# the "models" folder.
_LLM_CONFIG = {"temperature": 0.5, "max_new_tokens": 512}

llm = CTransformers(
    model="models/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
    model_type="mistral",
    config=_LLM_CONFIG,
)

# Embedding model used to vectorize the document chunks.
_EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"

embedding_model = HuggingFaceEmbeddings(model_name=_EMBEDDING_MODEL_NAME)
|
21 |
+
|
22 |
+
# Load and process uploaded files
def process_pdf(pdf_file):
    """Build a Chroma vector store from a PDF file.

    The PDF is loaded with ``PyPDFLoader``, split into 500-character chunks
    with a 50-character overlap, embedded with ``embedding_model`` and stored
    in the ``chroma_db`` directory.

    Args:
        pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) or an
            object that exposes the path through a ``.name`` attribute.

    Returns:
        The Chroma vector store containing the embedded chunks.

    Raises:
        ValueError: If no file was given, or if no text chunks could be
            produced from the PDF.
    """
    if pdf_file is None:
        raise ValueError("لم يتم اختيار ملف PDF.")

    # Accept both a plain path and a file wrapper. Paths are checked first
    # because `Path.name` is only the basename, not the full path.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = os.fspath(pdf_file)
    else:
        pdf_path = pdf_file.name

    documents = PyPDFLoader(pdf_path).load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    )
    docs = text_splitter.split_documents(documents)

    # Fail early with a clear message (e.g. for an image-only PDF) instead of
    # handing an empty chunk list to the vector store.
    if not docs:
        raise ValueError("تعذّر استخراج أي نص من ملف PDF.")

    # NOTE(review): the persist directory is shared by every upload, so chunks
    # from earlier PDFs presumably stay in the same collection — confirm
    # whether that accumulation is intended.
    vectordb = Chroma.from_documents(docs, embedding_model, persist_directory="chroma_db")
    vectordb.persist()
    return vectordb
|
36 |
+
|
37 |
+
# Application state: the vector store built from the most recently processed
# PDF. Stays None until an upload has been processed successfully.
vectordb = None


def upload_file(file):
    """Gradio callback: index the uploaded PDF and report the outcome.

    Args:
        file: The value of the file input; ``None`` when nothing was selected.

    Returns:
        A status message for the UI: a success message, a prompt to select a
        file, or an error message describing why processing failed.
    """
    import logging

    global vectordb

    # The button can be clicked before any file has been selected.
    if file is None:
        return "❗ يرجى اختيار ملف PDF أولًا."

    try:
        new_store = process_pdf(file)
    except Exception as e:
        # UI boundary: log the traceback and show the failure in the status
        # box instead of letting the callback crash.
        logging.getLogger(__name__).exception("Failed to process uploaded PDF")
        return f"❌ حدث خطأ أثناء المعالجة: {e}"

    # Only replace the active store once processing has fully succeeded, so a
    # failed upload leaves the previous document usable.
    vectordb = new_store
    return "📚 تم تحميل الملف بنجاح ويمكنك الآن طرح الأسئلة."
|
44 |
+
|
45 |
+
# Answer questions about the uploaded document
def answer_question(question):
    """Gradio callback: answer a question using the indexed PDF.

    Args:
        question: The text typed by the user.

    Returns:
        The formatted answer, or a prompt asking the user to type a question
        or upload a PDF first.
    """
    # Nothing to ask: do not run the model on an empty prompt.
    if not question or not question.strip():
        return "❗ يرجى كتابة سؤال أولًا."

    # Explicit None check: "no upload yet" must not depend on how the vector
    # store object evaluates in a boolean context.
    if vectordb is None:
        return "❗ يرجى رفع ملف PDF أولًا."

    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        # Retrieve the 3 most similar chunks as context for the answer.
        retriever=vectordb.as_retriever(search_kwargs={"k": 3}),
        return_source_documents=True,
    )

    # RetrievalQA reads the question from the "query" input key and writes
    # the answer to the "result" output key.
    result = qa_chain.invoke({"query": question})
    answer = result["result"]
    return f"💬 الإجابة:\n\n{answer}"
|
60 |
+
|
61 |
+
# Gradio interface
with gr.Blocks(title="Smart PDF Assistant", theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🤖 مساعد PDF الذكي - نظام عربي للإجابة من الملفات بدون API")

    # Upload row: file picker, process button and a read-only status box.
    with gr.Row():
        pdf_input = gr.File(label="📄 حمّل ملف PDF", file_types=[".pdf"])
        upload_btn = gr.Button("🔁 تحميل ومعالجة الملف")
        # rtl=True: the status messages are Arabic and read right-to-left.
        upload_output = gr.Textbox(label="الحالة", interactive=False, rtl=True)

    # Question/answer row; both boxes hold Arabic text, hence rtl=True.
    with gr.Row():
        question_input = gr.Textbox(
            label="❓ اكتب سؤالك هنا",
            placeholder="ما هو موضوع الفصل الأول؟",
            lines=2,
            rtl=True,
        )
        answer_output = gr.Textbox(label="💡 الإجابة", lines=6, rtl=True)

    # Event wiring: the button indexes the PDF, submitting the question box
    # runs the QA callback.
    upload_btn.click(fn=upload_file, inputs=pdf_input, outputs=upload_output)
    question_input.submit(fn=answer_question, inputs=question_input, outputs=answer_output)

# Launch Gradio
if __name__ == "__main__":
    demo.launch()
|
|