Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,25 +1,25 @@
|
|
1 |
import os
|
|
|
|
|
2 |
import gradio as gr
|
3 |
-
|
|
|
4 |
from langchain.vectorstores import Chroma
|
5 |
from langchain.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
|
6 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
7 |
from langchain.chains import RetrievalQA
|
8 |
from langchain.llms import CTransformers
|
9 |
|
10 |
-
#
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
hf_token=HF_TOKEN,
|
19 |
-
config={"max_new_tokens": 512, "temperature": 0.7}
|
20 |
-
)
|
21 |
|
22 |
-
#
|
23 |
def load_documents(file_path):
|
24 |
if file_path.endswith(".pdf"):
|
25 |
loader = PyPDFLoader(file_path)
|
@@ -28,37 +28,61 @@ def load_documents(file_path):
|
|
28 |
elif file_path.endswith(".docx"):
|
29 |
loader = Docx2txtLoader(file_path)
|
30 |
else:
|
31 |
-
raise ValueError("
|
32 |
return loader.load()
|
33 |
|
34 |
-
#
|
35 |
-
def process_file(
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
|
|
44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
qa_chain = None
|
46 |
|
47 |
-
#
|
48 |
def ask_question(file, question):
|
49 |
global qa_chain
|
50 |
-
if
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
|
64 |
demo.launch()
|
|
|
1 |
import os
|
2 |
+
import tempfile
|
3 |
+
import shutil
|
4 |
import gradio as gr
|
5 |
+
|
6 |
+
from langchain.embeddings import SentenceTransformerEmbeddings
|
7 |
from langchain.vectorstores import Chroma
|
8 |
from langchain.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
|
9 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
10 |
from langchain.chains import RetrievalQA
|
11 |
from langchain.llms import CTransformers
|
12 |
|
13 |
+
# Configure the locally-run LLM (quantized Mistral-7B via CTransformers).
def load_llm():
    """Return a CTransformers LLM wrapping a local GGUF Mistral-7B-Instruct model.

    The model file is the Q4_K_M quantization pulled from the TheBloke HF repo.
    Generation is capped at 1024 new tokens; temperature 0.1 keeps answers
    close to deterministic, which suits retrieval QA.
    """
    return CTransformers(
        model="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
        model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
        model_type="mistral",
        config={"max_new_tokens": 1024, "temperature": 0.1},
    )
|
|
|
|
|
|
|
21 |
|
22 |
+
# Load a document from disk with the loader matching its file extension.
def load_documents(file_path):
    """Return the LangChain documents parsed from *file_path*.

    Supports .pdf (PyPDFLoader), .txt (TextLoader) and .docx (Docx2txtLoader);
    any other extension raises ValueError with an Arabic user-facing message.
    """
    if file_path.endswith(".pdf"):
        loader = PyPDFLoader(file_path)
    # NOTE(review): the .txt branch was elided by the diff view (original lines
    # 26-27); reconstructed from the TextLoader import and the UI's ".txt"
    # file type — confirm against the full file.
    elif file_path.endswith(".txt"):
        loader = TextLoader(file_path)
    elif file_path.endswith(".docx"):
        loader = Docx2txtLoader(file_path)
    else:
        raise ValueError("نوع الملف غير مدعوم.")
    return loader.load()
|
33 |
|
34 |
+
# Turn an uploaded file into a ready-to-query RetrievalQA chain.
def process_file(file_path):
    """Index *file_path* into a Chroma vector store and return a RetrievalQA chain.

    Pipeline: load -> split into 500-char chunks (100 overlap) -> embed with a
    multilingual SentenceTransformer (so Arabic text embeds sensibly) ->
    Chroma store -> "stuff" RetrievalQA over the top-3 retrieved chunks.
    """
    docs = load_documents(file_path)
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = splitter.split_documents(docs)

    embedder = SentenceTransformerEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )
    vector_store = Chroma.from_documents(chunks, embedder)

    return RetrievalQA.from_chain_type(
        llm=load_llm(),
        chain_type="stuff",
        retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
        return_source_documents=False,
    )
|
53 |
+
|
54 |
+
# Module-level slot for the most recently built QA chain (set by ask_question).
qa_chain = None
|
56 |
|
57 |
+
# Gradio callback: answer a question about the uploaded file.
def ask_question(file, question):
    """Build a QA chain for *file* and answer *question*, returned as RTL HTML.

    Returns an Arabic error <div> when no file is uploaded or the question is
    blank, and an Arabic error <div> describing any processing failure.
    Side effect: stores the freshly built chain in the module-level qa_chain.
    """
    global qa_chain
    if file is None or question.strip() == "":
        return "<div dir='rtl' style='color:red;'>الرجاء تحميل ملف وكتابة سؤال.</div>"

    # Copy the upload to a real temp file so the loaders can open it by path;
    # keep the original extension so load_documents picks the right loader.
    with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file.name)[-1]) as tmp:
        shutil.copyfileobj(file, tmp)
        tmp_path = tmp.name

    try:
        qa_chain = process_file(tmp_path)
        answer = qa_chain.run(question)
        return f"<div dir='rtl' style='text-align: right;'>{answer}</div>"
    except Exception as e:
        return f"<div dir='rtl' style='color:red;'>حدث خطأ أثناء المعالجة: {str(e)}</div>"
    finally:
        # Bug fix: delete=False means nothing removes the temp copy — clean it
        # up ourselves so repeated questions don't leak files in the temp dir.
        os.unlink(tmp_path)
|
73 |
+
|
74 |
+
# Gradio UI: file upload + question box wired to ask_question, RTL Arabic labels.
with gr.Blocks(title="Smart PDF Assistant", theme=gr.themes.Soft()) as demo:
    gr.Markdown("<h2 style='text-align: right;'>🧠📚 مساعد الوثائق الذكي</h2>")
    gr.Markdown("<div dir='rtl'>قم برفع ملف PDF أو DOCX أو TXT، ثم اطرح أي سؤال حول محتواه.</div>")

    with gr.Row():
        file_input = gr.File(label="📎 ارفع ملفك", file_types=[".pdf", ".docx", ".txt"])
        question_input = gr.Textbox(label="❓ اكتب سؤالك هنا", placeholder="ما هو ملخص هذا الملف؟")

    answer_output = gr.HTML(label="💬 الإجابة")

    ask_button = gr.Button("🔍 استعلم")
    ask_button.click(
        fn=ask_question,
        inputs=[file_input, question_input],
        outputs=answer_output,
    )

demo.launch()
|