ramysaidagieb commited on
Commit
5f7e1f3
·
verified ·
1 Parent(s): b13872f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -38
app.py CHANGED
@@ -1,25 +1,25 @@
1
  import os
 
 
2
  import gradio as gr
3
- from langchain.embeddings import HuggingFaceEmbeddings
 
4
  from langchain.vectorstores import Chroma
5
  from langchain.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  from langchain.chains import RetrievalQA
8
  from langchain.llms import CTransformers
9
 
10
- # احصل على التوكن من Secrets
11
- HF_TOKEN = os.getenv("HF_TOKEN")
12
-
13
- # تحميل النموذج محليًا
14
- llm = CTransformers(
15
- model="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
16
- model_file="mistral-7b-instruct-v0.2.Q4_K_M.gguf",
17
- model_type="mistral",
18
- hf_token=HF_TOKEN,
19
- config={"max_new_tokens": 512, "temperature": 0.7}
20
- )
21
 
22
- # التحميل الدلالي للنصوص
23
  def load_documents(file_path):
24
  if file_path.endswith(".pdf"):
25
  loader = PyPDFLoader(file_path)
@@ -28,37 +28,61 @@ def load_documents(file_path):
28
  elif file_path.endswith(".docx"):
29
  loader = Docx2txtLoader(file_path)
30
  else:
31
- raise ValueError("Unsupported file type.")
32
  return loader.load()
33
 
34
- # تجهيز المستند
35
- def process_file(file):
36
- docs = load_documents(file.name)
37
- splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
38
- chunks = splitter.split_documents(docs)
39
- embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
40
- vectordb = Chroma.from_documents(chunks, embedding)
41
- retriever = vectordb.as_retriever()
42
- qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
43
- return qa_chain
 
44
 
 
 
 
 
 
 
 
 
 
45
  qa_chain = None
46
 
47
- # واجهة Gradio
48
  def ask_question(file, question):
49
  global qa_chain
50
- if qa_chain is None:
51
- qa_chain = process_file(file)
52
- answer = qa_chain.run(question)
53
- return f"<div dir='rtl' style='text-align: right;'>{answer}</div>"
54
-
55
- with gr.Blocks(css="body {direction: rtl; text-align: right;}") as demo:
56
- gr.Markdown("## مساعد الوثائق الذكي - استعلام باللغة العربية من ملفاتك")
57
- file_input = gr.File(label="📄 حمّل ملفًا (PDF / DOCX / TXT)", file_types=[".pdf", ".txt", ".docx"])
58
- question_input = gr.Textbox(label="❓ أدخل سؤالك بالعربية", placeholder="ما هو موضوع هذا الملف؟")
59
- output = gr.HTML()
60
-
61
- submit_btn = gr.Button("🔍 استعلم")
62
- submit_btn.click(fn=ask_question, inputs=[file_input, question_input], outputs=output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  demo.launch()
 
1
  import os
2
+ import tempfile
3
+ import shutil
4
  import gradio as gr
5
+
6
+ from langchain.embeddings import SentenceTransformerEmbeddings
7
  from langchain.vectorstores import Chroma
8
  from langchain.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
9
  from langchain.text_splitter import RecursiveCharacterTextSplitter
10
  from langchain.chains import RetrievalQA
11
  from langchain.llms import CTransformers
12
 
13
# Build the local quantised LLM used to answer questions.
def load_llm():
    """Instantiate the local Mistral-7B-Instruct GGUF model via CTransformers.

    Returns:
        A CTransformers LLM configured for low-temperature (near-deterministic)
        generation of up to 1024 new tokens.
    """
    generation_config = {
        "max_new_tokens": 1024,
        "temperature": 0.1,
    }
    return CTransformers(
        model="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
        model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
        model_type="mistral",
        config=generation_config,
    )
 
 
 
21
 
22
# Load a document from a filesystem path, picking the loader by extension.
def load_documents(file_path):
    """Load *file_path* into a list of LangChain documents.

    Supports .pdf, .txt and .docx (case-insensitively — fixes the original
    case-sensitive ``endswith`` checks, which rejected e.g. ``FILE.PDF``).

    Raises:
        ValueError: for any unsupported file extension (Arabic user-facing
            message, unchanged).
    """
    suffix = os.path.splitext(file_path)[1].lower()
    if suffix == ".pdf":
        loader = PyPDFLoader(file_path)
    elif suffix == ".txt":
        # NOTE(review): this branch is folded out of the diff view; it is
        # reconstructed from the TextLoader import and the UI's .txt support.
        loader = TextLoader(file_path)
    elif suffix == ".docx":
        loader = Docx2txtLoader(file_path)
    else:
        raise ValueError("نوع الملف غير مدعوم.")
    return loader.load()
33
 
34
# Turn a document on disk into a ready-to-use question-answering chain.
def process_file(file_path):
    """Index *file_path* and return a RetrievalQA chain over its content.

    Pipeline: load -> split into overlapping chunks -> embed -> store in
    Chroma -> wrap the top-3 retriever and the local LLM in a "stuff" chain.
    """
    # Split into 500-character chunks with 100-character overlap so that
    # answers spanning a chunk boundary are still retrievable.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = splitter.split_documents(load_documents(file_path))

    # Multilingual embedding model so Arabic questions match Arabic text.
    embedder = SentenceTransformerEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )
    store = Chroma.from_documents(chunks, embedder)

    return RetrievalQA.from_chain_type(
        llm=load_llm(),
        chain_type="stuff",
        retriever=store.as_retriever(search_kwargs={"k": 3}),
        return_source_documents=False,
    )
53
+
54
# Module-level handle on the most recently built QA chain.
qa_chain = None

# Answer *question* about the uploaded *file*, returning RTL-styled HTML.
def ask_question(file, question):
    """Build a QA chain for *file* and run *question* through it.

    Args:
        file: the Gradio upload object (exposes ``.name`` and a readable
            stream — presumably a tempfile wrapper; verify against the
            installed Gradio version).
        question: the user's question text.

    Returns:
        An HTML fragment (``dir='rtl'``); errors are reported as red HTML
        messages instead of propagating to the Gradio layer.
    """
    global qa_chain
    # Guard clause: both a file and a non-blank question are required.
    if file is None or question.strip() == "":
        return "<div dir='rtl' style='color:red;'>الرجاء تحميل ملف وكتابة سؤال.</div>"

    # Copy the upload into a named temp file so the loaders can open it by path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file.name)[-1]) as tmp:
        shutil.copyfileobj(file, tmp)
        tmp_path = tmp.name

    try:
        qa_chain = process_file(tmp_path)
        answer = qa_chain.run(question)
        return f"<div dir='rtl' style='text-align: right;'>{answer}</div>"
    except Exception as e:
        return f"<div dir='rtl' style='color:red;'>حدث خطأ أثناء المعالجة: {str(e)}</div>"
    finally:
        # Fix: the delete=False temp copy was never removed, leaking one file
        # per question for the lifetime of the Space.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
73
+
74
# --- Gradio UI ------------------------------------------------------------
# Right-to-left Arabic interface: upload a document, ask, read the answer.
with gr.Blocks(title="Smart PDF Assistant", theme=gr.themes.Soft()) as demo:
    gr.Markdown("<h2 style='text-align: right;'>🧠📚 مساعد الوثائق الذكي</h2>")
    gr.Markdown("<div dir='rtl'>قم برفع ملف PDF أو DOCX أو TXT، ثم اطرح أي سؤال حول محتواه.</div>")

    # Upload and question side by side on one row.
    with gr.Row():
        uploaded_file = gr.File(label="📎 ارفع ملفك", file_types=[".pdf", ".docx", ".txt"])
        user_question = gr.Textbox(label="❓ اكتب سؤالك هنا", placeholder="ما هو ملخص هذا الملف؟")

    answer_html = gr.HTML(label="💬 الإجابة")

    query_button = gr.Button("🔍 استعلم")
    query_button.click(fn=ask_question, inputs=[uploaded_file, user_question], outputs=answer_html)

demo.launch()