ramysaidagieb commited on
Commit
4e3a79b
·
verified ·
1 Parent(s): 6d2438e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -90
app.py CHANGED
@@ -1,110 +1,81 @@
1
- import os
2
- import shutil
3
- import tempfile
4
- from langchain_community.llms import CTransformers
5
- from langchain.chains import RetrievalQA
6
- from langchain.prompts import PromptTemplate
7
- from langchain.vectorstores import Chroma
8
- from langchain.embeddings import SentenceTransformerEmbeddings
9
- from langchain.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader
10
  from langchain.text_splitter import RecursiveCharacterTextSplitter
11
- from fastapi import FastAPI, UploadFile, File
12
- from fastapi.responses import JSONResponse
13
- import uvicorn
 
14
  import gradio as gr
 
 
15
 
16
# Language model: quantized Mistral-7B-Instruct, via LangChain's CTransformers wrapper.
llm = CTransformers(
    model="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
    model_file="mistral-7b-instruct-v0.2.Q4_K_M.gguf",
    model_type="mistral",
    config={
        "max_new_tokens": 512,     # cap on tokens generated per answer
        "temperature": 0.7,
        "context_length": 4096,
        "gpu_layers": 20,          # presumably offloads 20 layers to GPU — requires a CUDA-enabled ctransformers build; TODO confirm
    }
)

# Multilingual sentence-embedding model for semantic retrieval (covers Arabic).
embedding_function = SentenceTransformerEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
31
-
32
# Build the knowledge base from a folder of documents.
def load_documents_from_folder(folder_path):
    """Collect parsed documents from every supported file in *folder_path*.

    Supported suffixes: .pdf, .docx, .txt — anything else is silently skipped.
    Returns a flat list of loaded document objects.
    """
    collected = []
    for entry in os.listdir(folder_path):
        path = os.path.join(folder_path, entry)
        # Pick the loader by filename suffix; unsupported entries are skipped.
        if entry.endswith(".pdf"):
            pages = PyPDFLoader(path).load()
        elif entry.endswith(".docx"):
            pages = Docx2txtLoader(path).load()
        elif entry.endswith(".txt"):
            pages = TextLoader(path).load()
        else:
            continue
        collected.extend(pages)
    return collected
48
 
49
def create_vectorstore(docs):
    """Split *docs* into overlapping chunks and index them in a Chroma store.

    Uses the module-level `embedding_function` for vectorization.
    """
    chunked = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200
    ).split_documents(docs)
    return Chroma.from_documents(chunked, embedding_function)
53
 
54
# QA wiring: module-level state populated by setup_qa().
retriever = None
qa = None


def setup_qa(folder_path):
    """Build the retriever and RetrievalQA chain from documents in *folder_path*.

    Side effect: rebinds the module-level `retriever` and `qa` globals.
    """
    global retriever, qa

    vectordb = create_vectorstore(load_documents_from_folder(folder_path))
    retriever = vectordb.as_retriever()

    # Arabic instruction prompt: answer only from the supplied documents.
    template = """
    أنت مساعد ذكي تجيب باللغة العربية، تستند فقط إلى محتوى الوثائق المقدمة.
    لا تقم بإضافة أي معلومات من عندك.

    السؤال: {question}
    =========
    الوثائق:
    {context}
    =========
    الإجابة المفصلة باللغة العربية:
    """

    qa = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        chain_type_kwargs={
            "prompt": PromptTemplate(
                template=template,
                input_variables=["question", "context"],
            )
        },
    )
78
-
79
# Handle files uploaded through Gradio.
def process_uploaded_files(files):
    """Copy uploaded files into a temporary folder, build the QA chain, clean up.

    Args:
        files: iterable of uploaded file objects exposing `.name` and `.read()`
               (presumably Gradio file wrappers — TODO confirm against the UI).
    """
    temp_dir = tempfile.mkdtemp()
    try:
        for file in files:
            # basename(): Gradio file objects often carry absolute paths, and
            # os.path.join(temp_dir, "/abs/path") would discard temp_dir and
            # overwrite the original file. Keep only the filename.
            dest_path = os.path.join(temp_dir, os.path.basename(file.name))
            with open(dest_path, "wb") as f:
                f.write(file.read())
        setup_qa(temp_dir)
    finally:
        # Always remove the temp folder, even if loading/indexing raises.
        shutil.rmtree(temp_dir)
88
-
89
# Answer questions against the loaded knowledge base.
def answer_question(question):
    """Return the chain's answer, or an Arabic prompt to upload files first."""
    if qa is None:
        return "الرجاء رفع ملفاتك أولًا."
    return qa.run(question)
 
 
95
 
96
# Gradio UI (right-to-left layout and fonts for Arabic).
with gr.Blocks(css=".gradio-container { direction: rtl; text-align: right; font-family: 'Cairo', sans-serif; }") as demo:
    gr.Markdown("## مساعد الوثائق الذكي")

    with gr.Row():
        file_input = gr.File(file_types=[".pdf", ".docx", ".txt"], file_count="multiple", label="ارفع ملفاتك")
        load_button = gr.Button("ابدأ التحليل")
        question_input = gr.Textbox(label="اكتب سؤالك هنا")
        answer_output = gr.Textbox(label="الإجابة")

    # Wire events: the button indexes the uploads; pressing Enter in the
    # question box queries the chain.
    load_button.click(fn=process_uploaded_files, inputs=[file_input], outputs=[])
    question_input.submit(fn=answer_question, inputs=[question_input], outputs=[answer_output])

# Launch the Gradio app when run as a script.
if __name__ == "__main__":
    demo.launch()
 
1
+ from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader
 
 
 
 
 
 
 
 
2
  from langchain.text_splitter import RecursiveCharacterTextSplitter
3
+ from langchain.embeddings import SentenceTransformerEmbeddings
4
+ from langchain.vectorstores import Chroma
5
+ from langchain.chains import RetrievalQA
6
+ from ctransformers import AutoModelForCausalLM
7
  import gradio as gr
8
+ import os
9
+ import tempfile
10
 
11
# Load the quantized Mistral model via ctransformers (CPU-only).
# NOTE(review): AutoModelForCausalLM.from_pretrained returns a raw ctransformers
# model, not a LangChain LLM wrapper — RetrievalQA.from_chain_type below may
# reject it; confirm the integration, or use langchain's CTransformers wrapper.
# NOTE(review): ctransformers may expect config params as kwargs or an
# AutoConfig rather than a plain dict — verify against the installed version.
llm = AutoModelForCausalLM.from_pretrained(
    model_path_or_repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
    model_file="mistral-7b-instruct-v0.2.Q4_K_M.gguf",
    model_type="mistral",
    config={
        'max_new_tokens': 512,   # cap on tokens generated per answer
        'temperature': 0.5,
        'gpu_layers': 0          # Disable GPU
    }
)

# Embedding model used to vectorize document chunks.
embedding_function = SentenceTransformerEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Temp folder where Chroma persists the uploaded document's index.
persist_directory = tempfile.mkdtemp()
 
 
28
 
29
def load_file(file):
    """Parse a single uploaded file into documents.

    Returns the loaded documents for .pdf/.docx/.txt files (extension matched
    case-insensitively), or None when the extension is unsupported.
    """
    extension = os.path.splitext(file.name)[1].lower()
    if extension == ".pdf":
        return PyPDFLoader(file.name).load()
    if extension == ".docx":
        return Docx2txtLoader(file.name).load()
    if extension == ".txt":
        return TextLoader(file.name).load()
    return None
40
 
41
def process_document(file):
    """Index one uploaded file and build a RetrievalQA chain over it.

    Returns the chain, or None when the file type is unsupported (callers
    should treat a falsy result as "nothing loaded").
    """
    docs = load_file(file)
    if docs is None:
        # Bug fix: this used to return the error *string* "صيغة غير مدعومة.",
        # which the caller stored as qa_chain and answer_question() later
        # tried to call — crashing with a TypeError. Return None instead.
        return None
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    texts = text_splitter.split_documents(docs)
    vectordb = Chroma.from_documents(texts, embedding_function, persist_directory=persist_directory)
    # k=3: answer from the three most relevant chunks.
    retriever = vectordb.as_retriever(search_kwargs={"k": 3})
    # NOTE(review): `llm` is a raw ctransformers model here, not a LangChain
    # LLM wrapper — verify RetrievalQA actually accepts it.
    qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True)
    return qa_chain
51
 
52
qa_chain = None  # set after a document has been successfully processed


def upload_file(file):
    """Gradio callback: process the uploaded file and report the outcome.

    Side effect: rebinds the module-level `qa_chain`.
    """
    global qa_chain
    result = process_document(file)
    # Bug fix: the original stored whatever process_document returned and
    # unconditionally reported success. For unsupported formats that was an
    # Arabic error *string*, which answer_question() then tried to call.
    # Treat any non-chain result (None or str) as failure.
    if result is None or isinstance(result, str):
        qa_chain = None
        return result if isinstance(result, str) else "صيغة غير مدعومة."
    qa_chain = result
    return "تم رفع الملف ومعالجته بنجاح. يمكنك الآن طرح سؤالك."
 
 
 
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
def answer_question(question):
    """Gradio callback: answer *question* from the indexed document.

    Returns an Arabic "upload a file first" prompt when no document has been
    processed; otherwise the chain's answer followed by 200-char previews of
    the source chunks it retrieved.
    """
    # callable() guard also covers the legacy bug where an error string was
    # stored in qa_chain instead of a chain object — degrade gracefully
    # instead of raising a TypeError.
    if qa_chain is None or not callable(qa_chain):
        return "يرجى رفع ملف أولاً."
    result = qa_chain({"query": question})
    answer = result["result"]
    # Lightweight citations: first 200 characters of each retrieved chunk.
    sources = "\n\n".join(doc.page_content[:200] for doc in result["source_documents"])
    return f"🧠 الإجابة:\n{answer}\n\n📚 المراجع:\n{sources}"
66
 
67
# Gradio UI: upload one document, then ask questions about it.
with gr.Blocks() as demo:
    gr.Markdown("# 📄 Smart PDF Assistant\nنظام سؤال وجواب من ملفات PDF وورد ونصوص")
    with gr.Row():
        # NOTE(review): type="file" was removed in Gradio 4.x (use "filepath"
        # or "binary") — confirm the pinned Gradio version still supports it.
        file_upload = gr.File(label="📂 ارفع مستند", type="file")
        upload_button = gr.Button("معالجة الملف")
        output = gr.Textbox(label="✅ الحالة")
    with gr.Row():
        question = gr.Textbox(label="✍️ اكتب سؤالك هنا")
        answer = gr.Button("📤 إرسال")
        response = gr.Textbox(label="🤖 الإجابة", lines=10)

    # Wire callbacks: one button processes the upload, the other queries it.
    upload_button.click(fn=upload_file, inputs=file_upload, outputs=output)
    answer.click(fn=answer_question, inputs=question, outputs=response)

demo.launch()