ramysaidagieb committed on
Commit
d9c732d
·
verified ·
1 Parent(s): 2d232ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -34
app.py CHANGED
@@ -1,51 +1,77 @@
 
 
 
1
  import gradio as gr
 
2
  from langchain_community.vectorstores import Chroma
 
3
  from langchain_community.embeddings import HuggingFaceEmbeddings
4
- from langchain.chains import RetrievalQA
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
- from langchain.document_loaders import PyPDFLoader
7
- import os
8
- import shutil
9
 
10
- CHROMA_PATH = "chroma_db"
11
- EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
 
12
 
13
- def load_and_prepare_file(file_path):
14
- # تنظيف المجلد القديم
15
- if os.path.exists(CHROMA_PATH):
16
- shutil.rmtree(CHROMA_PATH)
 
 
 
 
17
 
18
- # تحميل وتقطيع النص
19
- loader = PyPDFLoader(file_path)
20
- pages = loader.load_and_split()
21
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
22
- chunks = text_splitter.split_documents(pages)
23
 
24
- # إنشاء قاعدة بيانات المتجهات
25
- embedding_function = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
26
- vectordb = Chroma.from_documents(chunks, embedding_function, persist_directory=CHROMA_PATH)
27
  vectordb.persist()
28
- return "✅ تم تجهيز الملف بنجاح، يمكنك الآن طرح الأسئلة."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- def answer_question(question):
31
- embedding_function = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
32
- vectordb = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
33
  retriever = vectordb.as_retriever()
34
- qa = RetrievalQA.from_chain_type(llm="gpt2", retriever=retriever)
35
- result = qa.run(question)
36
- return result
37
 
38
- with gr.Blocks() as demo:
39
- gr.Markdown("### 📚 Smart PDF Assistant - مساعد PDF الذكي")
 
 
 
40
 
41
- file_input = gr.File(label="📄 ارفع ملف PDF", type="filepath")
42
- upload_output = gr.Textbox(label="نتيجة الرفع")
43
- upload_button = gr.Button("تحميل ومعالجة الملف")
 
 
 
44
 
45
- question_input = gr.Textbox(label="✍️ اكتب سؤالك هنا")
46
- answer_output = gr.Textbox(label="🔎 الإجابة")
 
47
 
48
- upload_button.click(load_and_prepare_file, inputs=file_input, outputs=upload_output)
49
- question_input.submit(answer_question, inputs=question_input, outputs=answer_output)
50
 
51
  demo.launch()
 
1
+ import os
2
+ import shutil
3
+ import tempfile
4
  import gradio as gr
5
+
6
  from langchain_community.vectorstores import Chroma
7
+ from langchain_community.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoader
8
  from langchain_community.embeddings import HuggingFaceEmbeddings
9
+
10
  from langchain.text_splitter import RecursiveCharacterTextSplitter
11
+ from langchain.chains import RetrievalQA
12
+ from langchain.llms import LiteLLM
 
13
 
14
# Filesystem location where the Chroma vector store is persisted between calls.
DB_DIR = "chroma_db"
# Character-based chunking parameters for the text splitter: 500-character
# chunks with a 50-character overlap between neighbouring chunks.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50
17
 
18
def load_documents(file_path):
    """Load a PDF or Word document into a list of LangChain documents.

    Args:
        file_path: Path to a ``.pdf``, ``.docx``, or ``.doc`` file.

    Returns:
        list: Documents produced by the matching loader.

    Raises:
        ValueError: If the file extension is not supported.
    """
    # Compare the extension case-insensitively so "REPORT.PDF" is accepted too;
    # the original endswith() checks silently rejected upper-case extensions.
    suffix = os.path.splitext(file_path)[1].lower()
    if suffix == ".pdf":
        loader = PyPDFLoader(file_path)
    elif suffix in (".docx", ".doc"):
        loader = UnstructuredWordDocumentLoader(file_path)
    else:
        raise ValueError("Unsupported file type. Only PDF and DOCX are supported.")
    return loader.load()
26
 
27
def create_vector_store(documents):
    """Split *documents* into overlapping chunks and index them in a
    persistent Chroma store rooted at ``DB_DIR``.

    Returns the Chroma vector store instance.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE,
        chunk_overlap=CHUNK_OVERLAP,
    )
    chunks = splitter.split_documents(documents)

    # Multilingual sentence-transformer so Arabic questions can match
    # the indexed text.
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )
    store = Chroma.from_documents(chunks, embedding=embedder, persist_directory=DB_DIR)
    store.persist()
    return store
35
+
36
def process_file(file):
    """Stage an uploaded file, rebuild the vector store from it, and report status.

    Args:
        file: Gradio file object exposing a ``.name`` path, or ``None`` when
            the upload is cleared from the UI.

    Returns:
        str: A status message shown in the UI.
    """
    # Gradio fires the change event with None when the file is removed;
    # the original code crashed on file.name in that case.
    if file is None:
        return "⚠️ لم يتم رفع أي ملف."

    temp_path = file.name
    target_path = os.path.join(tempfile.gettempdir(), os.path.basename(temp_path))

    # Gradio may already stage uploads inside the temp dir; avoid copying
    # a file onto itself.
    if os.path.abspath(temp_path) != os.path.abspath(target_path):
        shutil.copy(temp_path, target_path)

    # Load first so a failed parse leaves any previous index intact.
    documents = load_documents(target_path)

    # Drop the index built from a previously uploaded file.
    if os.path.exists(DB_DIR):
        shutil.rmtree(DB_DIR)

    # Return value unused: the store is persisted to disk and reopened
    # by ask_question.
    create_vector_store(documents)
    return "✅ تم معالجة الملف بنجاح. يمكنك الآن كتابة سؤالك."
50
+
51
def ask_question(question):
    """Answer *question* via retrieval-augmented generation over the indexed file.

    Args:
        question: The user's question string.

    Returns:
        str: The model's answer, or a prompt to upload a file first.
    """
    # Guard: querying before any file has been processed would make Chroma
    # fail on a missing persist directory — return a friendly message instead.
    if not os.path.exists(DB_DIR):
        return "⚠️ الرجاء رفع ملف أولاً قبل طرح الأسئلة."

    # Must use the same embedding model the index was built with.
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )
    vectordb = Chroma(persist_directory=DB_DIR, embedding_function=embeddings)
    retriever = vectordb.as_retriever()

    # NOTE(review): the original comment claimed no API key is needed, but
    # LiteLLM-routed hosted models generally DO require provider credentials —
    # confirm deployment configuration.
    llm = LiteLLM(model="mistralai/Mistral-7B-Instruct-v0.2")
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

    return qa_chain.run(question)
62
 
63
# Gradio UI: one row for upload + status, one row for question + answer.
with gr.Blocks(title="Smart PDF Assistant") as demo:
    gr.Markdown("### 🤖 مساعد الكتب الذكي - اسأل أي سؤال بناءً على ملف PDF أو DOCX")

    with gr.Row():
        uploaded_file = gr.File(
            label="📄 ارفع ملف PDF أو DOCX",
            file_types=[".pdf", ".docx", ".doc"],
        )
        upload_status = gr.Textbox(label="حالة الملف", interactive=False)

    with gr.Row():
        question_box = gr.Textbox(
            label=" اكتب سؤالك هنا",
            placeholder="ما هو إيمان الكنيسة؟",
        )
        answer_box = gr.Textbox(label="📘 الإجابة", lines=8)

    # Re-index whenever a new file is selected; answer on Enter in the
    # question box.
    uploaded_file.change(process_file, inputs=uploaded_file, outputs=upload_status)
    question_box.submit(ask_question, inputs=question_box, outputs=answer_box)

demo.launch()