Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,79 +1,91 @@
|
|
|
|
1 |
import os
|
|
|
|
|
2 |
import gradio as gr
|
3 |
-
from
|
4 |
-
from
|
5 |
-
from
|
6 |
-
from langchain_community.embeddings import HuggingFaceEmbeddings
|
7 |
-
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
8 |
from langchain.chains import RetrievalQA
|
|
|
|
|
|
|
9 |
|
10 |
-
# إعداد
|
11 |
-
llm =
|
12 |
-
|
|
|
13 |
model_type="mistral",
|
14 |
-
config={"max_new_tokens": 512, "temperature": 0.
|
15 |
)
|
16 |
|
17 |
-
# إعداد
|
18 |
-
|
19 |
-
|
20 |
-
)
|
21 |
|
22 |
-
# تحميل
|
23 |
-
|
24 |
-
loader = PyPDFLoader(pdf_file.name)
|
25 |
-
documents = loader.load()
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
|
37 |
-
|
38 |
-
|
|
|
39 |
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
|
|
|
|
|
|
|
|
50 |
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
|
57 |
-
|
58 |
-
|
59 |
-
|
|
|
|
|
60 |
|
61 |
-
# واجهة Gradio
|
62 |
-
with gr.Blocks(title="Smart PDF Assistant", theme=gr.themes.Soft()) as demo:
|
63 |
-
gr.Markdown("## 🤖 مساعد PDF الذكي - نظام عربي للإجابة من الملفات بدون API")
|
64 |
-
|
65 |
with gr.Row():
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
69 |
|
70 |
with gr.Row():
|
71 |
-
question_input = gr.Textbox(
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
|
|
76 |
|
77 |
-
# تشغيل Gradio
|
78 |
if __name__ == "__main__":
|
79 |
demo.launch()
|
|
|
1 |
+
# app.py
|
2 |
import os
|
3 |
+
import shutil
|
4 |
+
import chromadb
|
5 |
import gradio as gr
|
6 |
+
from ctransformers import AutoModelForCausalLM
|
7 |
+
from langchain.embeddings import SentenceTransformerEmbeddings
|
8 |
+
from langchain.vectorstores import Chroma
|
|
|
|
|
9 |
from langchain.chains import RetrievalQA
|
10 |
+
from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
|
11 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
12 |
+
from langchain.llms import CTransformers
|
13 |
|
14 |
+
# 1. Local LLM (no API): load the GGUF Mistral model through LangChain's
# CTransformers wrapper (imported at the top of this file) so the resulting
# object is a LangChain-compatible LLM. The previous code built a raw
# ctransformers AutoModelForCausalLM, which RetrievalQA.from_chain_type
# cannot use as its `llm` argument.
llm = CTransformers(
    model="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
    model_file="mistral-7b-instruct-v0.2.Q4_K_M.gguf",
    model_type="mistral",
    # max_new_tokens bounds the answer length; temperature 0.7 keeps some variety.
    config={"max_new_tokens": 512, "temperature": 0.7},
)
|
21 |
|
22 |
+
# 2. Persistence directory for the Chroma index — wipe any leftover index
# from a previous run so every launch starts with a fresh knowledge base.
CHROMA_DIR = "chroma_store"
try:
    shutil.rmtree(CHROMA_DIR)
except FileNotFoundError:
    # Nothing persisted yet — first run.
    pass
|
26 |
|
27 |
+
# 3. File-extension -> LangChain loader class for each supported format.
SUPPORTED_TYPES = dict(pdf=PyPDFLoader, docx=Docx2txtLoader, txt=TextLoader)
|
|
|
|
|
29 |
|
30 |
+
def load_documents(file_paths):
    """Load every supported file in *file_paths* into LangChain documents.

    Files whose extension has no loader registered in SUPPORTED_TYPES are
    silently skipped (best-effort; the UI's file_types filter should already
    prevent them).
    """
    documents = []
    for path in file_paths:
        # os.path.splitext handles paths with no extension or with dots in
        # directory names correctly, unlike the naive path.split(".")[-1].
        ext = os.path.splitext(path)[1].lstrip(".").lower()
        loader_class = SUPPORTED_TYPES.get(ext)
        if loader_class is None:
            continue
        documents.extend(loader_class(path).load())
    return documents
|
40 |
|
41 |
+
# 4. Chunking and embeddings: 500-character chunks with 100-character overlap,
# embedded with a multilingual MiniLM sentence-transformer (chosen, presumably,
# for its Arabic coverage — confirm quality for the target corpus).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
embedding = SentenceTransformerEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
|
44 |
|
45 |
+
def create_vectorstore(docs):
    """Split *docs* into chunks and persist them as a Chroma vector store."""
    chunks = text_splitter.split_documents(docs)
    store = Chroma.from_documents(chunks, embedding, persist_directory=CHROMA_DIR)
    return store
|
48 |
|
49 |
+
# 5. Module-level session state shared by the Gradio callbacks below.
uploaded_files = []   # paths of every file uploaded so far
db = qa_chain = None  # Chroma store and RetrievalQA chain, built on upload
|
53 |
|
54 |
+
def upload_files(files):
    """Gradio callback: index uploaded files and (re)build the QA chain.

    Rebuilds the vector store over *all* files uploaded so far — not just the
    latest batch — so documents from earlier uploads remain searchable.
    Returns a status string for the UI.
    """
    global uploaded_files, db, qa_chain
    # Gradio passes None when the user clicks without selecting files.
    if not files:
        return "لم يتم اختيار أي ملفات."
    uploaded_files.extend(f.name for f in files)
    # Clear the previously persisted index so rebuilding from the full file
    # list does not duplicate chunks already stored in CHROMA_DIR.
    if os.path.exists(CHROMA_DIR):
        shutil.rmtree(CHROMA_DIR)
    docs = load_documents(uploaded_files)
    db = create_vectorstore(docs)
    retriever = db.as_retriever(search_kwargs={"k": 5})
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    return "تم تحميل الملفات وبناء قاعدة المعرفة بنجاح ✅"
|
63 |
|
64 |
+
def answer_question_arabic(question):
    """Answer *question* from the indexed documents via the RetrievalQA chain.

    If no knowledge base has been built yet, return an Arabic prompt asking
    the user to upload files first.
    """
    if qa_chain is None:
        return "من فضلك قم أولاً بتحميل ملفاتك وبناء قاعدة المعرفة."
    return qa_chain.run(question)
|
69 |
|
70 |
+
# 6. Gradio UI: upload row + status box, then a question/answer row, wired to
# the callbacks above.
# NOTE(review): `rtl=True` is passed to gr.Blocks here — Gradio documents
# `rtl` on Textbox, not Blocks; confirm the installed Gradio version accepts
# it, otherwise this raises a TypeError at startup.
with gr.Blocks(theme=gr.themes.Soft(), rtl=True, title="Smart PDF Assistant") as demo:
    gr.Markdown("""
    # 🤖 مساعد الوثائق الذكي باللغة العربية
    أرفق ملفاتك (PDF، DOCX، TXT) ثم اسأل أي سؤال.
    """)

    # Upload controls: multiple files, restricted to the supported extensions.
    with gr.Row():
        file_input = gr.File(file_types=[".pdf", ".docx", ".txt"], file_count="multiple", label="📁 ارفع ملفاتك")
        upload_button = gr.Button("تحميل الملفات")

    # Status line updated by the upload callback.
    status_output = gr.Textbox(label="الحالة")

    # Question input, submit button, and answer display.
    with gr.Row():
        question_input = gr.Textbox(lines=2, placeholder="✍️ اكتب سؤالك هنا", label="السؤال")
        answer_button = gr.Button("أرسل")
        answer_output = gr.Textbox(label="الإجابة", lines=5)

    # Wiring: upload builds the index; the answer button queries it.
    upload_button.click(upload_files, inputs=[file_input], outputs=[status_output])
    answer_button.click(answer_question_arabic, inputs=[question_input], outputs=[answer_output])
|
89 |
|
|
|
90 |
# Launch the Gradio server only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()
|