Spaces:

ramysaidagieb
/

brain247v1

Sleeping

App Files Files Community

ramysaidagieb committed on Jul 24

Commit

d9c732d

verified ·

1 Parent(s): 2d232ac

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -34

app.py CHANGED Viewed

@@ -1,51 +1,77 @@
 import gradio as gr
 from langchain_community.vectorstores import Chroma
 from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain.chains import RetrievalQA
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.document_loaders import PyPDFLoader
-import os
-import shutil
-CHROMA_PATH = "chroma_db"
-EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
-def load_and_prepare_file(file_path):
-    # تنظيف المجلد القديم
-    if os.path.exists(CHROMA_PATH):
-        shutil.rmtree(CHROMA_PATH)
-    # تحميل وتقطيع النص
-    loader = PyPDFLoader(file_path)
-    pages = loader.load_and_split()
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
-    chunks = text_splitter.split_documents(pages)
-    # إنشاء قاعدة بيانات المتجهات
-    embedding_function = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
-    vectordb = Chroma.from_documents(chunks, embedding_function, persist_directory=CHROMA_PATH)
     vectordb.persist()
-    return "✅ تم تجهيز الملف بنجاح، يمكنك الآن طرح الأسئلة."
-def answer_question(question):
-    embedding_function = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
-    vectordb = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
     retriever = vectordb.as_retriever()
-    qa = RetrievalQA.from_chain_type(llm="gpt2", retriever=retriever)
-    result = qa.run(question)
-    return result
-with gr.Blocks() as demo:
-    gr.Markdown("### 📚 Smart PDF Assistant - مساعد PDF الذكي")
-    file_input = gr.File(label="📄 ارفع ملف PDF", type="filepath")
-    upload_output = gr.Textbox(label="نتيجة الرفع")
-    upload_button = gr.Button("تحميل ومعالجة الملف")
-    question_input = gr.Textbox(label="✍️ اكتب سؤالك هنا")
-    answer_output = gr.Textbox(label="🔎 الإجابة")
-    upload_button.click(load_and_prepare_file, inputs=file_input, outputs=upload_output)
-    question_input.submit(answer_question, inputs=question_input, outputs=answer_output)
 demo.launch()

+import os
+import shutil
+import tempfile
 import gradio as gr
 from langchain_community.vectorstores import Chroma
+from langchain_community.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoader
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.chains import RetrievalQA
+from langchain.llms import LiteLLM
+# Directory where the Chroma vector store is persisted; it is deleted and
+# rebuilt every time a new file is processed.
+DB_DIR = "chroma_db"
+# Passed as chunk_size / chunk_overlap to RecursiveCharacterTextSplitter.
+CHUNK_SIZE = 500
+CHUNK_OVERLAP = 50
+def load_documents(file_path):
+    """Load a PDF or Word file with the loader that matches its extension.
+
+    The extension check is case-insensitive, so ``REPORT.PDF`` is handled
+    the same way as ``report.pdf``.
+
+    Args:
+        file_path: Path of the file to load.
+
+    Returns:
+        Whatever the selected loader's ``load()`` returns.
+
+    Raises:
+        ValueError: If the path does not end in ``.pdf``, ``.docx`` or
+            ``.doc``.
+    """
+    # Compare against a lower-cased copy only; the loader still receives the
+    # original path so the file can be found on case-sensitive filesystems.
+    lowered_path = file_path.lower()
+    if lowered_path.endswith(".pdf"):
+        loader = PyPDFLoader(file_path)
+    elif lowered_path.endswith((".docx", ".doc")):
+        loader = UnstructuredWordDocumentLoader(file_path)
+    else:
+        raise ValueError("Unsupported file type. Only PDF and DOCX are supported.")
+    return loader.load()
+def create_vector_store(documents):
+    """Chunk the documents, embed the chunks and persist them in Chroma.
+
+    Args:
+        documents: Documents to index, as returned by ``load_documents``.
+
+    Returns:
+        The Chroma vector store written to ``DB_DIR``.
+    """
+    chunks = RecursiveCharacterTextSplitter(
+        chunk_size=CHUNK_SIZE,
+        chunk_overlap=CHUNK_OVERLAP,
+    ).split_documents(documents)
+    embedder = HuggingFaceEmbeddings(
+        model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
+    )
+    vectordb = Chroma.from_documents(
+        chunks,
+        embedding=embedder,
+        persist_directory=DB_DIR,
+    )
     vectordb.persist()
+    return vectordb
+def process_file(file):
+    """Index an uploaded document so that questions can be asked about it.
+
+    Args:
+        file: Value Gradio passes for the ``gr.File`` input: a path string,
+            an object exposing the path as ``.name``, or ``None`` when no
+            file is selected.
+
+    Returns:
+        A status message for the UI, or an empty string when there is no
+        file to process.
+
+    Raises:
+        ValueError: Propagated from ``load_documents`` for unsupported file
+            types.
+    """
+    # The change event may also fire when the upload is cleared; there is
+    # nothing to index then, so just blank the status box.
+    if file is None:
+        return ""
+    # Depending on the Gradio version/configuration the component yields
+    # either a plain path string or a file wrapper with a ``.name`` path.
+    temp_path = file if isinstance(file, str) else file.name
+    target_path = os.path.join(tempfile.gettempdir(), os.path.basename(temp_path))
+    if os.path.abspath(temp_path) != os.path.abspath(target_path):
+        shutil.copy(temp_path, target_path)
+    documents = load_documents(target_path)
+    # Drop the previous index only after the new file loaded successfully,
+    # so an unsupported upload does not wipe a working index.
+    if os.path.exists(DB_DIR):
+        shutil.rmtree(DB_DIR)
+    create_vector_store(documents)
+    return "✅ تم معالجة الملف بنجاح. يمكنك الآن كتابة سؤالك."
+def ask_question(question):
+    """Answer a question using the index built from the last processed file.
+
+    Args:
+        question: Question text entered by the user.
+
+    Returns:
+        The answer produced by the retrieval QA chain; an empty string for a
+        blank question; or a prompt to upload a file when no index exists.
+    """
+    # A blank submission has nothing to answer: skip loading the embedding
+    # model and calling the LLM.
+    if not question or not question.strip():
+        return ""
+    # Without a persisted index there is no document to retrieve from, so
+    # tell the user to upload a file instead of querying the LLM blindly.
+    if not os.path.isdir(DB_DIR):
+        return "⚠️ يرجى رفع ملف ومعالجته أولاً قبل كتابة سؤالك."
+    # Must be the same embedding model that create_vector_store used.
+    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
+    vectordb = Chroma(persist_directory=DB_DIR, embedding_function=embeddings)
     retriever = vectordb.as_retriever()
+    # Original author's note: no API key is needed.
+    # NOTE(review): confirm that ``langchain.llms`` really exports ``LiteLLM``
+    # and that this model can be reached without credentials.
+    llm = LiteLLM(model="mistralai/Mistral-7B-Instruct-v0.2")
+    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
+    result = qa_chain.run(question)
+    return result
+with gr.Blocks(title="Smart PDF Assistant") as demo:
+    # Page heading (Arabic): smart book assistant, ask about a PDF or DOCX.
+    gr.Markdown("### 🤖 مساعد الكتب الذكي - اسأل أي سؤال بناءً على ملف PDF أو DOCX")
+    # First row: file upload next to a read-only status box.
+    with gr.Row():
+        file_input = gr.File(label="📄 ارفع ملف PDF أو DOCX", file_types=[".pdf", ".docx", ".doc"])
+        file_status = gr.Textbox(label="حالة الملف", interactive=False)
+    # Second row: question box next to the answer box.
+    with gr.Row():
+        question_input = gr.Textbox(label="❓ اكتب سؤالك هنا", placeholder="ما هو إيمان الكنيسة؟")
+        answer_output = gr.Textbox(label="📘 الإجابة", lines=8)
+    # Re-index whenever the file input changes; the returned message goes to
+    # the status box.
+    file_input.change(process_file, inputs=file_input, outputs=file_status)
+    # Submitting the question box runs the QA chain and shows its answer.
+    question_input.submit(ask_question, inputs=question_input, outputs=answer_output)
 demo.launch()