import os
import shutil
import tempfile

import gradio as gr
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
# LangChain exposes the LiteLLM integration as a chat model, not as langchain.llms.LiteLLM.
from langchain_community.chat_models import ChatLiteLLM

DB_DIR = "chroma_db"
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50


def load_documents(file_path):
    """Load a PDF or Word file into LangChain Document objects."""
    if file_path.endswith(".pdf"):
        loader = PyPDFLoader(file_path)
    elif file_path.endswith(".docx") or file_path.endswith(".doc"):
        loader = UnstructuredWordDocumentLoader(file_path)
    else:
        raise ValueError("Unsupported file type. Only PDF and DOCX are supported.")
    return loader.load()


def create_vector_store(documents):
    """Split the documents into chunks, embed them, and persist a Chroma index."""
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
    texts = text_splitter.split_documents(documents)
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )
    vectordb = Chroma.from_documents(texts, embedding=embeddings, persist_directory=DB_DIR)
    vectordb.persist()  # kept for older Chroma releases; newer versions persist automatically
    return vectordb


def process_file(file):
    """Copy the uploaded file to a temp path and rebuild the vector store from it."""
    temp_path = file.name
    target_path = os.path.join(tempfile.gettempdir(), os.path.basename(temp_path))
    if os.path.abspath(temp_path) != os.path.abspath(target_path):
        shutil.copy(temp_path, target_path)
    documents = load_documents(target_path)
    # Drop any previous index so answers come only from the newly uploaded file.
    if os.path.exists(DB_DIR):
        shutil.rmtree(DB_DIR)
    create_vector_store(documents)
    return "✅ تم معالجة الملف بنجاح. يمكنك الآن كتابة سؤالك."


def ask_question(question):
    """Answer a question with RetrievalQA over the persisted Chroma store."""
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )
    vectordb = Chroma(persist_directory=DB_DIR, embedding_function=embeddings)
    retriever = vectordb.as_retriever()
    # Routed through LiteLLM; depending on the provider, the model name may need a
    # prefix (e.g. "huggingface/...") and an API key set in the environment.
    llm = ChatLiteLLM(model="mistralai/Mistral-7B-Instruct-v0.2")
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    return qa_chain.run(question)


with gr.Blocks(title="Smart PDF Assistant") as demo:
    gr.Markdown("### 🤖 مساعد الكتب الذكي - اسأل أي سؤال بناءً على ملف PDF أو DOCX")
    with gr.Row():
        file_input = gr.File(label="📄 ارفع ملف PDF أو DOCX", file_types=[".pdf", ".docx", ".doc"])
        file_status = gr.Textbox(label="حالة الملف", interactive=False)
    with gr.Row():
        question_input = gr.Textbox(label="❓ اكتب سؤالك هنا", placeholder="ما هو إيمان الكنيسة؟")
        answer_output = gr.Textbox(label="📘 الإجابة", lines=8)

    file_input.change(process_file, inputs=file_input, outputs=file_status)
    question_input.submit(ask_question, inputs=question_input, outputs=answer_output)

demo.launch()