"""Smart PDF Assistant: a Gradio app that answers questions about an uploaded PDF.

The uploaded PDF is split into chunks, embedded with a sentence-transformers
model and stored in a Chroma collection persisted under ``CHROMA_PATH``.
Questions are answered by a ``RetrievalQA`` chain backed by that collection.
"""

import os
import shutil
from functools import lru_cache

import gradio as gr
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain_community.vectorstores import Chroma

CHROMA_PATH = "chroma_db"
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL = "gpt2"


@lru_cache(maxsize=1)
def _get_embeddings():
    """Return the shared embedding function, loading the model only once."""
    return HuggingFaceEmbeddings(model_name=EMBED_MODEL)


@lru_cache(maxsize=1)
def _get_llm():
    """Return the shared text-generation LLM, loading the model only once.

    ``RetrievalQA`` needs a LangChain language-model object; a bare model
    name string (as originally passed) is not accepted.
    """
    # NOTE(review): gpt2 has a small context window; the retrieved chunks
    # plus the prompt may exceed it for long passages -- confirm, or swap
    # LLM_MODEL for a larger model.
    return HuggingFacePipeline.from_model_id(
        model_id=LLM_MODEL,
        task="text-generation",
        pipeline_kwargs={"max_new_tokens": 128},
    )


def load_and_prepare_file(file_path):
    """Index the PDF at *file_path* into a fresh Chroma store.

    Args:
        file_path: Path of the uploaded PDF as provided by the Gradio file
            component, or ``None`` when no file was selected.

    Returns:
        A status message to display in the UI.
    """
    if not file_path:
        return "⚠️ الرجاء رفع ملف PDF أولاً."

    # Load the PDF and split it once with the configured splitter.
    loader = PyPDFLoader(file_path)
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = text_splitter.split_documents(loader.load())
    if not chunks:
        # Nothing to embed (e.g. no extractable text); keep any existing index.
        return "⚠️ لم يتم العثور على نص قابل للاستخراج في الملف."

    # Remove the previous index so answers only come from the new file.
    # NOTE(review): deleting the directory while an earlier Chroma client in
    # this process still has it open may misbehave on some versions -- confirm.
    if os.path.exists(CHROMA_PATH):
        shutil.rmtree(CHROMA_PATH)

    # Build the vector database.
    vectordb = Chroma.from_documents(
        chunks, _get_embeddings(), persist_directory=CHROMA_PATH
    )
    vectordb.persist()

    return "✅ تم تجهيز الملف بنجاح، يمكنك الآن طرح الأسئلة."


def answer_question(question):
    """Answer *question* using the chunks stored in the Chroma index.

    Args:
        question: The user's question text.

    Returns:
        The generated answer, or a warning message when the question is
        empty or no PDF has been indexed yet.
    """
    if not question or not question.strip():
        return "⚠️ الرجاء كتابة سؤال."
    if not os.path.exists(CHROMA_PATH):
        return "⚠️ الرجاء تحميل ملف PDF ومعالجته أولاً."

    vectordb = Chroma(
        persist_directory=CHROMA_PATH, embedding_function=_get_embeddings()
    )
    qa = RetrievalQA.from_chain_type(llm=_get_llm(), retriever=vectordb.as_retriever())
    return qa.invoke({"query": question})["result"]


with gr.Blocks() as demo:
    gr.Markdown("### 📚 Smart PDF Assistant - مساعد PDF الذكي")

    file_input = gr.File(label="📄 ارفع ملف PDF", type="filepath")
    upload_output = gr.Textbox(label="نتيجة الرفع")
    upload_button = gr.Button("تحميل ومعالجة الملف")

    question_input = gr.Textbox(label="✍️ اكتب سؤالك هنا")
    answer_output = gr.Textbox(label="🔎 الإجابة")

    upload_button.click(load_and_prepare_file, inputs=file_input, outputs=upload_output)
    question_input.submit(answer_question, inputs=question_input, outputs=answer_output)


if __name__ == "__main__":
    demo.launch()