Spaces:
Sleeping
Sleeping
import os | |
import gradio as gr | |
from langchain.document_loaders import PyPDFLoader | |
from langchain.vectorstores import Chroma | |
from langchain.embeddings import HuggingFaceEmbeddings | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.llms import HuggingFaceHub | |
from langchain.chains import RetrievalQA | |
# Directory in which the Chroma vector store is persisted.
DB_DIR = "chroma_db"

# Embedding model handed to Chroma for the vector store.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
)

# Hugging Face Hub LLM used by the question-answering chain.
llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",
    model_kwargs={"temperature": 0.3, "max_new_tokens": 500},
)
def load_and_index(files):
    """Load the uploaded PDFs, split them into chunks and index them in Chroma.

    Args:
        files: Uploaded files as delivered by the Gradio ``File`` component.
            Each item may be an object exposing the temp-file path as
            ``.name`` or a plain path string. ``None`` or an empty list
            means nothing was uploaded.

    Returns:
        A status message (in Arabic, matching the rest of the UI) that is
        displayed in the indexing status box.
    """
    # Clicking the button without uploading anything yields None / [];
    # iterating that would raise instead of telling the user what is wrong.
    if not files:
        return "⚠️ لم يتم تحميل أي ملفات."

    # The splitter configuration does not depend on the file, so build it once.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

    all_texts = []
    for file in files:
        # Accept both upload shapes: an object carrying ``.name`` or a bare
        # path string (which shape arrives depends on the Gradio version/config).
        path = getattr(file, "name", file)
        docs = PyPDFLoader(path).load()
        all_texts.extend(splitter.split_documents(docs))

    # PDFs without extractable text (e.g. scanned images) produce no chunks;
    # do not hand an empty batch to the vector store.
    if not all_texts:
        return "⚠️ لم يتم العثور على نص قابل للفهرسة في الملفات."

    vectordb = Chroma.from_documents(
        all_texts,
        embedding=embedding_model,
        persist_directory=DB_DIR,
    )
    vectordb.persist()
    return "✅ تم تحميل وفهرسة الملفات."
def answer_question(query):
    """Answer a question from the documents stored in the persisted Chroma index.

    Args:
        query: The question text typed by the user; may be empty or ``None``.

    Returns:
        The answer string produced by the RetrievalQA chain, or a short
        Arabic hint when the question is blank or no index exists on disk yet.
    """
    # Skip the retrieval + LLM round trip for blank input.
    question = (query or "").strip()
    if not question:
        return "⚠️ الرجاء كتابة سؤال أولاً."

    # The index lives in DB_DIR; if that directory is missing, nothing has
    # been indexed and the chain would have no documents to retrieve from.
    if not os.path.isdir(DB_DIR):
        return "⚠️ الرجاء فهرسة ملفات PDF أولاً."

    vectordb = Chroma(persist_directory=DB_DIR, embedding_function=embedding_model)
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=vectordb.as_retriever())
    return qa_chain.run(question)
# Gradio interface: an indexing section (upload + status) followed by a
# question/answer section, then the app is launched.
# NOTE(review): the original nesting was lost in extraction; the uploader and
# the index button are assumed to be the only children of the Row — confirm.
with gr.Blocks(title="Smart PDF Assistant") as demo:
    gr.Markdown("# 🤖 Smart PDF Assistant\nحمّل ملفات PDF واسأل أي سؤال 📚")

    # --- Indexing section ---
    with gr.Row():
        uploader = gr.File(
            label="تحميل ملفات PDF",
            file_count="multiple",
            file_types=[".pdf"],
        )
        index_btn = gr.Button("فهرسة الملفات")
    index_output = gr.Textbox(label="حالة الفهرسة")
    index_btn.click(fn=load_and_index, inputs=uploader, outputs=index_output)

    # --- Question section ---
    query = gr.Textbox(label="اكتب سؤالك")
    answer_btn = gr.Button("أجب")
    answer_output = gr.Textbox(label="الإجابة")
    answer_btn.click(fn=answer_question, inputs=query, outputs=answer_output)

demo.launch()