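# app.py
# NOTE: the "Spaces: / Sleeping / Sleeping" lines that preceded this header were
# Hugging Face Spaces page-status text picked up during capture, not program code.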
import os | |
import shutil | |
import chromadb | |
import gradio as gr | |
from ctransformers import AutoModelForCausalLM | |
from langchain.embeddings import SentenceTransformerEmbeddings | |
from langchain.vectorstores import Chroma | |
from langchain.chains import RetrievalQA | |
from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.llms import CTransformers | |
# 1. Local LLM setup (no hosted API), running a GGUF Mistral model via ctransformers.
# The model is created through LangChain's CTransformers wrapper instead of the
# raw ctransformers AutoModelForCausalLM object: the instance is later handed to
# RetrievalQA.from_chain_type, which needs a LangChain-compatible LLM.
# Generation settings (token budget, sampling temperature) travel in `config`.
llm = CTransformers(
    model="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
    model_file="mistral-7b-instruct-v0.2.Q4_K_M.gguf",
    model_type="mistral",
    config={"max_new_tokens": 512, "temperature": 0.7},
)
# 2. Storage directory setup.
# CHROMA_DIR is the on-disk location used for the vector store. Anything left
# there by a previous run is removed at import time, so each launch starts
# with an empty store.
CHROMA_DIR = "chroma_store"
if os.path.exists(CHROMA_DIR):
    shutil.rmtree(CHROMA_DIR)
# 3. Loading and splitting files.
# Lookup table from a lower-case file extension (no leading dot) to the
# document-loader class used to read files of that type.
SUPPORTED_TYPES = {
    "pdf": PyPDFLoader,
    "docx": Docx2txtLoader,
    "txt": TextLoader,
}
def load_documents(file_paths):
    """Load every supported file in *file_paths* and return the combined result.

    The loader class is looked up in SUPPORTED_TYPES using the text after the
    last "." of each path, lower-cased. Paths without a matching loader are
    skipped silently.

    Args:
        file_paths: Iterable of file path strings.

    Returns:
        A single list holding the output of each loader's ``load()`` call,
        in the same order as the input paths.
    """
    loaded = []
    for file_path in file_paths:
        extension = file_path.rsplit(".", 1)[-1].lower()
        loader_cls = SUPPORTED_TYPES.get(extension)
        if not loader_cls:
            # No loader registered for this extension: ignore the file.
            continue
        loaded.extend(loader_cls(file_path).load())
    return loaded
# 4. Text splitting and embedding setup.
# Both objects are built once at import time and shared by every upload.
# The splitter is configured with chunk_size=500 and chunk_overlap=100.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
)
embedding = SentenceTransformerEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
)
def create_vectorstore(docs):
    """Split *docs* into chunks and index them in a Chroma store.

    Args:
        docs: Documents as returned by ``load_documents``.

    Returns:
        The Chroma vector store built from the chunks with the module-level
        ``embedding``, persisted under ``CHROMA_DIR``.
    """
    chunks = text_splitter.split_documents(docs)
    store = Chroma.from_documents(
        chunks,
        embedding,
        persist_directory=CHROMA_DIR,
    )
    return store
# 5. Gradio interface.
# Module-level state shared by the UI callbacks; upload_files() rebinds
# `db` and `qa_chain` once a knowledge base has been built.
uploaded_files = []  # paths of the files handed to upload_files()
db = None  # vector store; None until the first upload
qa_chain = None  # retrieval QA chain; None until the first upload
def upload_files(files):
    """Build the knowledge base from the uploaded files and return a status message.

    Loads the files, indexes them in the vector store, and rebinds the
    module-level ``db`` and ``qa_chain`` so later questions are answered
    against the new content. The processed paths are appended to
    ``uploaded_files``.

    Args:
        files: The value of the file-upload component: a list whose entries
            are either path strings or objects exposing the path as ``.name``.
            May be ``None`` or empty when nothing was selected.

    Returns:
        An Arabic status string: a success message, or an explanation when no
        file was selected or no readable content could be loaded.
    """
    global uploaded_files, db, qa_chain

    # Clicking the button with nothing selected must not crash the callback
    # (iterating None raises TypeError).
    if not files:
        return "من فضلك اختر ملفاً واحداً على الأقل قبل التحميل."

    # Accept both plain path strings and file objects that carry `.name`.
    uploaded_paths = [getattr(f, "name", f) for f in files]

    docs = load_documents(uploaded_paths)
    if not docs:
        # Nothing loadable: do not hand an empty document list to the vector
        # store, and leave any previously built knowledge base untouched.
        return "لم يتم العثور على محتوى قابل للقراءة في الملفات المرفوعة."

    uploaded_files.extend(uploaded_paths)
    db = create_vectorstore(docs)
    retriever = db.as_retriever(search_kwargs={"k": 5})
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    return "تم تحميل الملفات وبناء قاعدة المعرفة بنجاح ✅"
def answer_question_arabic(question):
    """Answer *question* from the uploaded documents.

    Args:
        question: The question text entered by the user.

    Returns:
        The string produced by running the module-level ``qa_chain`` on the
        question, or an Arabic prompt asking the user to upload files first
        when no chain has been built yet.
    """
    if qa_chain:
        return qa_chain.run(question)
    return "من فضلك قم أولاً بتحميل ملفاتك وبناء قاعدة المعرفة."
# UI layout and event wiring.
# `rtl` is a per-component option (Markdown / Textbox), not a gr.Blocks
# argument, so right-to-left rendering is requested on each text component.
# NOTE(review): the source's indentation was lost; the grouping of components
# into rows below is reconstructed from statement order - confirm the layout.
with gr.Blocks(theme=gr.themes.Soft(), title="Smart PDF Assistant") as demo:
    gr.Markdown(
        """
# 🤖 مساعد الوثائق الذكي باللغة العربية
أرفق ملفاتك (PDF، DOCX، TXT) ثم اسأل أي سؤال.
""",
        rtl=True,
    )

    # Upload row: file picker plus the button that builds the knowledge base.
    with gr.Row():
        file_input = gr.File(
            file_types=[".pdf", ".docx", ".txt"],
            file_count="multiple",
            label="📁 ارفع ملفاتك",
        )
        upload_button = gr.Button("تحميل الملفات")
    status_output = gr.Textbox(label="الحالة", rtl=True)

    # Question row: question box plus the submit button.
    with gr.Row():
        question_input = gr.Textbox(
            lines=2,
            placeholder="✍️ اكتب سؤالك هنا",
            label="السؤال",
            rtl=True,
        )
        answer_button = gr.Button("أرسل")
    answer_output = gr.Textbox(label="الإجابة", lines=5, rtl=True)

    # Wire the buttons to their callbacks.
    upload_button.click(
        upload_files,
        inputs=[file_input],
        outputs=[status_output],
    )
    answer_button.click(
        answer_question_arabic,
        inputs=[question_input],
        outputs=[answer_output],
    )

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()