brain247v1 / app.py
ramysaidagieb's picture
Update app.py
0ffbfee verified
raw
history blame
3.54 kB
# app.py
import os
import shutil
import chromadb
import gradio as gr
from ctransformers import AutoModelForCausalLM
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import CTransformers
# 1. Set up the LLM locally (no API) through ctransformers.
# RetrievalQA needs a LangChain LLM object, so the GGUF model is loaded through
# LangChain's CTransformers wrapper rather than as a raw ctransformers model.
# The wrapper takes generation settings as a plain dict via ``config``.
llm = CTransformers(
    model="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
    model_file="mistral-7b-instruct-v0.2.Q4_K_M.gguf",
    model_type="mistral",
    config={"max_new_tokens": 512, "temperature": 0.7},
)
# 2. Set up the storage folder.
CHROMA_DIR = "chroma_store"
# Each start begins with an empty store: remove whatever an earlier run left
# at this path.
if os.path.exists(CHROMA_DIR):
    shutil.rmtree(CHROMA_DIR)
# 3. Load the files and split them.
# Maps a lower-case file extension (without the dot) to the loader class used
# for files of that type.
SUPPORTED_TYPES = {
    "pdf": PyPDFLoader,
    "docx": Docx2txtLoader,
    "txt": TextLoader,
}
def load_documents(file_paths):
    """Load every supported file in *file_paths* into one flat list.

    The loader is looked up in ``SUPPORTED_TYPES`` using the text after the
    last ``.`` of each path, lower-cased.  Paths that map to no loader are
    skipped without raising.

    Args:
        file_paths: Iterable of file path strings.

    Returns:
        list: The results of each loader's ``load()`` call, concatenated in
        input order.
    """
    collected = []
    for file_path in file_paths:
        # rpartition yields the whole path when there is no dot, exactly like
        # taking the last element of ``split(".")``.
        suffix = file_path.rpartition(".")[2].lower()
        make_loader = SUPPORTED_TYPES.get(suffix)
        if not make_loader:
            continue
        collected.extend(make_loader(file_path).load())
    return collected
# 4. Split the texts and create the vectors.
# Splitter configured with chunk_size=500 and chunk_overlap=100.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
)
# Embedding model handed to the Chroma store when documents are indexed.
embedding = SentenceTransformerEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
)
def create_vectorstore(docs):
    """Chunk *docs* and index the chunks in a Chroma store.

    Args:
        docs: Documents as returned by ``load_documents``.

    Returns:
        The Chroma vector store built from the chunks, using the module-level
        ``embedding`` and persisting under ``CHROMA_DIR``.
    """
    chunks = text_splitter.split_documents(docs)
    store = Chroma.from_documents(
        chunks,
        embedding,
        persist_directory=CHROMA_DIR,
    )
    return store
# 5. Gradio interface.
# Module-level state shared by the upload and question callbacks.
uploaded_files = []           # paths of every file uploaded so far
db = qa_chain = None          # vector store / QA chain, set by upload_files
def upload_files(files):
    """Index the uploaded files and (re)build the question-answering chain.

    Updates the module-level ``uploaded_files``, ``db`` and ``qa_chain``.

    Args:
        files: The value of the file-upload component: a list of uploaded
            file objects exposing ``.name``, a list of plain path strings, or
            ``None``/empty when nothing was selected.

    Returns:
        str: A status message (in Arabic) for the status textbox.
    """
    global uploaded_files, db, qa_chain

    # Nothing selected: report it instead of failing while iterating ``None``.
    if not files:
        return "من فضلك اختر ملفاً واحداً على الأقل قبل التحميل."

    # Accept both upload wrappers (path in ``.name``) and plain path strings.
    uploaded_paths = [getattr(f, "name", f) for f in files]
    uploaded_files.extend(uploaded_paths)

    docs = load_documents(uploaded_paths)
    # Unsupported or empty files yield no documents; there is nothing to index
    # and the existing knowledge base (if any) is left untouched.
    if not docs:
        return "لم يتم العثور على محتوى قابل للقراءة في الملفات المرفوعة."

    db = create_vectorstore(docs)
    retriever = db.as_retriever(search_kwargs={"k": 5})
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    return "تم تحميل الملفات وبناء قاعدة المعرفة بنجاح ✅"
def answer_question_arabic(question):
    """Answer *question* from the uploaded documents.

    Args:
        question: The user's question text from the question textbox.

    Returns:
        str: The chain's answer, or an Arabic hint when the question is blank
        or no knowledge base has been built yet.
    """
    # A blank question would still trigger a full LLM call; reject it up front.
    if not question or not question.strip():
        return "من فضلك اكتب سؤالك أولاً."
    # ``qa_chain`` stays None until upload_files has built the knowledge base.
    if not qa_chain:
        return "من فضلك قم أولاً بتحميل ملفاتك وبناء قاعدة المعرفة."
    return qa_chain.run(question)
# Build the Gradio UI.  ``gr.Blocks`` documents no ``rtl`` argument, so the
# right-to-left layout is requested through its ``css`` parameter instead.
# NOTE(review): the original indentation was lost; the status and answer boxes
# are assumed to sit below their respective rows — confirm the intended layout.
with gr.Blocks(
    theme=gr.themes.Soft(),
    title="Smart PDF Assistant",
    css=".gradio-container {direction: rtl;}",
) as demo:
    gr.Markdown("""
# 🤖 مساعد الوثائق الذكي باللغة العربية
أرفق ملفاتك (PDF، DOCX، TXT) ثم اسأل أي سؤال.
""")

    # Upload area: file picker, trigger button, then the status line.
    with gr.Row():
        file_input = gr.File(
            file_types=[".pdf", ".docx", ".txt"],
            file_count="multiple",
            label="📁 ارفع ملفاتك",
        )
        upload_button = gr.Button("تحميل الملفات")
    status_output = gr.Textbox(label="الحالة")

    # Question area: input box, send button, then the answer box.
    with gr.Row():
        question_input = gr.Textbox(
            lines=2,
            placeholder="✍️ اكتب سؤالك هنا",
            label="السؤال",
        )
        answer_button = gr.Button("أرسل")
    answer_output = gr.Textbox(label="الإجابة", lines=5)

    # Wire the buttons to their callbacks.
    upload_button.click(upload_files, inputs=[file_input], outputs=[status_output])
    answer_button.click(
        answer_question_arabic,
        inputs=[question_input],
        outputs=[answer_output],
    )

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()