Spaces:

ramysaidagieb
/

brain247v1

Sleeping

App Files Files Community

brain247v1 / app.py

ramysaidagieb

Update app.py

cb9f6af verified 30 days ago

raw

history blame

3.76 kB

	import os
	import shutil
	import gradio as gr

	from langchain_community.document_loaders import PyPDFLoader
	from langchain_community.vectorstores import Chroma
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain_community.llms import HuggingFaceHub
	from langchain.chains import RetrievalQA
	from langchain.text_splitter import RecursiveCharacterTextSplitter

	# Module-wide handle to the active RetrievalQA chain; stays None until a PDF
	# has been processed successfully.
	qa_chain = None

	# Directory in which the Chroma vector store is persisted.
	CHROMA_PATH = "chroma_db"

	# Make sure the folder that receives uploaded PDFs exists.
	os.makedirs("docs", exist_ok=True)

	def save_pdf_to_docs(uploaded_file):
	    """Copy an uploaded PDF into the local ``docs`` folder.

	    Args:
	        uploaded_file: Either a filesystem path (``str`` or ``os.PathLike``),
	            which is what a ``gr.File(type="filepath")`` component hands to
	            its callback, or an object exposing the path through a ``name``
	            attribute.

	    Returns:
	        The destination path ``docs/<basename of the upload>``.

	    Raises:
	        ValueError: If ``uploaded_file`` is ``None``.
	    """
	    if uploaded_file is None:
	        raise ValueError("no file was provided")

	    # Plain path strings have no ``.name`` attribute, so handle them explicitly
	    # and fall back to ``.name`` only for file-like wrappers.
	    if isinstance(uploaded_file, (str, os.PathLike)):
	        source_path = os.fspath(uploaded_file)
	    else:
	        source_path = uploaded_file.name

	    # Create the target folder here so the function works on its own.
	    os.makedirs("docs", exist_ok=True)

	    destination_path = os.path.join("docs", os.path.basename(source_path))
	    # Skip the copy when the upload already lives at the destination;
	    # shutil.copy raises SameFileError for identical source and target.
	    if os.path.abspath(source_path) != os.path.abspath(destination_path):
	        shutil.copy(source_path, destination_path)
	    return destination_path

	def process_file(file):
	    """Index an uploaded PDF and build the question-answering chain.

	    The PDF is copied into ``docs``, loaded, split into overlapping chunks,
	    embedded into a Chroma store persisted at ``CHROMA_PATH``, and wired into
	    a ``RetrievalQA`` chain stored in the module-level ``qa_chain``.

	    Args:
	        file: The value delivered by the Gradio file component; it is
	            forwarded unchanged to ``save_pdf_to_docs``.

	    Returns:
	        A status message for the UI: success, a prompt to choose a file, a
	        notice that no text could be extracted, or an error description.
	        Exceptions are reported through the message rather than raised.
	    """
	    global qa_chain

	    # Gradio passes None when the button is clicked with no file selected.
	    if file is None:
	        return "⚠️ الرجاء اختيار ملف PDF أولاً."

	    try:
	        path = save_pdf_to_docs(file)
	        pages = PyPDFLoader(path).load()

	        # Split the extracted text into small overlapping chunks.
	        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
	        chunks = splitter.split_documents(pages)
	        # A PDF without extractable text yields no chunks; report that
	        # clearly instead of letting the vector store fail on empty input.
	        if not chunks:
	            return "❌ لم يتم العثور على نص قابل للاستخراج في هذا الملف."

	        # Multilingual sentence-embedding model.
	        embeddings = HuggingFaceEmbeddings(
	            model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
	        )

	        # NOTE(review): the same persist directory is reused for every upload,
	        # so chunks from earlier PDFs presumably remain retrievable — confirm
	        # whether that is intended.
	        vectordb = Chroma.from_documents(
	            chunks, embedding=embeddings, persist_directory=CHROMA_PATH
	        )
	        retriever = vectordb.as_retriever(search_kwargs={"k": 3})

	        # NOTE(review): the hosted model needs a valid access token; it is read
	        # from HUGGINGFACEHUB_API_TOKEN when that variable is set — confirm the
	        # Space defines it as a secret.
	        llm = HuggingFaceHub(
	            repo_id="mistralai/Mistral-7B-Instruct-v0.2",
	            huggingfacehub_api_token=os.environ.get("HUGGINGFACEHUB_API_TOKEN", ""),
	            model_kwargs={"temperature": 0.2, "max_new_tokens": 512},
	        )

	        qa_chain = RetrievalQA.from_chain_type(
	            llm=llm,
	            chain_type="stuff",
	            retriever=retriever,
	            return_source_documents=True,
	        )
	    except Exception as e:
	        # UI boundary: surface the failure as a status message.
	        return f"❌ حدث خطأ أثناء المعالجة: {e}"

	    return "✅ تم تحميل الملف بنجاح، يمكنك الآن طرح الأسئلة."

	def answer_question(question):
	    """Answer a question with the current retrieval chain and return HTML.

	    Args:
	        question: The text entered by the user.

	    Returns:
	        A plain warning string when no PDF has been processed yet or the
	        question is blank; otherwise a right-to-left ``<div>`` containing the
	        HTML-escaped answer, or an HTML-escaped error description if the
	        chain call fails.
	    """
	    from html import escape  # function-scope import keeps the block self-contained

	    if qa_chain is None:
	        return "⚠️ الرجاء رفع ومعالجة ملف PDF أولاً."
	    # Do not spend a model call on an empty or whitespace-only question.
	    if not question or not question.strip():
	        return "⚠️ الرجاء كتابة سؤال أولاً."

	    try:
	        result = qa_chain({"query": question})
	    except Exception as e:
	        # UI boundary: report the failure as a message, as process_file does.
	        return f"<div dir='rtl'>❌ حدث خطأ أثناء الإجابة: {escape(str(e))}</div>"

	    # The model's text is untrusted and is rendered as HTML, so escape it
	    # first and only then turn line breaks into <br> tags.
	    answer = escape(str(result["result"])).replace("\n", "<br>")

	    # Wrap the answer in a right-to-left container.
	    return f"<div dir='rtl'><b>🔍 الإجابة:</b><br>{answer}</div>"

	# Assemble the Gradio interface.
	with gr.Blocks(title="مساعد عربي ذكي للملفات") as demo:
	    gr.Markdown(value="## 🤖 مساعد الملفات العربية باستخدام RAG", elem_id="title")

	    # Upload section: file picker, trigger button and read-only status line.
	    file_input = gr.File(type="filepath", label="📄 ارفع ملف PDF بالعربية")
	    upload_button = gr.Button(value="🚀 تحميل ومعالجة الملف")
	    status_box = gr.Textbox(interactive=False, label="📝 الحالة")

	    # Question section: right-to-left input and an HTML pane for the answer.
	    question_input = gr.Textbox(rtl=True, elem_id="question", label="❓ اطرح سؤالك هنا")
	    answer_output = gr.HTML(elem_id="answer", label="📘 الإجابة")

	    # Event wiring: the button indexes the file, Enter submits the question.
	    upload_button.click(process_file, file_input, status_box)
	    question_input.submit(answer_question, question_input, answer_output)

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()