# Smart PDF/DOCX question-answering assistant — Gradio app (Hugging Face Space).
# Upload a PDF/DOCX, it is indexed into a Chroma vector store, then ask questions.
import os
import shutil
import tempfile

import gradio as gr
from langchain.chains import RetrievalQA
from langchain.llms import LiteLLM
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
# Directory where the Chroma vector store is persisted between callbacks.
DB_DIR = "chroma_db"
# Character-based chunking parameters for the text splitter.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50
def load_documents(file_path):
    """Load a PDF or Word document and return its langchain Documents.

    Args:
        file_path: Path to a ``.pdf``, ``.docx`` or ``.doc`` file.

    Returns:
        The list of Document objects produced by the matching loader.

    Raises:
        ValueError: For any other file extension.
    """
    # Compare the extension case-insensitively so "FILE.PDF" is accepted too.
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".pdf":
        loader = PyPDFLoader(file_path)
    elif ext in (".docx", ".doc"):
        loader = UnstructuredWordDocumentLoader(file_path)
    else:
        raise ValueError("Unsupported file type. Only PDF and DOCX are supported.")
    return loader.load()
def create_vector_store(documents):
    """Split *documents* into chunks, embed them and persist a Chroma store.

    Args:
        documents: langchain Documents as returned by ``load_documents``.

    Returns:
        The Chroma vector store persisted under ``DB_DIR``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
    )
    texts = text_splitter.split_documents(documents)
    # Multilingual embedding model so Arabic questions can match Arabic passages.
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )
    vectordb = Chroma.from_documents(texts, embedding=embeddings, persist_directory=DB_DIR)
    # Write the index to disk so ask_question() can reopen it in a later call.
    vectordb.persist()
    return vectordb
def process_file(file):
    """Gradio callback: rebuild the vector store from an uploaded file.

    Copies the upload into the system temp dir, wipes any previous index so
    answers come only from the new file, and returns a status string for the UI.

    Args:
        file: The Gradio File payload (has a ``.name`` path), or ``None``.

    Returns:
        A human-readable status message for the status textbox.
    """
    # The change event also fires when the file selection is cleared.
    if file is None:
        return "⚠️ لم يتم رفع أي ملف."
    temp_path = file.name
    target_path = os.path.join(tempfile.gettempdir(), os.path.basename(temp_path))
    # Gradio may already stage uploads in the temp dir; skip a same-file copy.
    if os.path.abspath(temp_path) != os.path.abspath(target_path):
        shutil.copy(temp_path, target_path)
    try:
        documents = load_documents(target_path)
    except ValueError as err:
        # Report unsupported extensions in the status box instead of crashing the UI.
        return f"❌ {err}"
    # Drop the previous index so stale chunks cannot leak into new answers.
    if os.path.exists(DB_DIR):
        shutil.rmtree(DB_DIR)
    create_vector_store(documents)
    return "✅ تم معالجة الملف بنجاح. يمكنك الآن كتابة سؤالك."
def ask_question(question):
    """Gradio callback: answer *question* from the persisted vector store.

    Args:
        question: The user's question text.

    Returns:
        The LLM answer, or a warning string when there is nothing to answer
        with (empty question, or no file processed yet).
    """
    # Guard against empty submissions before doing any heavy model loading.
    if not question or not question.strip():
        return "⚠️ الرجاء كتابة سؤال أولاً."
    # No index on disk means no file has been processed yet.
    if not os.path.exists(DB_DIR):
        return "⚠️ الرجاء رفع ملف ومعالجته أولاً."
    # Must be the same embedding model used at index time for meaningful search.
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )
    vectordb = Chroma(persist_directory=DB_DIR, embedding_function=embeddings)
    retriever = vectordb.as_retriever()
    llm = LiteLLM(model="mistralai/Mistral-7B-Instruct-v0.2")  # no API key needed
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    return qa_chain.run(question)
# --- Gradio UI --------------------------------------------------------------
with gr.Blocks(title="Smart PDF Assistant") as demo:
    gr.Markdown("### 🤖 مساعد الكتب الذكي - اسأل أي سؤال بناءً على ملف PDF أو DOCX")
    with gr.Row():
        file_input = gr.File(label="📄 ارفع ملف PDF أو DOCX", file_types=[".pdf", ".docx", ".doc"])
        file_status = gr.Textbox(label="حالة الملف", interactive=False)
    with gr.Row():
        question_input = gr.Textbox(label="❓ اكتب سؤالك هنا", placeholder="ما هو إيمان الكنيسة؟")
        answer_output = gr.Textbox(label="📘 الإجابة", lines=8)
    # Wire the callbacks: re-index on upload, answer on Enter in the question box.
    file_input.change(process_file, inputs=file_input, outputs=file_status)
    question_input.submit(ask_question, inputs=question_input, outputs=answer_output)

if __name__ == "__main__":
    # Guard the launch so importing this module does not start a server.
    demo.launch()