ramysaidagieb committed on
Commit
369a913
·
verified ·
1 Parent(s): 145a002

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +13 -13
  2. app.py +45 -0
  3. requirements.txt +6 -0
README.md CHANGED
@@ -1,14 +1,14 @@
1
- ---
2
- title: Brain247v1
3
- emoji: 🔥
4
- colorFrom: indigo
5
- colorTo: purple
6
- sdk: gradio
7
- sdk_version: 5.38.1
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- short_description: brain
12
- ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Smart PDF Assistant
 
 
 
 
 
 
 
 
 
 
 
2
 
3
+ Upload PDF files and ask questions. Uses RAG with open-source models.
4
+
5
+ ## Features
6
+ - Multilingual PDF support
7
+ - Mistral 7B Instruct for Q&A
8
+ - SentenceTransformers for embeddings
9
+ - Exportable answers in Gradio
10
+
11
+ ## How to Use
12
+ 1. Upload your PDFs.
13
+ 2. Click "فهرسة الملفات" (Index Files).
14
+ 3. Type your question, click "أجب" (Answer), and read the response.
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from langchain.document_loaders import PyPDFLoader
4
+ from langchain.vectorstores import Chroma
5
+ from langchain.embeddings import HuggingFaceEmbeddings
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain.llms import HuggingFaceHub
8
+ from langchain.chains import RetrievalQA
9
+
10
# Directory passed to Chroma as its persist_directory.
DB_DIR = "chroma_db"

# Embedding model shared by indexing (load_and_index) and lookup
# (answer_question) so both sides use the same vector space.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
)

# LLM handed to the RetrievalQA chain to produce answers.
llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",
    model_kwargs={"temperature": 0.3, "max_new_tokens": 500},
)
13
+
14
def load_and_index(files):
    """Load the uploaded PDFs, split them into chunks and index them in Chroma.

    Args:
        files: The uploaded files as delivered by the Gradio ``File``
            component, or ``None``/empty when nothing was uploaded. Each item
            is expected to expose its on-disk path as ``.name``; a plain path
            string is accepted as well.

    Returns:
        A status message to show in the UI.
    """
    # Clicking the button without uploading anything yields None (or an empty
    # list); report that instead of failing while iterating.
    if not files:
        return "❌ لم يتم تحميل أي ملفات."

    # The splitter configuration is the same for every file, so build it once.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

    all_texts = []
    for file in files:
        # Fall back to the item itself when it is already a path string.
        path = getattr(file, "name", file)
        docs = PyPDFLoader(path).load()
        all_texts.extend(splitter.split_documents(docs))

    # PDFs without extractable text produce no chunks; do not hand an empty
    # batch to the vector store.
    if not all_texts:
        return "❌ لم يتم العثور على نص قابل للفهرسة في الملفات."

    vectordb = Chroma.from_documents(
        all_texts,
        embedding=embedding_model,
        persist_directory=DB_DIR,
    )
    vectordb.persist()
    return "✅ تم تحميل وفهرسة الملفات."
25
+
26
def answer_question(query):
    """Answer a question using the persisted Chroma index and the LLM.

    Args:
        query: The question text typed by the user (may be empty or ``None``).

    Returns:
        The answer produced by the RetrievalQA chain, or a prompt asking the
        user to enter a question when ``query`` is blank.
    """
    # Skip the retriever/LLM round trip when there is nothing to ask.
    if not query or not query.strip():
        return "❌ الرجاء كتابة سؤال أولاً."

    # Re-open the store from disk so the index built by load_and_index is used.
    vectordb = Chroma(persist_directory=DB_DIR, embedding_function=embedding_model)
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=vectordb.as_retriever())
    return qa_chain.run(query)
31
+
32
# Gradio UI: an indexing section (upload + button + status) followed by a
# question/answer section.
# NOTE(review): the original indentation was lost in this view; the row is
# assumed to contain only the uploader and the index button — confirm.
with gr.Blocks(title="Smart PDF Assistant") as demo:
    gr.Markdown("# 🤖 Smart PDF Assistant\nحمّل ملفات PDF واسأل أي سؤال 📚")
    with gr.Row():
        uploader = gr.File(file_types=[".pdf"], file_count="multiple", label="تحميل ملفات PDF")
        index_btn = gr.Button("فهرسة الملفات")
    index_output = gr.Textbox(label="حالة الفهرسة")
    index_btn.click(load_and_index, inputs=[uploader], outputs=[index_output])

    query = gr.Textbox(label="اكتب سؤالك")
    answer_btn = gr.Button("أجب")
    answer_output = gr.Textbox(label="الإجابة")
    answer_btn.click(answer_question, inputs=[query], outputs=[answer_output])

# Only start the server when executed as a script, so importing this module
# (e.g. from a test) does not launch the app.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio
2
+ langchain
3
+ chromadb
4
+ sentence-transformers
5
+ pypdf
6
+ huggingface_hub