Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -5,40 +5,35 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
|
|
5 |
from langchain_community.document_loaders import PyMuPDFLoader
|
6 |
from langchain_text_splitters import CharacterTextSplitter
|
7 |
from langchain.chains import RetrievalQA
|
8 |
-
from langchain_community.llms import
|
9 |
from huggingface_hub import login
|
10 |
|
11 |
-
# 1. Authentication
|
12 |
-
|
13 |
-
import os
|
14 |
|
|
|
15 |
def create_qa_system():
|
16 |
-
|
17 |
-
|
18 |
-
# 2. PDF Processing Function
|
19 |
-
def create_qa_system():
|
20 |
-
# File check
|
21 |
-
if not os.path.exists("file.pdf"):
|
22 |
-
raise gr.Error("❌ file.pdf not found! Upload it in Space's Files tab")
|
23 |
|
24 |
-
|
25 |
-
loader = PyMuPDFLoader("file.pdf")
|
26 |
documents = loader.load()
|
27 |
|
28 |
-
# Split text
|
29 |
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
|
30 |
texts = text_splitter.split_documents(documents)
|
31 |
|
32 |
-
|
33 |
-
|
|
|
34 |
|
35 |
-
# Build vector store
|
36 |
db = FAISS.from_documents(texts, embeddings)
|
37 |
|
38 |
-
#
|
39 |
-
llm =
|
40 |
-
repo_id="google/flan-t5-base",
|
41 |
-
|
|
|
|
|
42 |
)
|
43 |
|
44 |
return RetrievalQA.from_chain_type(
|
@@ -47,17 +42,12 @@ def create_qa_system():
|
|
47 |
retriever=db.as_retriever(search_kwargs={"k": 2})
|
48 |
)
|
49 |
|
50 |
-
#
|
51 |
qa = create_qa_system()
|
52 |
|
53 |
-
#
|
54 |
-
def
|
55 |
response = qa({"query": message})
|
56 |
return response["result"]
|
57 |
|
58 |
-
|
59 |
-
gr.ChatInterface(
|
60 |
-
chat,
|
61 |
-
title="PDF Chatbot",
|
62 |
-
description="Upload your PDF in Files tab ➡️ Ask questions!",
|
63 |
-
).launch()
|
|
|
5 |
from langchain_community.document_loaders import PyMuPDFLoader
|
6 |
from langchain_text_splitters import CharacterTextSplitter
|
7 |
from langchain.chains import RetrievalQA
|
8 |
+
from langchain_community.llms import HuggingFaceEndpoint # Updated import
|
9 |
from huggingface_hub import login
|
10 |
|
11 |
+
# 1. Authentication
|
12 |
+
import os  # needed for the HF_TOKEN lookup; harmless if already imported above

# Fail fast when the token is missing: login(token=None) falls back to an
# interactive prompt, which nothing can answer in a headless deployment.
if not os.environ.get('HF_TOKEN'):
    raise RuntimeError(
        "HF_TOKEN is not set; add it as a secret or environment variable."
    )
login(token=os.environ.get('HF_TOKEN'))
|
|
|
13 |
|
14 |
+
# 2. PDF Processing
|
15 |
def create_qa_system():
|
16 |
+
if not os.path.exists("data.pdf"):
|
17 |
+
raise gr.Error("❗ Upload data.pdf in Files tab")
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
+
loader = PyMuPDFLoader("data.pdf")
|
|
|
20 |
documents = loader.load()
|
21 |
|
|
|
22 |
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
|
23 |
texts = text_splitter.split_documents(documents)
|
24 |
|
25 |
+
embeddings = HuggingFaceEmbeddings(
|
26 |
+
model_name="sentence-transformers/all-MiniLM-L6-v2"
|
27 |
+
)
|
28 |
|
|
|
29 |
db = FAISS.from_documents(texts, embeddings)
|
30 |
|
31 |
+
# 3. Updated LLM initialization
|
32 |
+
llm = HuggingFaceEndpoint(
|
33 |
+
repo_id="google/flan-t5-base",
|
34 |
+
max_length=256,
|
35 |
+
temperature=0.2,
|
36 |
+
huggingfacehub_api_token=os.environ.get('HF_TOKEN') # Explicit token passing
|
37 |
)
|
38 |
|
39 |
return RetrievalQA.from_chain_type(
|
|
|
42 |
retriever=db.as_retriever(search_kwargs={"k": 2})
|
43 |
)
|
44 |
|
45 |
+
# 4. Initialize system
|
46 |
# Build the retrieval chain once at import time; chat_response reads this
# module-level name on every turn rather than rebuilding it.
qa = create_qa_system()
|
47 |
|
48 |
+
# 5. Chat interface
|
49 |
+
def chat_response(message, history):
    """Answer one chat turn by querying the module-level ``qa`` chain.

    Args:
        message: The user's latest message; sent to the chain as its
            ``"query"`` input.
        history: Earlier turns supplied by the chat UI. Accepted to match the
            callback signature but not used -- each question is answered
            independently of the conversation so far.

    Returns:
        The value stored under ``"result"`` in the chain's output.
    """
    # invoke() replaces calling the chain object directly, which LangChain
    # deprecated in 0.1; the input and output mappings are unchanged.
    response = qa.invoke({"query": message})
    return response["result"]
|
52 |
|
53 |
+
# Wrap chat_response in a Gradio chat UI and start serving it.
gr.ChatInterface(chat_response).launch()
|
|
|
|
|
|
|
|
|
|