random2222 committed on
Commit
137d750
·
verified ·
1 Parent(s): 1084bdb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -30
app.py CHANGED
@@ -5,40 +5,35 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
5
  from langchain_community.document_loaders import PyMuPDFLoader
6
  from langchain_text_splitters import CharacterTextSplitter
7
  from langchain.chains import RetrievalQA
8
- from langchain_community.llms import HuggingFaceHub
9
  from huggingface_hub import login
10
 
11
- # 1. Authentication (MUST HAVE)
12
- from huggingface_hub import login
13
- import os
14
 
 
15
  def create_qa_system():
16
- login(token=os.environ.get('HF_TOKEN')) # Safe method
17
-
18
- # 2. PDF Processing Function
19
- def create_qa_system():
20
- # File check
21
- if not os.path.exists("file.pdf"):
22
- raise gr.Error("❌ file.pdf not found! Upload it in Space's Files tab")
23
 
24
- # Load PDF
25
- loader = PyMuPDFLoader("file.pdf")
26
  documents = loader.load()
27
 
28
- # Split text
29
  text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
30
  texts = text_splitter.split_documents(documents)
31
 
32
- # Create embeddings
33
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
34
 
35
- # Build vector store
36
  db = FAISS.from_documents(texts, embeddings)
37
 
38
- # Initialize LLM (Free-tier compatible)
39
- llm = HuggingFaceHub(
40
- repo_id="google/flan-t5-base", # Changed to smaller model
41
- model_kwargs={"temperature": 0.2, "max_length": 256}
 
 
42
  )
43
 
44
  return RetrievalQA.from_chain_type(
@@ -47,17 +42,12 @@ def create_qa_system():
47
  retriever=db.as_retriever(search_kwargs={"k": 2})
48
  )
49
 
50
- # 3. Initialize system
51
  qa = create_qa_system()
52
 
53
- # 4. Chat interface
54
- def chat(message, history):
55
  response = qa({"query": message})
56
  return response["result"]
57
 
58
- # 5. Launch Gradio
59
- gr.ChatInterface(
60
- chat,
61
- title="PDF Chatbot",
62
- description="Upload your PDF in Files tab ➡️ Ask questions!",
63
- ).launch()
 
5
  from langchain_community.document_loaders import PyMuPDFLoader
6
  from langchain_text_splitters import CharacterTextSplitter
7
  from langchain.chains import RetrievalQA
8
+ from langchain_community.llms import HuggingFaceEndpoint # Updated import
9
  from huggingface_hub import login
10
 
11
+ # 1. Authentication
12
+ login(token=os.environ.get('HF_TOKEN'))
 
13
 
14
+ # 2. PDF Processing
15
  def create_qa_system():
16
+ if not os.path.exists("data.pdf"):
17
+ raise gr.Error("❗ Upload data.pdf in Files tab")
 
 
 
 
 
18
 
19
+ loader = PyMuPDFLoader("data.pdf")
 
20
  documents = loader.load()
21
 
 
22
  text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
23
  texts = text_splitter.split_documents(documents)
24
 
25
+ embeddings = HuggingFaceEmbeddings(
26
+ model_name="sentence-transformers/all-MiniLM-L6-v2"
27
+ )
28
 
 
29
  db = FAISS.from_documents(texts, embeddings)
30
 
31
+ # 3. Updated LLM initialization
32
+ llm = HuggingFaceEndpoint(
33
+ repo_id="google/flan-t5-base",
34
+ max_length=256,
35
+ temperature=0.2,
36
+ huggingfacehub_api_token=os.environ.get('HF_TOKEN') # Explicit token passing
37
  )
38
 
39
  return RetrievalQA.from_chain_type(
 
42
  retriever=db.as_retriever(search_kwargs={"k": 2})
43
  )
44
 
45
+ # 4. Initialize system
46
  qa = create_qa_system()
47
 
48
+ # 5. Chat interface
49
+ def chat_response(message, history):
50
  response = qa({"query": message})
51
  return response["result"]
52
 
53
+ gr.ChatInterface(chat_response).launch()