File size: 1,779 Bytes
6674899
21206fd
6674899
8a6b9ad
6674899
 
 
62390c0
 
21206fd
62390c0
 
8a6b9ad
62390c0
 
 
 
 
6674899
62390c0
 
 
6674899
62390c0
 
 
6674899
62390c0
6674899
62390c0
 
 
 
 
6674899
62390c0
 
6674899
 
62390c0
6674899
 
62390c0
6674899
21206fd
62390c0
 
6674899
62390c0
 
 
6674899
 
62390c0
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import os
import gradio as gr
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFaceHub
from huggingface_hub import login

# 1. Authentication (MUST HAVE)
# Log in to the Hugging Face Hub with the token read from the HF_TOKEN
# environment variable; this runs before any model or chain is created.
login(token=os.getenv("HF_TOKEN"))

# 2. PDF Processing Function
def create_qa_system(
    pdf_path: str = "data.pdf",
    *,
    chunk_size: int = 500,
    chunk_overlap: int = 50,
    top_k: int = 2,
):
    """Build a retrieval question-answering chain over one PDF file.

    The PDF is loaded with PyMuPDF, split into character chunks, embedded
    with the ``sentence-transformers/all-MiniLM-L6-v2`` model, indexed in a
    FAISS vector store, and wired to the ``google/flan-t5-base`` model via
    ``RetrievalQA`` using the "stuff" chain type.

    Args:
        pdf_path: Path of the PDF to index.
        chunk_size: ``chunk_size`` passed to the text splitter.
        chunk_overlap: ``chunk_overlap`` passed to the text splitter.
        top_k: Number of chunks the retriever returns per query.

    Returns:
        The chain produced by ``RetrievalQA.from_chain_type``.

    Raises:
        gr.Error: If ``pdf_path`` does not exist, or if no text chunks could
            be produced from it (for example an image-only PDF).
    """
    # File check
    if not os.path.exists(pdf_path):
        raise gr.Error(f"❌ {pdf_path} not found! Upload it in Space's Files tab")

    # Load PDF
    documents = PyMuPDFLoader(pdf_path).load()

    # Split text
    text_splitter = CharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    texts = text_splitter.split_documents(documents)

    # An empty chunk list would otherwise surface later as an opaque failure
    # while the vector index is being built, so report it explicitly here.
    if not texts:
        raise gr.Error(f"❌ No extractable text found in {pdf_path}")

    # Create embeddings
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    # Build vector store
    db = FAISS.from_documents(texts, embeddings)

    # Initialize LLM (Free-tier compatible)
    # Pass the token explicitly: HUGGINGFACEHUB_API_TOKEN keeps precedence
    # when it is set, otherwise the HF_TOKEN secret used elsewhere in this
    # file is reused. If neither is set, None is passed and the wrapper's own
    # lookup/validation applies.
    api_token = (
        os.environ.get("HUGGINGFACEHUB_API_TOKEN")
        or os.environ.get("HF_TOKEN")
        or None
    )
    llm = HuggingFaceHub(
        repo_id="google/flan-t5-base",  # Changed to smaller model
        huggingfacehub_api_token=api_token,
        model_kwargs={"temperature": 0.2, "max_length": 256},
    )

    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=db.as_retriever(search_kwargs={"k": top_k}),
    )

# 3. Initialize system
# The chain is built once at import time and reused by every chat() call.
# create_qa_system() raises when data.pdf is missing, so in that case the
# script stops here, before the Gradio interface below is launched.
qa = create_qa_system()

# 4. Chat interface
def chat(message, history):
    """Answer a single chat message with the module-level ``qa`` chain.

    Args:
        message: The user's question.
        history: Earlier conversation turns supplied by the chat UI. Unused:
            every question is answered independently of previous turns.

    Returns:
        The value stored under the ``"result"`` key of the chain's output,
        or a short prompt asking for a question when ``message`` is blank.
    """
    # Skip retrieval and the remote model call when there is nothing to ask.
    if not message or (isinstance(message, str) and not message.strip()):
        return "Please enter a question."

    # invoke() is the supported entry point; calling the chain object
    # directly is deprecated in LangChain.
    response = qa.invoke({"query": message})
    return response["result"]

# 5. Launch Gradio
# Build the chat UI around chat() and start serving it.
demo = gr.ChatInterface(
    fn=chat,
    title="PDF Chatbot",
    description="Upload your PDF in Files tab ➡️ Ask questions!",
)
demo.launch()