File size: 2,310 Bytes
68420b4
6674899
21206fd
6674899
8a6b9ad
6674899
1084bdb
6674899
68420b4
62390c0
21206fd
68420b4
 
 
137d750
ebee81a
68420b4
ebee81a
68420b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21206fd
68420b4
 
 
 
 
 
6674899
68420b4
137d750
68420b4
 
 
 
 
 
6674899
137d750
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# app.py
import os
import gradio as gr
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFaceEndpoint
from huggingface_hub import login

# 1. Authentication with validation
# Fail fast at import time if the Hugging Face token secret is missing, then
# authenticate the hub client so the HuggingFaceEndpoint LLM calls below are
# authorized. Runs once when the Space boots.
if not os.environ.get('HF_TOKEN'):
    raise RuntimeError("Add HF_TOKEN in Space secrets!")
login(token=os.environ.get('HF_TOKEN'))

# 2. PDF processing with error handling
def create_qa_system():
    """Build a RetrievalQA chain over the bundled ``data.pdf``.

    Pipeline: load the PDF, split it into overlapping character chunks,
    embed the chunks with a MiniLM sentence-transformer, index them in a
    FAISS store, and wire a small hosted Flan-T5 endpoint as the LLM.

    Returns:
        A ``RetrievalQA`` chain ready to answer ``{"query": ...}`` calls.

    Raises:
        gr.Error: wrapping any underlying failure (missing/empty PDF,
            embedding or endpoint setup problems) so Gradio shows it
            to the user as a friendly error.
    """
    try:
        # File check — fail early with a clear message instead of a
        # loader stack trace.
        if not os.path.exists("data.pdf"):
            raise FileNotFoundError("PDF missing")

        # Load and validate PDF
        loader = PyMuPDFLoader("data.pdf")
        documents = loader.load()
        if not documents:
            raise ValueError("PDF content empty")

        # Chunking: 300 chars with 50 overlap keeps chunks small enough
        # for the tiny LLM context below.
        text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=50)
        texts = text_splitter.split_documents(documents)

        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )

        db = FAISS.from_documents(texts, embeddings)

        # Smaller model for free tier
        llm = HuggingFaceEndpoint(
            repo_id="google/flan-t5-small",
            max_length=128,
            temperature=0.2,
            huggingfacehub_api_token=os.environ.get('HF_TOKEN')
        )

        # BUG FIX: the original call was missing its closing parenthesis,
        # which made the whole module a SyntaxError.
        return RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            # k=1: retrieve a single chunk to stay within the small
            # model's context window.
            retriever=db.as_retriever(search_kwargs={"k": 1}),
        )
    except Exception as e:
        # Chain the cause so logs keep the original traceback.
        raise gr.Error(f"Setup failed: {str(e)}") from e

# 3. Initialize system
# Build the QA chain once at startup. If setup fails (missing PDF, auth or
# endpoint problems), log a recognizable marker line and re-raise so the
# Space crashes visibly instead of serving a broken chat.
try:
    qa = create_qa_system()
except Exception as e:
    print(f"❌ Critical error: {str(e)}")
    raise

# 4. Chat interface with error messages
def chat_response(message, history):
    """Answer one chat turn via the module-level ``qa`` chain.

    ``history`` is accepted for the gr.ChatInterface contract but unused.
    On any failure the exception is logged server-side and a short error
    string is returned to the user instead of raising.
    """
    try:
        result = qa({"query": message})
    except Exception as e:
        print(f"🚨 User-facing error: {str(e)}")  # Detailed log
        return f"Error: {str(e)}"  # User message
    return result["result"]

# Wire the handler into a standard Gradio chat UI and start the server
# (blocks until shutdown).
gr.ChatInterface(chat_response).launch()