File size: 2,354 Bytes
68420b4
6674899
21206fd
6674899
8a6b9ad
6674899
1084bdb
6674899
68420b4
62390c0
21206fd
68420b4
 
 
137d750
ebee81a
68420b4
ebee81a
68420b4
 
f7672aa
68420b4
 
 
f7672aa
68420b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd9b7f6
68420b4
bd9b7f6
21206fd
68420b4
 
 
 
 
 
6674899
68420b4
137d750
68420b4
 
 
 
 
 
6674899
137d750
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# app.py
import os
import gradio as gr
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFaceEndpoint
from huggingface_hub import login

# 1. Authentication with validation
# Read the token once; fail fast with a clear message if the Space secret is absent.
hf_token = os.environ.get('HF_TOKEN')
if not hf_token:
    raise RuntimeError("Add HF_TOKEN in Space secrets!")
login(token=hf_token)

# 2. PDF processing with error handling
def create_qa_system():
    """Build a RetrievalQA chain over the bundled ``file.pdf``.

    Loads the PDF, splits it into small overlapping chunks, embeds them into
    an in-memory FAISS index, and wires a small hosted LLM on top as a
    "stuff"-type QA chain.

    Returns:
        RetrievalQA: a chain that answers ``{"query": ...}`` requests
        against the PDF content.

    Raises:
        gr.Error: wrapping any underlying failure (missing or empty PDF,
            embedding/endpoint setup errors) so Gradio can display it.
    """
    try:
        # Fail fast if the expected document was not uploaded with the Space.
        if not os.path.exists("file.pdf"):
            raise FileNotFoundError("PDF missing")

        # Load and validate PDF
        loader = PyMuPDFLoader("file.pdf")
        documents = loader.load()
        if not documents:  # parsed but produced no pages/content
            raise ValueError("PDF content empty")

        # Small chunks with overlap keep each retrieved context tiny enough
        # for the free-tier model's context window.
        text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=50)
        texts = text_splitter.split_documents(documents)

        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )

        db = FAISS.from_documents(texts, embeddings)

        # Smaller model for free tier
        llm = HuggingFaceEndpoint(
            repo_id="google/flan-t5-small",
            max_length=128,
            temperature=0.2,
            huggingfacehub_api_token=os.environ.get('HF_TOKEN')
        )

        # k=1: only the single best-matching chunk is stuffed into the prompt.
        return RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=db.as_retriever(search_kwargs={"k": 1})
        )
    except Exception as e:
        # Chain the original exception so the real traceback is preserved
        # in the logs instead of being swallowed by the gr.Error wrapper.
        raise gr.Error(f"Setup failed: {e}") from e

# 3. Initialize system
# Build the QA chain once at startup; if construction fails, log for the
# Space console and re-raise so the app aborts instead of serving broken.
try:
    qa = create_qa_system()
except Exception as e:
    print(f"❌ Critical error: {str(e)}")  # visible in Space logs
    raise  # fail loudly — chat_response depends on `qa` existing

# 4. Chat interface with error messages
def chat_response(message, history):
    """Answer one chat turn by querying the global RetrievalQA chain.

    Args:
        message: the user's question.
        history: prior chat turns (required by gr.ChatInterface; unused).

    Returns:
        The chain's answer string, or an "Error: ..." string for the user
        when the query fails.
    """
    try:
        # .invoke is the current Runnable API; calling the chain directly
        # (qa({...})) is deprecated in LangChain >= 0.1.
        response = qa.invoke({"query": message})
        return response["result"]
    except Exception as e:
        print(f"🚨 User-facing error: {str(e)}")  # Detailed log
        return f"Error: {str(e)}"  # User message

# Launch the Gradio chat UI (blocking call; serves the Space).
gr.ChatInterface(chat_response).launch()