File size: 2,479 Bytes
6674899
21206fd
6674899
8a6b9ad
6674899
1084bdb
6674899
68420b4
62390c0
21206fd
70a1f11
68420b4
70a1f11
137d750
ebee81a
 
68420b4
70a1f11
f7672aa
70a1f11
68420b4
70a1f11
f7672aa
68420b4
 
70a1f11
 
 
 
 
 
 
68420b4
 
70a1f11
68420b4
 
 
 
70a1f11
68420b4
 
70a1f11
68420b4
 
70a1f11
 
 
 
 
68420b4
 
 
 
 
 
d12de7f
68420b4
b1f884f
21206fd
70a1f11
68420b4
 
 
70a1f11
68420b4
6674899
137d750
68420b4
 
 
 
70a1f11
 
6674899
70a1f11
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import os
import gradio as gr
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFaceEndpoint
from huggingface_hub import login

# Authentication: a Hugging Face token must be provided via Space secrets
# before any hub-backed component (embeddings/LLM endpoint) is created.
hf_token = os.environ.get('HF_TOKEN')
if not hf_token:
    raise ValueError("❌ Add HF_TOKEN in Space secrets!")
login(token=hf_token)

def create_qa_system():
    """Build a RetrievalQA chain over the local ``file.pdf``.

    Loads the PDF, splits it into small overlapping chunks, embeds them
    into an in-memory FAISS index, and wires the retriever to a hosted
    ``google/flan-t5-small`` inference endpoint.

    Returns:
        RetrievalQA: a "stuff"-type QA chain that retrieves the top-2
        matching chunks per query.

    Raises:
        gr.Error: wrapping any underlying failure (missing or empty PDF,
            embedding/endpoint setup problems) so it renders in the UI.
    """
    try:
        # Validate PDF presence before doing any heavy model work
        if not os.path.exists("file.pdf"):
            raise FileNotFoundError("Upload PDF via Files tab")

        # Process PDF
        loader = PyMuPDFLoader("file.pdf")
        documents = loader.load()
        if not documents:
            raise ValueError("PDF is empty or corrupted")

        # Split text — chunks kept small so several fit alongside the
        # question within flan-t5-small's limited context (max_length=128)
        text_splitter = CharacterTextSplitter(
            chunk_size=300,
            chunk_overlap=50
        )
        texts = text_splitter.split_documents(documents)

        # Create embeddings (small, CPU-friendly sentence-transformer)
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )

        # Build vector store
        db = FAISS.from_documents(texts, embeddings)

        # Initialize LLM — low temperature biases toward factual answers
        llm = HuggingFaceEndpoint(
            repo_id="google/flan-t5-small",
            task="text2text-generation",
            model_kwargs={
                "temperature": 0.2,
                "max_length": 128
            },
            huggingfacehub_api_token=os.environ.get('HF_TOKEN')
        )

        return RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=db.as_retriever(search_kwargs={"k": 2}))
    except Exception as e:
        # BUG FIX: the original raise had an unbalanced extra ')' that made
        # the whole file a SyntaxError. Also chain the original exception
        # ("from e") so the real traceback is preserved for debugging.
        raise gr.Error(f"Initialization failed: {str(e)}") from e

# Initialize system once at import time; abort startup loudly on failure
# so the Space shows the error instead of serving a broken chat.
try:
    qa = create_qa_system()
except Exception as startup_error:
    print(f"Fatal error: {str(startup_error)}")
    raise

def chat_response(message, history):
    """Answer one chat turn by running *message* through the QA chain.

    The *history* argument is required by gr.ChatInterface but unused.
    Any failure is reported back to the user as a truncated error string
    rather than crashing the chat session.
    """
    try:
        answer = qa({"query": message})["result"]
    except Exception as err:
        print(f"Error during query: {str(err)}")
        return f"⚠️ Error: {str(err)[:100]}"
    return answer

# Create interface
gr.ChatInterface(
    chat_response,
    title="PDF Chat Assistant",
    description="Ask questions about your PDF document"
).launch()