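# Assumed dependencies (not pinned in the original file): gradio, transformers,
# langchain + langchain-community + langchain-text-splitters, faiss-cpu,
# sentence-transformers (for the embeddings), pymupdf (for PyMuPDFLoader)
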
import os
import gradio as gr
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFacePipeline  # Wraps a transformers pipeline as a LangChain LLM
from transformers import pipeline  # Local model execution

def create_qa_system():
    try:
        # Validate PDF
        if not os.path.exists("file.pdf"):
            raise FileNotFoundError("Upload PDF via Files tab")
        
        # Process PDF
        loader = PyMuPDFLoader("file.pdf")
        documents = loader.load()
        if len(documents) == 0:
            raise ValueError("PDF is empty or corrupted")
        
        # Split text (note: CharacterTextSplitter splits on "\n\n" by default,
        # so chunks may exceed chunk_size when paragraphs are long)
        text_splitter = CharacterTextSplitter(
            chunk_size=300,
            chunk_overlap=50
        )
        texts = text_splitter.split_documents(documents)
        
        # Create embeddings
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        
        # Build vector store
        db = FAISS.from_documents(texts, embeddings)
        
        # Local model pipeline
        qa_pipeline = pipeline(
            "text2text-generation",
            model="google/flan-t5-small",  # Runs locally
            device=-1,  # Use CPU
            max_length=128,
            do_sample=True,  # Sampling must be enabled for temperature to take effect
            temperature=0.2
        )
        
        # Wrap the raw transformers pipeline so LangChain can drive it as an LLM
        llm = HuggingFacePipeline(pipeline=qa_pipeline)
        
        return RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=db.as_retriever(search_kwargs={"k": 2})
        )
    except Exception as e:
        raise gr.Error(f"Initialization failed: {str(e)}")

# Initialize system
try:
    qa = create_qa_system()
except Exception as e:
    print(f"Fatal error: {str(e)}")
    raise

def chat_response(message, history):
    try:
        # Run the retrieval-augmented chain; .invoke() is the current LangChain call API
        response = qa.invoke({"query": message})
        return response["result"]
    except Exception as e:
        print(f"Error during query: {str(e)}")
        return f"⚠️ Error: {str(e)[:100]}"

gr.ChatInterface(chat_response).launch()
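
# Usage sketch (assumption: run as a script, e.g. on a Hugging Face Space):
#   1. Place the PDF to query next to this file, named "file.pdf".
#   2. Run the script and open the URL Gradio prints to chat with the document.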