# Hugging Face Space app (web-page scrape artifacts "Spaces: / Sleeping" removed)
import os | |
import gradio as gr | |
from langchain_community.vectorstores import FAISS | |
from langchain_community.embeddings import HuggingFaceEmbeddings | |
from langchain_community.document_loaders import PyMuPDFLoader | |
from langchain_text_splitters import CharacterTextSplitter | |
from langchain.chains import RetrievalQA | |
from langchain_community.llms import HuggingFaceEndpoint | |
from huggingface_hub import login | |
# Authentication: the Space must provide an HF token via its secrets.
hf_token = os.environ.get('HF_TOKEN')
if not hf_token:
    raise ValueError("❌ Add HF_TOKEN in Space secrets!")
login(token=hf_token)
def create_qa_system():
    """Build a RetrievalQA chain over ``file.pdf``.

    Loads the PDF, splits it into overlapping chunks, embeds them into a
    FAISS vector store, and wires a HuggingFaceEndpoint LLM into a
    "stuff"-type retrieval chain.

    Returns:
        RetrievalQA: chain answering queries against the PDF content.

    Raises:
        gr.Error: wrapping any underlying failure (missing/empty PDF,
            embedding or endpoint errors) so Gradio shows it to the user.
    """
    try:
        # Validate PDF presence before doing any expensive work.
        if not os.path.exists("file.pdf"):
            raise FileNotFoundError("Upload PDF via Files tab")

        # Process PDF into per-page documents.
        loader = PyMuPDFLoader("file.pdf")
        documents = loader.load()
        if not documents:  # idiomatic emptiness check (was len(...) == 0)
            raise ValueError("PDF is empty or corrupted")

        # Split text into small overlapping chunks to fit the LLM context.
        text_splitter = CharacterTextSplitter(
            chunk_size=300,
            chunk_overlap=50,
        )
        texts = text_splitter.split_documents(documents)

        # Create embeddings with a small, fast sentence-transformer model.
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )

        # Build the in-memory FAISS vector store.
        db = FAISS.from_documents(texts, embeddings)

        # Initialize the hosted inference LLM.
        # NOTE(review): newer langchain-huggingface versions expect
        # temperature/max_length as top-level kwargs, not model_kwargs —
        # confirm against the installed version.
        llm = HuggingFaceEndpoint(
            repo_id="google/flan-t5-small",
            task="text2text-generation",
            model_kwargs={
                "temperature": 0.2,
                "max_length": 128,
            },
            huggingfacehub_api_token=os.environ.get('HF_TOKEN'),
        )

        # k=2: retrieve the two most similar chunks per query.
        return RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=db.as_retriever(search_kwargs={"k": 2}),
        )
    except Exception as e:
        # BUGFIX: original line had an extra closing parenthesis
        # (`...{str(e)}"))`) — a SyntaxError. Also chain the cause.
        raise gr.Error(f"Initialization failed: {str(e)}") from e
# Initialize the QA system once at startup; a failure here is fatal,
# so log it and let the exception propagate to crash the Space loudly.
try:
    qa = create_qa_system()
except Exception as startup_error:
    print(f"Fatal error: {str(startup_error)}")
    raise
def chat_response(message, history):
    """Answer one chat turn by querying the RetrievalQA chain.

    Args:
        message: the user's question.
        history: prior chat turns (required by gr.ChatInterface, unused).

    Returns:
        The chain's answer string, or a truncated error message on failure.
    """
    try:
        answer = qa({"query": message})
    except Exception as exc:
        print(f"Error during query: {str(exc)}")
        return f"⚠️ Error: {str(exc)[:100]}"
    return answer["result"]
# Create the chat UI and start the Gradio server.
demo = gr.ChatInterface(
    fn=chat_response,
    title="PDF Chat Assistant",
    description="Ask questions about your PDF document",
)
demo.launch()