Spaces:
Sleeping
Sleeping
File size: 2,479 Bytes
6674899 21206fd 6674899 8a6b9ad 6674899 1084bdb 6674899 68420b4 62390c0 21206fd 70a1f11 68420b4 70a1f11 137d750 ebee81a 68420b4 70a1f11 f7672aa 70a1f11 68420b4 70a1f11 f7672aa 68420b4 70a1f11 68420b4 70a1f11 68420b4 70a1f11 68420b4 70a1f11 68420b4 70a1f11 68420b4 d12de7f 68420b4 b1f884f 21206fd 70a1f11 68420b4 70a1f11 68420b4 6674899 137d750 68420b4 70a1f11 6674899 70a1f11 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
import os
import gradio as gr
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFaceEndpoint
from huggingface_hub import login
# Authentication: the Space must define an HF_TOKEN secret before startup.
_hf_token = os.environ.get('HF_TOKEN')
if not _hf_token:
    raise ValueError("❌ Add HF_TOKEN in Space secrets!")
# Log in once at import time so all Hub calls below are authenticated.
login(token=_hf_token)
def create_qa_system():
    """Build a RetrievalQA chain over the Space's uploaded PDF.

    Loads ``file.pdf`` from the Space working directory, splits it into
    overlapping chunks, embeds the chunks into an in-memory FAISS index,
    and wires a retriever to a hosted flan-t5-small endpoint.

    Returns:
        RetrievalQA: chain answered via ``qa({"query": ...})["result"]``.

    Raises:
        gr.Error: wraps any underlying failure (missing/empty PDF,
            embedding or endpoint errors) so Gradio shows it to the user.
    """
    try:
        # Validate PDF: the file must be uploaded via the Space's Files tab.
        if not os.path.exists("file.pdf"):
            raise FileNotFoundError("Upload PDF via Files tab")
        # Process PDF into one document per page.
        loader = PyMuPDFLoader("file.pdf")
        documents = loader.load()
        if not documents:
            raise ValueError("PDF is empty or corrupted")
        # Small chunks with overlap keep prompts within flan-t5-small's
        # limited input window.
        text_splitter = CharacterTextSplitter(
            chunk_size=300,
            chunk_overlap=50
        )
        texts = text_splitter.split_documents(documents)
        # Create embeddings with a light sentence-transformers model.
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        # Build the vector store used for similarity retrieval.
        db = FAISS.from_documents(texts, embeddings)
        # Initialize the hosted LLM endpoint.
        llm = HuggingFaceEndpoint(
            repo_id="google/flan-t5-small",
            task="text2text-generation",
            model_kwargs={
                "temperature": 0.2,
                "max_length": 128
            },
            huggingfacehub_api_token=os.environ.get('HF_TOKEN')
        )
        # "stuff" chain type concatenates the k=2 retrieved chunks directly
        # into the prompt.
        return RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=db.as_retriever(search_kwargs={"k": 2}))
    except Exception as e:
        # BUG FIX: original line had an extra ')' (SyntaxError). Also chain
        # the cause so the underlying traceback survives in the Space logs.
        raise gr.Error(f"Initialization failed: {str(e)}") from e
# Initialize system once at import time; the chain is reused by every chat
# turn so we never rebuild the index per request.
try:
    qa = create_qa_system()
except Exception as startup_error:
    # Log for the Space console, then re-raise so the container surfaces
    # the failure instead of serving a broken app.
    print(f"Fatal error: {str(startup_error)}")
    raise
def chat_response(message, history):
    """Answer one chat turn by querying the module-level RetrievalQA chain.

    ``history`` is required by gr.ChatInterface's signature but unused.
    Any failure is logged and reported back to the user as a short string
    instead of crashing the interface.
    """
    try:
        answer = qa({"query": message})
        return answer["result"]
    except Exception as err:
        print(f"Error during query: {str(err)}")
        return f"⚠️ Error: {str(err)[:100]}"
# Create interface: ChatInterface calls chat_response(message, history) per
# turn. (BUG FIX: removed a stray trailing '|' after .launch() that made the
# file a SyntaxError.)
gr.ChatInterface(
    chat_response,
    title="PDF Chat Assistant",
    description="Ask questions about your PDF document"
).launch()