# trry / app.py
# random2222's picture
# Update app.py
# ecbad47 verified
import os
import gradio as gr
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
def create_qa_system(
    pdf_path="file.pdf",
    embedding_model_name="sentence-transformers/all-MiniLM-L6-v2",
    llm_model_name="google/flan-t5-small",
):
    """Build a RetrievalQA chain that answers questions about one PDF.

    The PDF is loaded, split into overlapping chunks, embedded into a FAISS
    vector store, and paired with a local seq2seq model wrapped as a
    LangChain LLM.

    Args:
        pdf_path: Path of the PDF to index. Defaults to ``"file.pdf"``.
        embedding_model_name: Model name passed to ``HuggingFaceEmbeddings``.
        llm_model_name: Model name used to load the tokenizer and the
            seq2seq model for the text2text-generation pipeline.

    Returns:
        The chain built by ``RetrievalQA.from_chain_type`` with the "stuff"
        chain type and a retriever that fetches 2 chunks per query.

    Raises:
        gr.Error: If any step fails (missing file, no documents, no text
            chunks, model loading errors, ...). The original exception is
            attached as ``__cause__``.
    """
    try:
        # Validate PDF
        if not os.path.exists(pdf_path):
            raise FileNotFoundError("Upload PDF via Files tab")

        # Process PDF
        documents = PyMuPDFLoader(pdf_path).load()
        if not documents:
            raise ValueError("PDF is empty or corrupted")

        # Split text
        text_splitter = CharacterTextSplitter(
            chunk_size=300,
            chunk_overlap=50,
        )
        texts = text_splitter.split_documents(documents)
        # A PDF whose pages carry no extractable text (e.g. scanned images)
        # can load fine yet produce no chunks; fail here with a clear message
        # instead of letting the vector-store build fail on an empty list.
        if not texts:
            raise ValueError("PDF contains no extractable text")

        # Create embeddings
        embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

        # Build vector store
        db = FAISS.from_documents(texts, embeddings)

        # Initialize local model with LangChain wrapper
        tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
        model = AutoModelForSeq2SeqLM.from_pretrained(llm_model_name)
        # NOTE(review): temperature is presumably only honoured when sampling
        # is enabled (do_sample=True) -- confirm whether greedy decoding is
        # what is intended here.
        pipe = pipeline(
            "text2text-generation",
            model=model,
            tokenizer=tokenizer,
            max_length=128,
            temperature=0.2,
            device_map="auto",
        )
        llm = HuggingFacePipeline(pipeline=pipe)

        return RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=db.as_retriever(search_kwargs={"k": 2}),
        )
    except Exception as e:
        # Chain the cause so the underlying traceback is not lost.
        raise gr.Error(f"Initialization failed: {str(e)}") from e
# Build the QA chain once at import time. Any failure is reported on stdout
# and then re-raised, so the app does not start without a working chain.
try:
    qa = create_qa_system()
except Exception as init_error:
    print(f"Fatal error: {init_error}")
    raise
def chat_response(message, history):
    """Answer one chat message using the module-level ``qa`` chain.

    Args:
        message: The user's question text.
        history: Earlier conversation turns supplied by the chat UI; unused.

    Returns:
        The chain's ``"result"`` value on success, a short prompt when the
        message is blank, or a warning string (error text truncated to 100
        characters) when the query raises.
    """
    # A blank question has nothing to retrieve against; answer immediately
    # rather than running retrieval and generation on it.
    if isinstance(message, str) and not message.strip():
        return "Please enter a question."
    try:
        # invoke() replaces the deprecated direct call on the chain object.
        response = qa.invoke({"query": message})
        return response["result"]
    except Exception as e:
        print(f"Error during query: {str(e)}")
        return f"⚠️ Error: {str(e)[:100]}"
# Wrap chat_response in a Gradio chat interface and launch it.
demo = gr.ChatInterface(chat_response)
demo.launch()