import os

import gradio as gr
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import CharacterTextSplitter
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline


def create_qa_system():
    try:
        # Validate PDF
        if not os.path.exists("file.pdf"):
            raise FileNotFoundError("Upload PDF via Files tab")

        # Process PDF
        loader = PyMuPDFLoader("file.pdf")
        documents = loader.load()
        if len(documents) == 0:
            raise ValueError("PDF is empty or corrupted")

        # Split text into small overlapping chunks for retrieval
        text_splitter = CharacterTextSplitter(
            chunk_size=300,
            chunk_overlap=50
        )
        texts = text_splitter.split_documents(documents)

        # Create embeddings
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )

        # Build vector store
        db = FAISS.from_documents(texts, embeddings)

        # Initialize local model with LangChain wrapper
        model_name = "google/flan-t5-small"
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        # To use a GPU, pass device_map="auto" to from_pretrained (requires accelerate);
        # passing it to pipeline() alongside an already-instantiated model has no effect.
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        pipe = pipeline(
            "text2text-generation",
            model=model,
            tokenizer=tokenizer,
            max_length=128,
            do_sample=True,  # temperature is ignored unless sampling is enabled
            temperature=0.2
        )
        llm = HuggingFacePipeline(pipeline=pipe)

        return RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",  # concatenate all retrieved chunks into one prompt
            retriever=db.as_retriever(search_kwargs={"k": 2})
        )
    except Exception as e:
        raise gr.Error(f"Initialization failed: {str(e)}")


# Initialize system
try:
    qa = create_qa_system()
except Exception as e:
    print(f"Fatal error: {str(e)}")
    raise


def chat_response(message, history):
    try:
        # Chain.__call__ is deprecated; invoke() is the supported entry point
        response = qa.invoke({"query": message})
        return response["result"]
    except Exception as e:
        print(f"Error during query: {str(e)}")
        return f"⚠️ Error: {str(e)[:100]}"


gr.ChatInterface(chat_response).launch()
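
# A minimal smoke test, left commented out: run these lines instead of launching
# the UI to verify retrieval and generation end to end. The query string below is
# a placeholder; adjust it to your PDF's content.
#
#   result = qa.invoke({"query": "What is this document about?"})
#   print(result["result"])
#
# To inspect which chunks the retriever would feed the model (retrievers are
# Runnables, so invoke() works on them as well):
#
#   for doc in qa.retriever.invoke("What is this document about?"):
#       print(doc.page_content[:100])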