import os

import gradio as gr
from langchain_community.document_loaders import PyMuPDFLoader, TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFacePipeline
from transformers import pipeline, AutoTokenizer


def load_documents(file_path="study_materials"):
    """Load all PDF and text documents from *file_path*.

    Args:
        file_path: Directory to scan (non-recursive) for ``.pdf``/``.txt`` files.

    Returns:
        A list of LangChain ``Document`` objects; empty if the directory is
        missing or contains no supported files (callers treat empty as an
        error with a friendlier message than a raw FileNotFoundError).
    """
    documents = []
    # Guard: a missing folder previously raised FileNotFoundError from
    # os.listdir; returning [] lets create_qa_system report the clearer
    # "No study materials found" error instead.
    if not os.path.isdir(file_path):
        return documents
    for filename in os.listdir(file_path):
        path = os.path.join(file_path, filename)
        # Case-fold so .PDF / .TXT uploads are not silently skipped.
        lower = filename.lower()
        if lower.endswith(".pdf"):
            loader = PyMuPDFLoader(path)
            documents.extend(loader.load())
        elif lower.endswith(".txt"):
            loader = TextLoader(path)
            documents.extend(loader.load())
    return documents


def create_qa_system():
    """Build the retrieval-QA chain over the local study materials.

    Pipeline: load documents -> chunk -> embed (MiniLM) -> FAISS index ->
    FLAN-T5 generator wrapped for LangChain.

    Returns:
        A ``RetrievalQA`` chain that also returns its source documents.

    Raises:
        gr.Error: Wrapping any underlying failure so Gradio shows it to
            the user (including the no-documents case).
    """
    try:
        # Load documents
        documents = load_documents()
        if not documents:
            raise ValueError("šŸ“š No study materials found")

        # Text splitting: ~1100-char chunks with 200-char overlap keeps
        # paragraph context across chunk boundaries.
        text_splitter = CharacterTextSplitter(
            chunk_size=1100,
            chunk_overlap=200,
            separator="\n\n"
        )
        texts = text_splitter.split_documents(documents)

        # Embeddings
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )

        # Vector store
        db = FAISS.from_documents(texts, embeddings)

        # LLM setup with proper LangChain wrapper; device=-1 forces CPU.
        tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
        pipe = pipeline(
            "text2text-generation",
            model="google/flan-t5-base",
            tokenizer=tokenizer,
            max_length=600,
            temperature=0.7,
            do_sample=True,
            top_k=50,
            device=-1
        )

        # Wrap pipeline in LangChain component
        llm = HuggingFacePipeline(pipeline=pipe)

        # Create QA chain over the top-3 most similar chunks.
        return RetrievalQA.from_llm(
            llm=llm,
            retriever=db.as_retriever(search_kwargs={"k": 3}),
            return_source_documents=True
        )
    except Exception as e:
        raise gr.Error(f"Error: {str(e)}")


# Initialize system once at startup; fail fast if materials/models are broken.
try:
    qa = create_qa_system()
except Exception as e:
    print(f"Startup failed: {str(e)}")
    raise


def ask_question(question, history):
    """Gradio ChatInterface callback: answer *question* from the index.

    Args:
        question: The user's query string.
        history: Chat history supplied by Gradio (unused).

    Returns:
        The model's answer plus a deduplicated list of source file paths,
        or a truncated error message on failure (kept short for the UI).
    """
    try:
        result = qa.invoke({"query": question})
        answer = result["result"]
        # Set comprehension deduplicates sources cited by multiple chunks.
        sources = list({doc.metadata['source'] for doc in result['source_documents']})
        return f"{answer}\n\nšŸ“š Sources: {', '.join(sources)}"
    except Exception as e:
        return f"Error: {str(e)[:150]}"


gr.ChatInterface(
    ask_question,
    title="Study Assistant",
    description="Upload PDF/TXT files in 'study_materials' folder and ask questions!",
    theme="soft"
).launch()