"""Streamlit question-answering app over a pre-built FAISS index of a PDF."""

import logging
import os

# `faiss` is no longer referenced directly below; the import is retained so
# the module's existing import behavior is unchanged.
import faiss  # noqa: F401
import streamlit as st
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# HuggingFace model checkpoint.
# NOTE(review): this is passed verbatim to `from_pretrained`; presumably it is
# a local directory next to the app -- confirm, or use the full hub id.
checkpoint = "LaMini-T5-738M"


@st.cache_resource
def load_llm():
    """Load the language model for text generation.

    The tokenizer and seq2seq model are loaded from ``checkpoint`` and wrapped
    in a ``text2text-generation`` pipeline. The result is cached by Streamlit,
    so the model is loaded once rather than on every script rerun.

    Returns:
        A ``HuggingFacePipeline`` wrapping the generation pipeline.
    """
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    pipe = pipeline(
        "text2text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=256,
        do_sample=True,
        temperature=0.3,
        top_p=0.95,
    )
    return HuggingFacePipeline(pipeline=pipe)


@st.cache_resource
def load_faiss_index():
    """Load the FAISS vector store and return it as a retriever.

    The store is loaded with ``FAISS.load_local`` from the directory that
    contains ``index.faiss``. A raw ``faiss`` index alone is not enough to
    build a LangChain ``FAISS`` store: the constructor also needs the docstore
    and the index-to-docstore-id mapping, which ``load_local`` restores from
    the files written by ``save_local``.

    Cached by Streamlit (like ``load_llm``) so the embedding model and index
    are not reloaded for every question.

    Returns:
        A retriever (``vector_store.as_retriever()``) suitable for
        ``RetrievalQA.from_chain_type``.

    Raises:
        RuntimeError: If ``faiss_index/index.faiss`` does not exist.
        Exception: Any error raised while loading is reported in the UI,
            logged, and re-raised.
    """
    index_path = "faiss_index/index.faiss"
    if not os.path.exists(index_path):
        st.error(
            f"FAISS index not found at {index_path}. "
            "Please ensure the file exists."
        )
        raise RuntimeError(f"FAISS index not found at {index_path}.")

    try:
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        # SECURITY: load_local unpickles the docstore stored beside the index.
        # Only load index directories that this application created itself;
        # never point this at files from an untrusted source.
        vector_store = FAISS.load_local(
            os.path.dirname(index_path),
            embeddings,
            allow_dangerous_deserialization=True,
        )
        logger.info("FAISS index loaded successfully from %s", index_path)
        # RetrievalQA expects a retriever, not the vector store itself.
        return vector_store.as_retriever()
    except Exception as e:
        st.error(f"Failed to load FAISS index: {e}")
        logger.exception("Exception in load_faiss_index")
        raise


def process_answer(instruction):
    """Process the user's question using the QA system.

    Args:
        instruction: The question text to send to the retrieval QA chain.

    Returns:
        A ``(answer, generated_text)`` tuple, where ``generated_text`` is the
        full mapping returned by the chain and ``answer`` is its ``'result'``
        entry. If anything fails, the error is shown in the UI and logged,
        and a generic error message with an empty dict is returned instead.
    """
    try:
        retriever = load_faiss_index()
        llm = load_llm()
        qa = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
        )
        generated_text = qa.invoke(instruction)
        answer = generated_text["result"]
        return answer, generated_text
    except Exception as e:
        st.error(f"An error occurred while processing the answer: {e}")
        logger.exception("Exception in process_answer")
        return "An error occurred while processing your request.", {}


def main():
    """Main function to run the Streamlit application."""
    st.title("Search Your PDF 📚📝")
    with st.expander("About the App"):
        st.markdown(
            """
            This is a Generative AI powered Question and Answering app that responds to questions about your PDF File.
            """
        )

    question = st.text_area("Enter your Question")
    if st.button("Ask"):
        # Do not run the (expensive) chain on an empty or whitespace-only
        # question.
        if not question or not question.strip():
            st.warning("Please enter a question before asking.")
            return

        st.info("Your Question: " + question)
        st.info("Your Answer")
        try:
            answer, metadata = process_answer(question)
            st.write(answer)
            st.write(metadata)
        except Exception as e:
            # Top-level boundary: report anything process_answer did not
            # already handle (e.g. a failure while rendering the result).
            st.error(f"An unexpected error occurred: {e}")
            logger.exception("Unexpected error in main function")


if __name__ == '__main__':
    main()