jarif committed on
Commit
db19c60
·
verified ·
1 Parent(s): e47208b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -38
app.py CHANGED
@@ -1,23 +1,38 @@
1
  import os
2
- import logging
3
  import faiss
4
  import streamlit as st
5
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
6
- from langchain_community.embeddings import HuggingFaceEmbeddings
7
  from langchain.vectorstores import FAISS
8
- from langchain_community.llms import HuggingFacePipeline
9
  from langchain.chains import RetrievalQA
 
 
 
 
 
10
 
11
- # Set up logging
12
- logging.basicConfig(level=logging.INFO)
13
- logger = logging.getLogger(__name__)
14
 
15
- # HuggingFace model checkpoint
16
- checkpoint = "LaMini-T5-738M"
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
- @st.cache_resource
19
  def load_llm():
20
- """Load the language model for text generation."""
 
 
 
21
  tokenizer = AutoTokenizer.from_pretrained(checkpoint)
22
  model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
23
  pipe = pipeline(
@@ -29,30 +44,23 @@ def load_llm():
29
  temperature=0.3,
30
  top_p=0.95
31
  )
32
- return HuggingFacePipeline(pipeline=pipe)
33
-
34
- def load_faiss_index():
35
- """Load the FAISS index for vector search."""
36
- index_path = "faiss_index/index.faiss"
37
- if not os.path.exists(index_path):
38
- st.error(f"FAISS index not found at {index_path}. Please ensure the file exists.")
39
- raise RuntimeError(f"FAISS index not found at {index_path}.")
40
 
41
- try:
42
- index = faiss.read_index(index_path)
43
- logger.info(f"FAISS index loaded successfully from {index_path}")
44
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
45
- retriever = FAISS(index, embeddings)
46
- return retriever
47
- except Exception as e:
48
- st.error(f"Failed to load FAISS index: {e}")
49
- logger.exception("Exception in load_faiss_index")
50
- raise
51
 
52
- def process_answer(instruction):
53
- """Process the user's question using the QA system."""
 
 
 
54
  try:
55
- retriever = load_faiss_index()
 
56
  llm = load_llm()
57
  qa = RetrievalQA.from_chain_type(
58
  llm=llm,
@@ -60,16 +68,14 @@ def process_answer(instruction):
60
  retriever=retriever,
61
  return_source_documents=True
62
  )
63
- generated_text = qa.invoke(instruction)
64
- answer = generated_text['result']
65
- return answer, generated_text
66
  except Exception as e:
67
  st.error(f"An error occurred while processing the answer: {e}")
68
- logger.exception("Exception in process_answer")
69
  return "An error occurred while processing your request.", {}
70
 
71
  def main():
72
- """Main function to run the Streamlit application."""
73
  st.title("Search Your PDF πŸ“šπŸ“")
74
 
75
  with st.expander("About the App"):
@@ -90,7 +96,6 @@ def main():
90
  st.write(metadata)
91
  except Exception as e:
92
  st.error(f"An unexpected error occurred: {e}")
93
- logger.exception("Unexpected error in main function")
94
 
95
  if __name__ == '__main__':
96
  main()
 
1
  import os
 
2
  import faiss
3
  import streamlit as st
4
+ from langchain.embeddings import HuggingFaceEmbeddings
 
5
  from langchain.vectorstores import FAISS
 
6
  from langchain.chains import RetrievalQA
7
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
8
+
9
def load_faiss_index(index_path):
    """Read a serialized FAISS index from disk.

    Parameters:
    - index_path (str): Path to the FAISS index file.

    Returns:
    - faiss.Index: The deserialized FAISS index object.

    Raises:
    - FileNotFoundError: If no file exists at ``index_path``.
    - Exception: Re-raised unchanged if deserialization fails.
    """
    # Fail fast, with a user-visible Streamlit message, when the file is absent.
    if not os.path.exists(index_path):
        st.error(f"FAISS index not found at {index_path}. Please create the index first.")
        raise FileNotFoundError(f"FAISS index not found at {index_path}.")

    try:
        loaded_index = faiss.read_index(index_path)
        st.success("FAISS index loaded successfully.")
        return loaded_index
    except Exception as e:
        # Surface the failure in the UI, then propagate to the caller.
        st.error(f"Failed to load FAISS index: {e}")
        raise
30
 
 
31
  def load_llm():
32
+ """
33
+ Load the HuggingFace model for generating responses.
34
+ """
35
+ checkpoint = "LaMini-T5-738M"
36
  tokenizer = AutoTokenizer.from_pretrained(checkpoint)
37
  model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
38
  pipe = pipeline(
 
44
  temperature=0.3,
45
  top_p=0.95
46
  )
47
+ return pipe
 
 
 
 
 
 
 
48
 
49
+ def process_answer(question):
50
+ """
51
+ Process the user's question using the FAISS index and LLM.
52
+
53
+ Parameters:
54
+ - question (str): User's question to be processed.
 
 
 
 
55
 
56
+ Returns:
57
+ - str: The answer generated by the LLM.
58
+ """
59
+ index_path = 'faiss_index/index.faiss'
60
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
61
  try:
62
+ faiss_index = load_faiss_index(index_path)
63
+ retriever = FAISS(index=faiss_index, embeddings=embeddings)
64
  llm = load_llm()
65
  qa = RetrievalQA.from_chain_type(
66
  llm=llm,
 
68
  retriever=retriever,
69
  return_source_documents=True
70
  )
71
+ result = qa.invoke(question)
72
+ answer = result['result']
73
+ return answer, result
74
  except Exception as e:
75
  st.error(f"An error occurred while processing the answer: {e}")
 
76
  return "An error occurred while processing your request.", {}
77
 
78
  def main():
 
79
  st.title("Search Your PDF πŸ“šπŸ“")
80
 
81
  with st.expander("About the App"):
 
96
  st.write(metadata)
97
  except Exception as e:
98
  st.error(f"An unexpected error occurred: {e}")
 
99
 
100
  if __name__ == '__main__':
101
  main()