File size: 3,000 Bytes
01aade3
a144f48
275a13f
d153de8
db19c60
a144f48
d153de8
db19c60
 
275a13f
 
 
db19c60
 
275a13f
db19c60
 
 
275a13f
db19c60
275a13f
db19c60
 
 
275a13f
db19c60
 
01aade3
 
db19c60
01aade3
 
 
 
 
 
 
 
 
 
 
db19c60
d153de8
db19c60
 
 
01aade3
db19c60
 
01aade3
 
 
 
 
 
 
db19c60
 
 
01aade3
275a13f
01aade3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286b934
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import os
import faiss
import logging
import streamlit as st
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Configure logging for the whole app.
# DEBUG level: emit verbose diagnostics (index loading, QA errors) during development.
logging.basicConfig(level=logging.DEBUG)

def load_faiss_index(index_path):
    """Load a raw FAISS index from disk, reporting progress in the Streamlit UI.

    Args:
        index_path: Filesystem path to a serialized FAISS index file.

    Returns:
        The deserialized ``faiss`` index object.

    Raises:
        FileNotFoundError: If no file exists at ``index_path``.
        Exception: Re-raises whatever ``faiss.read_index`` fails with.
    """
    if not os.path.exists(index_path):
        # Fail fast with a user-visible message rather than letting faiss
        # produce a cryptic low-level error.
        logging.error("FAISS index not found at %s. Please create the index first.", index_path)
        st.error(f"FAISS index not found at {index_path}. Please create the index first.")
        raise FileNotFoundError(f"FAISS index not found at {index_path}.")
    try:
        logging.info("Attempting to load FAISS index from %s.", index_path)
        index = faiss.read_index(index_path)
    except Exception:
        # logging.exception records the traceback, not just the message.
        logging.exception("Failed to load FAISS index from %s", index_path)
        st.error(f"Failed to load FAISS index from {index_path}")
        raise
    logging.info("FAISS index loaded successfully.")
    st.success("FAISS index loaded successfully.")
    return index

def load_llm(checkpoint="LaMini-T5-738M"):
    """Build a text2text-generation pipeline around a seq2seq checkpoint.

    Args:
        checkpoint: Model name or local path understood by ``from_pretrained``.
            Defaults to the original hard-coded "LaMini-T5-738M" so existing
            callers are unaffected.

    Returns:
        A ``transformers`` pipeline configured for sampled generation
        (max_length=256, temperature=0.3, top_p=0.95).
    """
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    # Low temperature + nucleus sampling: mostly deterministic answers with
    # a little variety.
    pipe = pipeline(
        'text2text-generation',
        model=model,
        tokenizer=tokenizer,
        max_length=256,
        do_sample=True,
        temperature=0.3,
        top_p=0.95
    )
    return pipe

def process_answer(question):
    """Answer *question* with a RetrievalQA chain over the local FAISS store.

    Args:
        question: Natural-language question string from the UI.

    Returns:
        Tuple ``(answer_text, full_result_dict)``. On any failure the error
        is logged and shown in the UI and ``("An error occurred while
        processing your request.", {})`` is returned instead of raising.
    """
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    try:
        # BUG FIX: the original built `FAISS(index=..., embeddings=...)`,
        # which is not a valid constructor call (FAISS also needs a docstore
        # and id mapping), and passed the store itself as a retriever.
        # Load the full persisted vector store instead and derive a retriever.
        vector_store = FAISS.load_local(
            'faiss_index',
            embeddings,
            allow_dangerous_deserialization=True,  # index is local and self-created
        )
        retriever = vector_store.as_retriever()
        # BUG FIX: RetrievalQA requires a LangChain LLM; wrap the bare
        # transformers pipeline returned by load_llm().
        llm = HuggingFacePipeline(pipeline=load_llm())
        qa = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True
        )
        result = qa.invoke(question)
        answer = result['result']
        return answer, result
    except Exception as e:
        logging.error("An error occurred while processing the answer: %s", e)
        st.error(f"An error occurred while processing the answer: {e}")
        return "An error occurred while processing your request.", {}

def main():
    """Render the Streamlit page: take a question, display answer + metadata."""
    st.title("Search Your PDF 📚📝")

    with st.expander("About the App"):
        st.markdown(
            """
            This is a Generative AI powered Question and Answering app that responds to questions about your PDF File.
            """
        )

    question = st.text_area("Enter your Question")

    # Guard clause: nothing to do until the user presses the button.
    if not st.button("Ask"):
        return

    st.info("Your Question: " + question)
    st.info("Your Answer")
    try:
        answer, metadata = process_answer(question)
        st.write(answer)
        st.write(metadata)
    except Exception as e:
        # Last-resort safety net so the page shows a message instead of a traceback.
        st.error(f"An unexpected error occurred: {e}")


if __name__ == '__main__':
    main()