File size: 4,515 Bytes
cefd1c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import streamlit as st
import os
import logging
import faiss
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from ingest import create_faiss_index

# Set up logging: configure the root logger at INFO level and create a
# module-level logger named after this module for the functions below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Model identifier passed to both `AutoTokenizer.from_pretrained` and
# `AutoModelForSeq2SeqLM.from_pretrained` in load_llm().
# NOTE(review): there is no hub namespace prefix, so this presumably refers to
# a local directory next to the app -- confirm where the weights must live.
checkpoint = "LaMini-T5-738M"

@st.cache_resource
def load_llm():
    """Build the language model wrapper used by the question-answering chain.

    The tokenizer and the seq2seq model are both loaded from the module-level
    ``checkpoint``, combined into a transformers ``text2text-generation``
    pipeline, and wrapped in a LangChain ``HuggingFacePipeline``.

    Returns:
        A ``HuggingFacePipeline`` wrapping the configured generation pipeline.
    """
    # Generation settings forwarded as keyword arguments to the pipeline.
    generation_settings = {
        "max_length": 256,
        "do_sample": True,
        "temperature": 0.3,
        "top_p": 0.95,
    }

    # Tokenizer first, then the model (same load order as before).
    text_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

    generator = pipeline(
        'text2text-generation',
        model=seq2seq_model,
        tokenizer=text_tokenizer,
        **generation_settings,
    )
    return HuggingFacePipeline(pipeline=generator)

def validate_index_file(index_path):
    """Run a shallow sanity check on the file at *index_path*.

    The check only confirms that the file is non-empty and that its first
    bytes (at most 100) can be read in binary mode; it does not parse the
    content as a FAISS index.

    Args:
        index_path: Path of the index file to inspect.

    Returns:
        True when the file is non-empty and readable. False when the file is
        empty (an error is also shown in the Streamlit page) or when any
        exception is raised during the check (the exception is logged).
    """
    try:
        size_in_bytes = os.path.getsize(index_path)
        if not size_in_bytes:
            st.error(f"Index file '{index_path}' is empty.")
            return False

        # Reading a small prefix is enough to prove the file can be opened.
        with open(index_path, 'rb') as index_file:
            header = index_file.read(100)
        logger.info(f"Successfully read {len(header)} bytes from the index file")
    except Exception as e:
        logger.error(f"Error validating index file: {e}")
        return False
    else:
        return True

def load_faiss_index():
    """Return a retriever backed by the FAISS store saved under ``faiss_index``.

    When ``faiss_index/index.faiss`` is missing or fails
    ``validate_index_file``, any existing file is removed and
    ``create_faiss_index()`` is called to build a new one. The index file is
    then read once with ``faiss.read_index`` and the store is loaded through
    ``FAISS.load_local`` using ``all-MiniLM-L6-v2`` sentence embeddings.

    Returns:
        The retriever produced by ``as_retriever()`` on the loaded store.

    Raises:
        RuntimeError: If the index file still does not exist after the
            rebuild attempt.
        Exception: Any error raised while reading or loading the index is
            reported in the Streamlit page, logged, and re-raised unchanged.
    """
    index_path = "faiss_index/index.faiss"

    # Short-circuits exactly like the original: validation only runs when the
    # file exists.
    index_usable = os.path.exists(index_path) and validate_index_file(index_path)
    if not index_usable:
        st.warning("Index file is missing or corrupted. Creating a new one...")
        if os.path.exists(index_path):
            os.remove(index_path)
            st.info("Deleted the corrupted index file.")
        create_faiss_index()

    rebuilt_ok = os.path.exists(index_path)
    if not rebuilt_ok:
        st.error("Failed to create the FAISS index. Please check the 'docs' directory and try again.")
        raise RuntimeError("FAISS index creation failed.")

    try:
        # Direct read of the raw index file before handing over to LangChain.
        raw_index = faiss.read_index(index_path)
        if raw_index is None:
            raise ValueError("Failed to read FAISS index.")

        embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        # NOTE(review): newer langchain_community releases appear to require
        # allow_dangerous_deserialization=True here -- confirm against the
        # pinned version before upgrading.
        store = FAISS.load_local("faiss_index", embedder)
        store_incomplete = store.index is None or store.index_to_docstore_id is None
        if store_incomplete:
            raise ValueError("FAISS index or docstore_id mapping is None.")

        return store.as_retriever()
    except Exception as e:
        st.error(f"Failed to load FAISS index: {e}")
        logger.exception("Exception in load_faiss_index")
        raise

def process_answer(instruction):
    """Answer *instruction* with a retrieval-augmented QA chain.

    A "stuff" ``RetrievalQA`` chain is assembled from the FAISS retriever and
    the LLM wrapper, then invoked with the question.

    Args:
        instruction: The user's question, passed straight to the chain.

    Returns:
        A ``(answer, full_response)`` tuple, where ``answer`` is the
        ``'result'`` entry of the chain output and ``full_response`` is the
        whole output. On any exception the error is shown in the Streamlit
        page and logged, and a generic message plus an empty dict is returned.
    """
    try:
        # Retriever is loaded before the LLM, as in the original sequence.
        doc_retriever = load_faiss_index()
        chain_options = {
            "llm": load_llm(),
            "chain_type": "stuff",
            "retriever": doc_retriever,
            "return_source_documents": True,
        }
        qa_chain = RetrievalQA.from_chain_type(**chain_options)
        response = qa_chain.invoke(instruction)
        return response['result'], response
    except Exception as e:
        st.error(f"An error occurred while processing the answer: {e}")
        logger.exception("Exception in process_answer")
        return "An error occurred while processing your request.", {}

def diagnose_faiss_index():
    """Show basic diagnostics about the FAISS index file in the Streamlit page.

    Writes the size, the last three octal permission digits and the owner uid
    of ``faiss_index/index.faiss``, followed by the uid of the current
    process, then runs ``validate_index_file`` on the file. When the file
    cannot be stat'ed, a warning is shown instead.

    ``os.getuid`` is only available on Unix. On platforms without it the
    process user ID is displayed as ``N/A`` rather than raising
    ``AttributeError`` and taking the whole page down.
    """
    index_path = "faiss_index/index.faiss"

    try:
        # A single stat call gives one consistent snapshot and removes the
        # window between an exists() check and the later stat() calls.
        file_info = os.stat(index_path)
    except OSError:
        st.warning("Index file does not exist.")
        return

    st.write(f"Index file size: {file_info.st_size} bytes")
    st.write(f"Index file permissions: {oct(file_info.st_mode)[-3:]}")
    st.write(f"Index file owner: {file_info.st_uid}")

    # os.getuid does not exist on Windows; look it up defensively.
    get_uid = getattr(os, "getuid", None)
    current_uid = get_uid() if get_uid is not None else "N/A"
    st.write(f"Current process user ID: {current_uid}")

    validate_index_file(index_path)

def main():
    """Render the Streamlit page: title, description, diagnostics and Q&A form.

    The index diagnostics are shown on every run. When the "Ask" button is
    pressed with a non-blank question, the question is echoed, answered via
    ``process_answer`` and both the answer and the full chain output are
    written to the page. A blank question only produces a warning, so the
    retrieval and generation chain is not run for empty input.
    """
    # The title emoji (books + memo) are written as \U escapes: the previous
    # literal was mojibake from a bad encoding round-trip, and escapes keep
    # the source ASCII-safe so it cannot be corrupted the same way again.
    st.title("Search Your PDF \U0001F4DA\U0001F4DD")

    with st.expander("About the App"):
        st.markdown(
            """
            This is a Generative AI powered Question and Answering app that responds to questions about your PDF File.
            """
        )

    diagnose_faiss_index()

    question = st.text_area("Enter your Question")

    if st.button("Ask"):
        # Guard against empty / whitespace-only input before loading models.
        if not question or not question.strip():
            st.warning("Please enter a question before pressing Ask.")
            return

        st.info("Your Question: " + question)
        st.info("Your Answer")
        try:
            answer, metadata = process_answer(question)
            st.write(answer)
            st.write(metadata)
        except Exception as e:
            # Top-level UI boundary: report and log rather than crash the page.
            st.error(f"An unexpected error occurred: {e}")
            logger.exception("Unexpected error in main function")

# Script entry point: render the app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()