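"""Streamlit app for question answering over PDF documents.

Retrieval-augmented QA: a FAISS vector store built from sentence-transformers
embeddings retrieves relevant chunks, and a LaMini-T5 text2text pipeline
generates the answer through LangChain's RetrievalQA chain.

Run with: streamlit run <this_file>.py

Assumed dependencies: streamlit, transformers, faiss-cpu (or faiss-gpu),
langchain, langchain-community, and sentence-transformers (required by
HuggingFaceEmbeddings).
"""
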
import streamlit as st
import os
import logging
import faiss
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from ingest import create_faiss_index  # builds and persists the FAISS index (see ingest.py)
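
# create_faiss_index is assumed to take the text chunks and an embeddings
# object, build the vector store, and persist it under "faiss_index/".
# A minimal sketch of that assumed interface:
#
#     def create_faiss_index(texts, embeddings):
#         db = FAISS.from_texts(texts, embeddings)
#         db.save_local("faiss_index")
#
# Adjust to match the actual implementation in ingest.py.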

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Local path or Hugging Face hub ID of the LLM; the hub ID for this model is
# "MBZUAI/LaMini-T5-738M" if the weights are not already available locally.
checkpoint = "LaMini-T5-738M"

@st.cache_resource
def load_llm():
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    pipe = pipeline(
        'text2text-generation',
        model=model,
        tokenizer=tokenizer,
        max_length=256,
        do_sample=True,
        temperature=0.3,
        top_p=0.95
    )
    return HuggingFacePipeline(pipeline=pipe)

def validate_index_file(index_path):
    try:
        with open(index_path, 'rb') as f:
            data = f.read(100)
        logger.info(f"Successfully read {len(data)} bytes from the index file")
        return True
    except Exception as e:
        logger.error(f"Error validating index file: {e}")
        return False

def load_faiss_index():
    index_path = "faiss_index/index.faiss"
    if not os.path.exists(index_path):
        st.warning("Index file not found. Creating a new one...")
        # Pass the required arguments to create_faiss_index
        documents = []  # Load your documents here or from a specific directory
        texts = []      # Process your documents to get text chunks
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        create_faiss_index(texts, embeddings)  # Ensure this is updated with correct function

    if not os.path.exists(index_path):
        st.error("Failed to create the FAISS index. Please check the 'docs' directory and try again.")
        raise RuntimeError("FAISS index creation failed.")

    try:
        # faiss.read_index raises if the file is corrupt or unreadable, so this
        # serves as a sanity check before loading the full LangChain store.
        faiss.read_index(index_path)

        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        # Note: recent langchain-community releases may additionally require
        # allow_dangerous_deserialization=True for pickle-backed indexes.
        db = FAISS.load_local("faiss_index", embeddings)
        if db.index is None or db.index_to_docstore_id is None:
            raise ValueError("FAISS index or docstore_id mapping is None.")

        return db.as_retriever()
    except Exception as e:
        st.error(f"Failed to load FAISS index: {e}")
        logger.exception("Exception in load_faiss_index")
        raise

def process_answer(instruction):
    try:
        retriever = load_faiss_index()
        llm = load_llm()
        qa = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True
        )
        # RetrievalQA returns a dict containing 'result' and 'source_documents'
        response = qa.invoke(instruction)
        answer = response['result']
        return answer, response
    except Exception as e:
        st.error(f"An error occurred while processing the answer: {e}")
        logger.exception("Exception in process_answer")
        return "An error occurred while processing your request.", {}

def diagnose_faiss_index():
    index_path = "faiss_index/index.faiss"
    if os.path.exists(index_path):
        st.write(f"Index file size: {os.path.getsize(index_path)} bytes")
        st.write(f"Index file permissions: {oct(os.stat(index_path).st_mode)[-3:]}")
        st.write(f"Index file owner: {os.stat(index_path).st_uid}")
        st.write(f"Current process user ID: {os.getuid()}")
        validate_index_file(index_path)
    else:
        st.warning("Index file does not exist.")

def main():
    st.title("Search Your PDF 📚")
    
    with st.expander("About the App"):
        st.markdown(
            """
            This is a generative AI-powered question answering app that responds to questions about your PDF files.
            """
        )

    diagnose_faiss_index()

    question = st.text_area("Enter your Question")
    
    if st.button("Ask"):
        st.info("Your Question: " + question)
        st.info("Your Answer")
        try:
            answer, metadata = process_answer(question)
            st.write(answer)
            st.write(metadata)
        except Exception as e:
            st.error(f"An unexpected error occurred: {e}")
            logger.exception("Unexpected error in main function")

if __name__ == '__main__':
    main()
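
# Expected on-disk layout (an assumption based on the paths used above):
#
#   faiss_index/
#       index.faiss   - raw FAISS index, checked by faiss.read_index()
#       index.pkl     - docstore and id mapping written by FAISS.save_local()
#   LaMini-T5-738M/   - local copy of the model weights (or pulled from the hub)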