jarif committed on
Commit
658843d
·
verified ·
1 Parent(s): 0f1d4a4

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +124 -125
app.py CHANGED
@@ -1,125 +1,124 @@
1
- import streamlit as st
2
- import os
3
- import shutil
4
- import faiss
5
- import logging
6
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
7
- from langchain_community.embeddings import HuggingFaceEmbeddings
8
- from langchain_community.vectorstores import FAISS
9
- from langchain_community.llms import HuggingFacePipeline
10
- from langchain.chains import RetrievalQA
11
- from ingest import create_faiss_index
12
-
13
- # Set up logging
14
- logging.basicConfig(level=logging.INFO)
15
- logger = logging.getLogger(__name__)
16
-
17
- checkpoint = "LaMini-T5-738M"
18
-
19
- @st.cache_resource
20
- def load_llm():
21
- tokenizer = AutoTokenizer.from_pretrained(checkpoint)
22
- model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
23
- pipe = pipeline(
24
- 'text2text-generation',
25
- model=model,
26
- tokenizer=tokenizer,
27
- max_length=256,
28
- do_sample=True,
29
- temperature=0.3,
30
- top_p=0.95
31
- )
32
- return HuggingFacePipeline(pipeline=pipe)
33
-
34
- def validate_index_file(index_path):
35
- try:
36
- with open(index_path, 'rb') as f:
37
- data = f.read(100)
38
- logger.info(f"Successfully read {len(data)} bytes from the index file")
39
- return True
40
- except Exception as e:
41
- logger.error(f"Error validating index file: {e}")
42
- return False
43
-
44
- def load_faiss_index():
45
- index_path = "faiss_index/index.faiss"
46
- if not os.path.exists(index_path):
47
- st.warning("Index file not found. Creating a new one...")
48
- create_faiss_index()
49
-
50
- if not os.path.exists(index_path):
51
- st.error("Failed to create the FAISS index. Please check the 'docs' directory and try again.")
52
- raise RuntimeError("FAISS index creation failed.")
53
-
54
- try:
55
- index = faiss.read_index(index_path)
56
- if index is None:
57
- raise ValueError("Failed to read FAISS index.")
58
-
59
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
60
- db = FAISS.load_local("faiss_index", embeddings)
61
- if db.index is None or db.index_to_docstore_id is None:
62
- raise ValueError("FAISS index or docstore_id mapping is None.")
63
-
64
- return db.as_retriever()
65
- except Exception as e:
66
- st.error(f"Failed to load FAISS index: {e}")
67
- logger.exception("Exception in load_faiss_index")
68
- raise
69
-
70
- def process_answer(instruction):
71
- try:
72
- retriever = load_faiss_index()
73
- llm = load_llm()
74
- qa = RetrievalQA.from_chain_type(
75
- llm=llm,
76
- chain_type="stuff",
77
- retriever=retriever,
78
- return_source_documents=True
79
- )
80
- generated_text = qa.invoke(instruction)
81
- answer = generated_text['result']
82
- return answer, generated_text
83
- except Exception as e:
84
- st.error(f"An error occurred while processing the answer: {e}")
85
- logger.exception("Exception in process_answer")
86
- return "An error occurred while processing your request.", {}
87
-
88
- def diagnose_faiss_index():
89
- index_path = "faiss_index/index.faiss"
90
- if os.path.exists(index_path):
91
- st.write(f"Index file size: {os.path.getsize(index_path)} bytes")
92
- st.write(f"Index file permissions: {oct(os.stat(index_path).st_mode)[-3:]}")
93
- st.write(f"Index file owner: {os.stat(index_path).st_uid}")
94
- st.write(f"Current process user ID: {os.getuid()}")
95
- validate_index_file(index_path)
96
- else:
97
- st.warning("Index file does not exist.")
98
-
99
- def main():
100
- st.title("Search Your PDF 📚📝")
101
-
102
- with st.expander("About the App"):
103
- st.markdown(
104
- """
105
- This is a Generative AI powered Question and Answering app that responds to questions about your PDF File.
106
- """
107
- )
108
-
109
- diagnose_faiss_index()
110
-
111
- question = st.text_area("Enter your Question")
112
-
113
- if st.button("Ask"):
114
- st.info("Your Question: " + question)
115
- st.info("Your Answer")
116
- try:
117
- answer, metadata = process_answer(question)
118
- st.write(answer)
119
- st.write(metadata)
120
- except Exception as e:
121
- st.error(f"An unexpected error occurred: {e}")
122
- logger.exception("Unexpected error in main function")
123
-
124
- if __name__ == '__main__':
125
- main()
 
1
+ import streamlit as st
2
+ import os
3
+ import logging
4
+ import faiss
5
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
6
+ from langchain_community.embeddings import HuggingFaceEmbeddings
7
+ from langchain_community.vectorstores import FAISS
8
+ from langchain_community.llms import HuggingFacePipeline
9
+ from langchain.chains import RetrievalQA
10
+ from ingest import create_faiss_index
11
+
12
+ # Set up logging
13
+ logging.basicConfig(level=logging.INFO)
14
+ logger = logging.getLogger(__name__)
15
+
16
+ checkpoint = "LaMini-T5-738M"
17
+
18
+ @st.cache_resource
19
+ def load_llm():
20
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
21
+ model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
22
+ pipe = pipeline(
23
+ 'text2text-generation',
24
+ model=model,
25
+ tokenizer=tokenizer,
26
+ max_length=256,
27
+ do_sample=True,
28
+ temperature=0.3,
29
+ top_p=0.95
30
+ )
31
+ return HuggingFacePipeline(pipeline=pipe)
32
+
33
+ def validate_index_file(index_path):
34
+ try:
35
+ with open(index_path, 'rb') as f:
36
+ data = f.read(100)
37
+ logger.info(f"Successfully read {len(data)} bytes from the index file")
38
+ return True
39
+ except Exception as e:
40
+ logger.error(f"Error validating index file: {e}")
41
+ return False
42
+
43
+ def load_faiss_index():
44
+ index_path = "faiss_index/index.faiss"
45
+ if not os.path.exists(index_path):
46
+ st.warning("Index file not found. Creating a new one...")
47
+ create_faiss_index()
48
+
49
+ if not os.path.exists(index_path):
50
+ st.error("Failed to create the FAISS index. Please check the 'docs' directory and try again.")
51
+ raise RuntimeError("FAISS index creation failed.")
52
+
53
+ try:
54
+ index = faiss.read_index(index_path)
55
+ if index is None:
56
+ raise ValueError("Failed to read FAISS index.")
57
+
58
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
59
+ db = FAISS.load_local("faiss_index", embeddings)
60
+ if db.index is None or db.index_to_docstore_id is None:
61
+ raise ValueError("FAISS index or docstore_id mapping is None.")
62
+
63
+ return db.as_retriever()
64
+ except Exception as e:
65
+ st.error(f"Failed to load FAISS index: {e}")
66
+ logger.exception("Exception in load_faiss_index")
67
+ raise
68
+
69
+ def process_answer(instruction):
70
+ try:
71
+ retriever = load_faiss_index()
72
+ llm = load_llm()
73
+ qa = RetrievalQA.from_chain_type(
74
+ llm=llm,
75
+ chain_type="stuff",
76
+ retriever=retriever,
77
+ return_source_documents=True
78
+ )
79
+ generated_text = qa.invoke(instruction)
80
+ answer = generated_text['result']
81
+ return answer, generated_text
82
+ except Exception as e:
83
+ st.error(f"An error occurred while processing the answer: {e}")
84
+ logger.exception("Exception in process_answer")
85
+ return "An error occurred while processing your request.", {}
86
+
87
+ def diagnose_faiss_index():
88
+ index_path = "faiss_index/index.faiss"
89
+ if os.path.exists(index_path):
90
+ st.write(f"Index file size: {os.path.getsize(index_path)} bytes")
91
+ st.write(f"Index file permissions: {oct(os.stat(index_path).st_mode)[-3:]}")
92
+ st.write(f"Index file owner: {os.stat(index_path).st_uid}")
93
+ st.write(f"Current process user ID: {os.getuid()}")
94
+ validate_index_file(index_path)
95
+ else:
96
+ st.warning("Index file does not exist.")
97
+
98
+ def main():
99
+ st.title("Search Your PDF 📚📝")
100
+
101
+ with st.expander("About the App"):
102
+ st.markdown(
103
+ """
104
+ This is a Generative AI powered Question and Answering app that responds to questions about your PDF File.
105
+ """
106
+ )
107
+
108
+ diagnose_faiss_index()
109
+
110
+ question = st.text_area("Enter your Question")
111
+
112
+ if st.button("Ask"):
113
+ st.info("Your Question: " + question)
114
+ st.info("Your Answer")
115
+ try:
116
+ answer, metadata = process_answer(question)
117
+ st.write(answer)
118
+ st.write(metadata)
119
+ except Exception as e:
120
+ st.error(f"An unexpected error occurred: {e}")
121
+ logger.exception("Unexpected error in main function")
122
+
123
+ if __name__ == '__main__':
124
+ main()