jarif committed on
Commit
01aade3
Β·
verified Β·
1 Parent(s): 13c2214

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -110
app.py CHANGED
@@ -1,111 +1,93 @@
1
- import streamlit as st
2
- import os
3
- import faiss
4
- import logging
5
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
6
- from langchain_community.embeddings import HuggingFaceEmbeddings
7
- from langchain_community.vectorstores import FAISS
8
- from langchain_community.llms import HuggingFacePipeline
9
- from langchain.chains import RetrievalQA
10
- from ingest import create_faiss_index
11
-
12
- # Set up logging
13
- logging.basicConfig(level=logging.INFO)
14
- logger = logging.getLogger(__name__)
15
-
16
- checkpoint = "LaMini-T5-738M"
17
-
18
- @st.cache_resource
19
- def load_llm():
20
- tokenizer = AutoTokenizer.from_pretrained(checkpoint)
21
- model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
22
- pipe = pipeline(
23
- 'text2text-generation',
24
- model=model,
25
- tokenizer=tokenizer,
26
- max_length=256,
27
- do_sample=True,
28
- temperature=0.3,
29
- top_p=0.95
30
- )
31
- return HuggingFacePipeline(pipeline=pipe)
32
-
33
- def validate_index_file(index_path):
34
- try:
35
- with open(index_path, 'rb') as f:
36
- data = f.read(100)
37
- logger.info(f"Successfully read {len(data)} bytes from the index file")
38
- return True
39
- except Exception as e:
40
- logger.error(f"Error validating index file: {e}")
41
- return False
42
-
43
- def load_faiss_index():
44
- index_path = "faiss_index/index.faiss"
45
- if not os.path.exists(index_path):
46
- st.warning("Index file not found. Creating a new one...")
47
- create_faiss_index()
48
-
49
- if not os.path.exists(index_path):
50
- st.error("Failed to create the FAISS index. Please check the 'docs' directory and try again.")
51
- raise RuntimeError("FAISS index creation failed.")
52
-
53
- try:
54
- index = faiss.read_index(index_path)
55
- if index is None:
56
- raise ValueError("Failed to read FAISS index.")
57
-
58
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
59
- db = FAISS.load_local("faiss_index", embeddings)
60
- if db.index is None or db.index_to_docstore_id is None:
61
- raise ValueError("FAISS index or docstore_id mapping is None.")
62
-
63
- return db.as_retriever()
64
- except Exception as e:
65
- st.error(f"Failed to load FAISS index: {e}")
66
- logger.exception("Exception in load_faiss_index")
67
- raise
68
-
69
- def process_answer(instruction):
70
- try:
71
- retriever = load_faiss_index()
72
- llm = load_llm()
73
- qa = RetrievalQA.from_chain_type(
74
- llm=llm,
75
- chain_type="stuff",
76
- retriever=retriever,
77
- return_source_documents=True
78
- )
79
- generated_text = qa.invoke(instruction)
80
- answer = generated_text['result']
81
- return answer, generated_text
82
- except Exception as e:
83
- st.error(f"An error occurred while processing the answer: {e}")
84
- logger.exception("Exception in process_answer")
85
- return "An error occurred while processing your request.", {}
86
-
87
- def main():
88
- st.title("Search Your PDF πŸ“šπŸ“")
89
-
90
- with st.expander("About the App"):
91
- st.markdown(
92
- """
93
- This is a Generative AI powered Question and Answering app that responds to questions about your PDF File.
94
- """
95
- )
96
-
97
- question = st.text_area("Enter your Question")
98
-
99
- if st.button("Ask"):
100
- st.info("Your Question: " + question)
101
- st.info("Your Answer")
102
- try:
103
- answer, metadata = process_answer(question)
104
- st.write(answer)
105
- st.write(metadata)
106
- except Exception as e:
107
- st.error(f"An unexpected error occurred: {e}")
108
- logger.exception("Unexpected error in main function")
109
-
110
- if __name__ == '__main__':
111
  main()
 
1
+ import streamlit as st
2
+ import os
3
+ import logging
4
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
5
+ from langchain_community.embeddings import HuggingFaceEmbeddings
6
+ from langchain_community.vectorstores import Chroma
7
+ from langchain_community.llms import HuggingFacePipeline
8
+ from langchain.chains import RetrievalQA
9
+ from ingest import create_chroma_db
10
+
11
+ # Set up logging
12
+ logging.basicConfig(level=logging.INFO)
13
+ logger = logging.getLogger(__name__)
14
+
15
+ checkpoint = "LaMini-T5-738M"
16
+
17
@st.cache_resource
def load_llm():
    """Build and cache the question-answering language model.

    Loads the LaMini-T5-738M checkpoint once per Streamlit session
    (``st.cache_resource`` memoizes the result) and wraps the Hugging Face
    text2text-generation pipeline for use as a LangChain LLM.

    Returns:
        HuggingFacePipeline: the LangChain-compatible LLM wrapper.
    """
    tok = AutoTokenizer.from_pretrained(checkpoint)
    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    # Sampling is enabled with a low temperature so answers stay focused
    # while avoiding fully deterministic decoding.
    generation_pipeline = pipeline(
        'text2text-generation',
        model=seq2seq_model,
        tokenizer=tok,
        max_length=256,
        do_sample=True,
        temperature=0.3,
        top_p=0.95,
    )
    return HuggingFacePipeline(pipeline=generation_pipeline)
31
+
32
def load_chroma_db():
    """Open (creating if necessary) the persisted Chroma store and return a retriever.

    If the persistence directory is missing, the store is built via
    ``ingest.create_chroma_db()`` first.

    Returns:
        A LangChain retriever over the Chroma vector store.

    Raises:
        RuntimeError: if the store could not be created.
        Exception: any failure while opening the store is surfaced in the
            Streamlit UI, logged with traceback, and re-raised.
    """
    chroma_dir = "chroma_db"
    if not os.path.exists(chroma_dir):
        st.warning("Chroma database not found. Creating a new one...")
        create_chroma_db()

    if not os.path.exists(chroma_dir):
        st.error("Failed to create the Chroma database. Please check the 'docs' directory and try again.")
        raise RuntimeError("Chroma database creation failed.")

    try:
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        # BUG FIX: Chroma has no `load_local()` classmethod (that is a FAISS
        # API); a persisted Chroma store is reopened by passing
        # persist_directory and embedding_function to the constructor.
        db = Chroma(persist_directory=chroma_dir, embedding_function=embeddings)
        return db.as_retriever()
    except Exception as e:
        st.error(f"Failed to load Chroma database: {e}")
        logger.exception("Exception in load_chroma_db")
        raise
50
+
51
def process_answer(instruction):
    """Answer *instruction* using RetrievalQA over the Chroma store.

    Returns:
        tuple: ``(answer_text, raw_chain_output)`` on success; on any
        failure, a user-facing error string and an empty dict (the error
        is also shown in the UI and logged — this function never raises).
    """
    try:
        # Load the retriever first, then the (cached) LLM, matching the
        # order in which their side effects (warnings, downloads) appear.
        retriever = load_chroma_db()
        language_model = load_llm()
        qa_chain = RetrievalQA.from_chain_type(
            llm=language_model,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True
        )
        chain_output = qa_chain.invoke(instruction)
        return chain_output['result'], chain_output
    except Exception as e:
        st.error(f"An error occurred while processing the answer: {e}")
        logger.exception("Exception in process_answer")
        return "An error occurred while processing your request.", {}
68
+
69
def main():
    """Streamlit entry point: render the PDF question-answering UI."""
    st.title("Search Your PDF πŸ“šπŸ“")

    with st.expander("About the App"):
        st.markdown(
            """
            This is a Generative AI powered Question and Answering app that responds to questions about your PDF File.
            """
        )

    question = st.text_area("Enter your Question")

    if st.button("Ask"):
        # Echo the question back, then stream the answer below it.
        st.info("Your Question: " + question)
        st.info("Your Answer")
        try:
            answer, metadata = process_answer(question)
            st.write(answer)
            st.write(metadata)
        except Exception as e:
            # process_answer handles its own errors; this guards the UI calls.
            st.error(f"An unexpected error occurred: {e}")
            logger.exception("Unexpected error in main function")


if __name__ == '__main__':
    main()