Brahmadev619 committed on
Commit
b903054
·
verified ·
1 Parent(s): c8bb7a3

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -0
app.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from dotenv import load_dotenv
3
+ from PyPDF2 import PdfReader
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from langchain_openai import OpenAIEmbeddings
6
+ from langchain.vectorstores import FAISS
7
+ # from langchain_community.vectorstores import FAISS
8
+ from langchain.embeddings import HuggingFaceEmbeddings
9
+ from langchain.memory import ConversationBufferMemory
10
+ from langchain.chains import ConversationalRetrievalChain
11
+ from langchain.chat_models import ChatOpenAI
12
+ from htmlTemplates import css, bot_template, user_template
13
+ from langchain.embeddings import HuggingFaceInstructEmbeddings
14
+ from langchain.llms import HuggingFaceHub
15
+
16
def get_pdf_text(pdf_doc):
    """Concatenate the extracted text of every page of every given PDF.

    Args:
        pdf_doc: Iterable of PDF sources, each passed unchanged to
            ``PdfReader``. ``None`` or an empty iterable is treated as
            "no documents".

    Returns:
        One string holding the page texts in document order, then page
        order. Returns ``""`` when there are no documents or no page
        yields any text.
    """
    page_texts = []
    for pdf in pdf_doc or []:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # A page may yield no text (for example a scanned image); fall
            # back to "" so a falsy result such as None cannot break the
            # concatenation with a TypeError.
            page_texts.append(page.extract_text() or "")
    # Join once instead of growing a string with repeated ``+=``.
    return "".join(page_texts)
23
+
24
+
25
def get_text_chunk(row_text):
    """Split raw document text into overlapping chunks.

    The text is split on newlines by ``CharacterTextSplitter`` with a chunk
    size of 1000 and an overlap of 200, both measured with ``len``.

    Args:
        row_text: The full text to split.

    Returns:
        Whatever ``CharacterTextSplitter.split_text`` returns for
        ``row_text`` (the chunks, in order).
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    return CharacterTextSplitter(**splitter_options).split_text(row_text)
34
+
35
+
36
def get_vectorstore(text_chunk):
    """Embed the text chunks with OpenAI and index them in a FAISS store.

    Args:
        text_chunk: Non-empty list of text chunks to embed and index.

    Returns:
        The vector store produced by ``FAISS.from_texts``.

    Raises:
        ValueError: If ``text_chunk`` is empty, since there is nothing to
            embed or index.
    """
    # Imported here because this function reads the API key through
    # ``os.getenv`` and the module's import block does not import ``os``.
    import os

    # Fail early with a clear message, before any embedding request is made.
    if not text_chunk:
        raise ValueError("Cannot build a vector store from an empty list of text chunks.")

    embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
    # Alternative embedding backend kept for reference:
    # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
    return FAISS.from_texts(text_chunk, embeddings)
41
+
42
+
43
def get_conversation_chain(vectorstores):
    """Build a conversational retrieval chain over the given vector store.

    The chain combines a ``ChatOpenAI`` model, the store's retriever, and a
    ``ConversationBufferMemory`` that keeps the dialogue under the
    ``"chat_history"`` key as message objects.

    Args:
        vectorstores: Vector store exposing ``as_retriever()``.

    Returns:
        The chain created by ``ConversationalRetrievalChain.from_llm``.
    """
    # Imported here because this function reads the API key through
    # ``os.getenv`` and the module's import block does not import ``os``.
    import os

    llm = ChatOpenAI(openai_api_key=os.getenv("OPENAI_API_KEY"))
    # Alternative LLM backend kept for reference:
    # llm = HuggingFaceHub(repo_id="google/flan-t5-base", model_kwargs={"temperature":0.5, "max_length":512})
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstores.as_retriever(),
        memory=memory,
    )
51
+
52
+
53
def user_input(user_question):
    """Send the question to the conversation chain and render the chat.

    The chain stored in ``st.session_state.conversation`` is called with the
    question. The returned ``"chat_history"`` is saved to
    ``st.session_state.chat_history`` and every message is rendered with the
    user template (even positions) or the bot template (odd positions).

    Args:
        user_question: The question typed by the user.

    Returns:
        None. If no conversation chain exists yet, a warning is shown and
        nothing else happens.
    """
    import html

    # The chain is only created once documents have been processed; calling
    # it before that would fail because the stored value is None.
    if "conversation" not in st.session_state or st.session_state.conversation is None:
        st.warning("Please upload and process your documents before asking a question.")
        return

    response = st.session_state.conversation({"question": user_question})
    st.session_state.chat_history = response["chat_history"]

    for indx, msg in enumerate(st.session_state.chat_history):
        template = user_template if indx % 2 == 0 else bot_template
        # Messages are written into raw HTML (unsafe_allow_html=True), so
        # escape them to stop user or model text being interpreted as markup.
        st.write(
            template.replace("{{MSG}}", html.escape(msg.content)),
            unsafe_allow_html=True,
        )
62
+
63
+
64
+
65
def main():
    """Run the Streamlit "Chat with multiple PDFs" page.

    Loads environment variables, configures the page, shows the question
    box, and offers a sidebar where documents are uploaded and processed
    into a conversation chain stored in ``st.session_state.conversation``.
    """
    # Load secrets (such as the API key) from the .env file.
    load_dotenv()

    # Configure the page.
    st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
    st.write(css, unsafe_allow_html=True)
    if "conversation" not in st.session_state:
        st.session_state.conversation = None

    st.header("Chat with multiple PDFs :books:")
    user_question = st.text_input("Ask a question about your docs")
    if user_question:
        if st.session_state.conversation is None:
            # No chain exists until documents are processed; asking now
            # would try to call None.
            st.warning("Please upload and process your documents before asking a question.")
        else:
            user_input(user_question)

    # Sidebar: upload documents and build the conversation chain.
    with st.sidebar:
        st.subheader("Your Documents")
        pdf_doc = st.file_uploader(label="Upload your documents", accept_multiple_files=True)
        if st.button("Process"):
            if not pdf_doc:
                # Nothing uploaded: skip the pipeline instead of running it
                # on empty input.
                st.warning("Please upload at least one document before processing.")
            else:
                with st.spinner(text="Processing"):
                    # Get the PDF text.
                    row_text = get_pdf_text(pdf_doc)
                    # Split the text into chunks.
                    text_chunk = get_text_chunk(row_text)
                    if not text_chunk:
                        # No extractable text, so there is nothing to index.
                        st.error("No text could be extracted from the uploaded documents.")
                    else:
                        # Create the vector store.
                        vectorstores = get_vectorstore(text_chunk)
                        # Create the conversation chain.
                        st.session_state.conversation = get_conversation_chain(vectorstores)


if __name__ == "__main__":
    main()