Brahmadev619 committed on
Commit
407931b
·
verified ·
1 Parent(s): 362ff5c

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -0
app.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import streamlit as st
4
+ from dotenv import load_dotenv
5
+ from PyPDF2 import PdfReader
6
+ from langchain.text_splitter import CharacterTextSplitter
7
+ from langchain_openai import OpenAIEmbeddings
8
+ from langchain.vectorstores import FAISS
9
+ # from langchain_community.vectorstores import FAISS
10
+ from langchain.embeddings import HuggingFaceEmbeddings
11
+ from langchain.memory import ConversationBufferMemory
12
+ from langchain.chains import ConversationalRetrievalChain
13
+ from langchain.chat_models import ChatOpenAI
14
+ from htmlTemplates import css, bot_template, user_template
15
+ from langchain.embeddings import HuggingFaceInstructEmbeddings
16
+ from langchain.llms import HuggingFaceHub
17
+ import os
18
def get_pdf_text(pdf_doc):
    """Concatenate the extracted text of every page of every given PDF.

    Args:
        pdf_doc: Iterable of PDF sources, each passed as-is to
            ``PdfReader``. ``None`` or an empty iterable is treated as
            "no documents".

    Returns:
        One string holding the text of all pages, in the order the
        documents and their pages are iterated, with nothing inserted
        between pages. Returns ``""`` when there are no documents.
    """
    # NOTE(review): ``extract_text()`` may hand back None/"" for pages that
    # have no text layer (e.g. scanned images) depending on the PyPDF2
    # version; ``or ""`` keeps such pages from breaking the concatenation.
    # A single join also avoids rebuilding the string on every page.
    return "".join(
        page.extract_text() or ""
        for pdf in (pdf_doc or [])
        for page in PdfReader(pdf).pages
    )
25
+
26
+
27
def get_text_chunk(row_text):
    """Split the extracted document text into overlapping chunks.

    Args:
        row_text: The full text to split.

    Returns:
        The list of chunks produced by ``CharacterTextSplitter.split_text``.
    """
    # Split on newlines, targeting 1000-character chunks (measured with
    # ``len``) that overlap by 200 characters.
    splitter_options = {
        "separator": "\n",
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    return CharacterTextSplitter(**splitter_options).split_text(row_text)
36
+
37
+
38
def get_vectorstore(text_chunk):
    """Embed the text chunks with OpenAI and index them in a FAISS store.

    The OpenAI API key is read from the ``OPENAI_API_KEY`` environment
    variable.

    Args:
        text_chunk: Non-empty list of text chunks to embed and index.

    Returns:
        The vector store returned by ``FAISS.from_texts``.

    Raises:
        ValueError: If ``text_chunk`` is empty, i.e. there is nothing to
            index.
    """
    # Fail fast with a readable message: an empty chunk list would
    # otherwise only fail later, inside the index construction, with an
    # error that does not point at the real cause (no extractable text).
    if not text_chunk:
        raise ValueError(
            "No text chunks to index: the documents contained no extractable text."
        )

    embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
    return FAISS.from_texts(text_chunk, embeddings)
43
+
44
+
45
def get_conversation_chain(vectorstores):
    """Build a conversational retrieval chain on top of a vector store.

    Args:
        vectorstores: Vector store whose ``as_retriever()`` supplies the
            retriever for the chain.

    Returns:
        A ``ConversationalRetrievalChain`` wired to a ``ChatOpenAI`` model
        (API key taken from ``OPENAI_API_KEY``) and a buffer memory that
        stores the dialogue under the ``"chat_history"`` key as message
        objects.
    """
    chat_model = ChatOpenAI(openai_api_key=os.getenv("OPENAI_API_KEY"))
    history = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )
    return ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=vectorstores.as_retriever(),
        memory=history,
    )
53
+
54
+
55
def user_input(user_question):
    """Send the user's question to the conversation chain and render the chat.

    The chain stored in ``st.session_state.conversation`` is called with the
    question; the returned ``"chat_history"`` is saved back to the session
    state and every message is rendered with the user/bot HTML templates.

    Args:
        user_question: The question typed by the user.

    Returns:
        None. If no conversation chain exists yet, a warning is shown and
        nothing is sent.
    """
    # Function-scope import keeps this block self-contained.
    import html

    # The chain is only created once documents have been processed; before
    # that the session entry is None and calling it would raise TypeError.
    conversation = st.session_state.get("conversation")
    if conversation is None:
        st.warning("Please upload and process your documents before asking a question.")
        return

    response = conversation({"question": user_question})
    st.session_state.chat_history = response["chat_history"]

    # Messages alternate: even positions are rendered with the user
    # template, odd positions with the bot template.
    for indx, msg in enumerate(st.session_state.chat_history):
        template = user_template if indx % 2 == 0 else bot_template
        # The content is user- or model-supplied and is rendered with
        # unsafe_allow_html=True, so escape it to prevent markup injection.
        safe_content = html.escape(msg.content)
        st.write(template.replace("{{MSG}}", safe_content), unsafe_allow_html=True)
64
+
65
+
66
+
67
def main():
    """Run the Streamlit "Chat with multiple PDFs" page.

    Loads environment variables from ``.env``, configures the page, shows
    the question box, and offers a sidebar where documents are uploaded and
    processed into a conversation chain kept in ``st.session_state``.
    """
    # Load secrets (e.g. the OpenAI API key) from the .env file.
    load_dotenv()

    # Configure the page and inject the chat CSS.
    st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
    st.write(css, unsafe_allow_html=True)
    if "conversation" not in st.session_state:
        st.session_state.conversation = None

    st.header("Chat with multiple PDFs :books:")
    user_question = st.text_input("Ask a question about your docs")
    if user_question:
        user_input(user_question)

    # Sidebar: upload documents and build the conversation chain.
    with st.sidebar:
        st.subheader("Your Documents")
        # Only PDFs can be parsed downstream, so restrict the uploader.
        pdf_doc = st.file_uploader(
            label="Upload your documents",
            type="pdf",
            accept_multiple_files=True,
        )
        if st.button("Process"):
            if not pdf_doc:
                # Nothing uploaded: avoid running the pipeline on no input.
                st.warning("Please upload at least one PDF before processing.")
            else:
                with st.spinner(text="Processing"):
                    # Extract the raw text from the uploaded PDFs.
                    row_text = get_pdf_text(pdf_doc)
                    # Split the text into chunks.
                    text_chunk = get_text_chunk(row_text)
                    if not text_chunk:
                        # E.g. scanned PDFs without a text layer: there is
                        # nothing to embed, so report it instead of failing
                        # while building the vector store.
                        st.error("No extractable text was found in the uploaded documents.")
                    else:
                        # Build the vector store and the conversation chain.
                        vectorstores = get_vectorstore(text_chunk)
                        st.session_state.conversation = get_conversation_chain(vectorstores)
102
+
103
+
104
# Script entry point: start the app only when this file is run directly.
if __name__ == "__main__":
    main()