awacke1 committed on
Commit
602202c
·
1 Parent(s): 3e0ae36

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -1
app.py CHANGED
@@ -17,6 +17,18 @@ from bs4 import BeautifulSoup
17
  from collections import deque
18
  from audio_recorder_streamlit import audio_recorder
19
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  def generate_filename(prompt, file_type):
21
  central = pytz.timezone('US/Central')
22
  safe_date_time = datetime.now(central).strftime("%m%d_%I%M")
@@ -290,4 +302,67 @@ def main():
290
  st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
291
 
292
  if __name__ == "__main__":
293
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  from collections import deque
18
  from audio_recorder_streamlit import audio_recorder
19
 
20
+ from dotenv import load_dotenv
21
+ from PyPDF2 import PdfReader
22
+ from langchain.text_splitter import CharacterTextSplitter
23
+ from langchain.embeddings import OpenAIEmbeddings
24
+ from langchain.vectorstores import FAISS
25
+ from langchain.chat_models import ChatOpenAI
26
+ from langchain.memory import ConversationBufferMemory
27
+ from langchain.chains import ConversationalRetrievalChain
28
+ from htmlTemplates import css, bot_template, user_template
29
+
30
+
31
+
32
  def generate_filename(prompt, file_type):
33
  central = pytz.timezone('US/Central')
34
  safe_date_time = datetime.now(central).strftime("%m%d_%I%M")
 
302
  st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
303
 
304
  # Script entry point: main() is invoked here, ahead of the definitions
  # that follow it in the file.
  if __name__ == "__main__":
305
+ main()
306
+
307
+
308
+
309
+
310
+
311
+
312
+
313
+
314
+
315
+
316
+
317
def extract_text_from_pdfs(pdf_docs):
    """Concatenate the extracted text of every page of every PDF.

    Args:
        pdf_docs: Iterable of objects accepted by ``PdfReader`` (the caller
            passes the result of ``st.file_uploader``). ``None`` or an empty
            iterable yields an empty string.

    Returns:
        One string holding the page texts in document/page order, with no
        separator inserted between pages.
    """
    page_texts = []
    for pdf in pdf_docs or ():
        # `or ""` keeps the join safe if a page yields no text (None/empty),
        # e.g. image-only pages -- NOTE(review): depends on PyPDF2 version.
        page_texts.extend(
            page.extract_text() or "" for page in PdfReader(pdf).pages
        )
    # Join once instead of growing a string with += on every page.
    return "".join(page_texts)
324
+
325
def split_text_into_chunks(text):
    """Split ``text`` into overlapping chunks with ``CharacterTextSplitter``.

    The splitter is configured to break on newlines, with a chunk size of
    1000 and an overlap of 200, both measured with ``len``.

    Args:
        text: The text to split.

    Returns:
        Whatever ``CharacterTextSplitter.split_text`` returns for ``text``.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    chunker = CharacterTextSplitter(**splitter_options)
    return chunker.split_text(text)
328
+
329
def create_vector_store_from_text_chunks(text_chunks):
    """Embed ``text_chunks`` with OpenAI and index them in a FAISS store.

    Args:
        text_chunks: Sequence of strings to embed and index.

    Returns:
        The vector store built by ``FAISS.from_texts``.

    Raises:
        ValueError: If ``text_chunks`` is empty.
    """
    # Fail early with an actionable message: with nothing to index, the
    # embedding/FAISS call below would otherwise fail with a less helpful
    # error (NOTE(review): observed as an IndexError in LangChain's FAISS).
    if not text_chunks:
        raise ValueError(
            "No text chunks to index; provide at least one PDF with "
            "extractable text."
        )
    # The API key is read from the OPENAI_KEY environment variable.
    key = os.getenv('OPENAI_KEY')
    embeddings = OpenAIEmbeddings(openai_api_key=key)
    return FAISS.from_texts(texts=text_chunks, embedding=embeddings)
333
+
334
def create_conversation_chain(vectorstore):
    """Build a retrieval-backed conversational chain over ``vectorstore``.

    Args:
        vectorstore: Object exposing ``as_retriever()`` (the caller passes
            the FAISS store built from the uploaded PDFs).

    Returns:
        The chain produced by ``ConversationalRetrievalChain.from_llm``,
        using a ``ConversationBufferMemory`` stored under ``chat_history``.
    """
    import os  # local import keeps this block self-contained

    # Use the same OPENAI_KEY variable the embeddings are configured with so
    # both OpenAI clients share one credential. When it is unset, fall back
    # to ChatOpenAI's own default key lookup, exactly as before.
    key = os.getenv('OPENAI_KEY')
    llm = ChatOpenAI(openai_api_key=key) if key else ChatOpenAI()
    memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory,
    )
338
+
339
def process_user_input(user_question):
    """Send ``user_question`` to the conversation chain and render the chat.

    The chain is read from ``st.session_state.conversation``. Its returned
    ``chat_history`` is stored in ``st.session_state.chat_history`` and then
    written out message by message, alternating the user and bot templates
    (even indices use ``user_template``, odd indices ``bot_template``).

    Args:
        user_question: The question text entered by the user.

    Returns:
        None. If no conversation chain has been created yet, a warning is
        shown and nothing else happens.
    """
    import html  # local import keeps this block self-contained

    # The chain only exists after documents have been processed; asking a
    # question earlier used to crash on the missing session-state entry.
    conversation = st.session_state.get("conversation")
    if conversation is None:
        st.warning("Please upload and process your PDFs before asking a question.")
        return

    response = conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    for i, message in enumerate(st.session_state.chat_history):
        template = user_template if i % 2 == 0 else bot_template
        # Message text comes from the user and the model and is inserted into
        # raw HTML, so escape it to stop markup in a message from being
        # interpreted by the browser.
        safe_content = html.escape(message.content)
        st.write(template.replace("{{MSG}}", safe_content), unsafe_allow_html=True)
346
+
347
# Chat-with-PDFs UI. The statements below run at module level; the original
# `main()` wrapper and its `if __name__ == '__main__'` guard were disabled.
load_dotenv()
# st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
# NOTE(review): left disabled -- Streamlit requires set_page_config to be the
# first Streamlit command, and this section presumably runs after main() has
# already rendered output; confirm before re-enabling.
st.write(css, unsafe_allow_html=True)

# Make sure the key exists on the first run so later reads cannot fail.
if "conversation" not in st.session_state:
    st.session_state.conversation = None

st.header("Chat with multiple PDFs :books:")
user_question = st.text_input("Ask a question about your documents:")
if user_question:
    if st.session_state.conversation is None:
        # No chain yet: documents have not been processed.
        st.warning("Please upload and process your PDFs before asking a question.")
    else:
        process_user_input(user_question)

with st.sidebar:
    st.subheader("Your documents")
    pdf_docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
    if st.button("Process"):
        if not pdf_docs:
            # Nothing uploaded: skip the pipeline instead of failing inside it.
            st.warning("Please upload at least one PDF before processing.")
        else:
            with st.spinner("Processing"):
                raw_text = extract_text_from_pdfs(pdf_docs)
                text_chunks = split_text_into_chunks(raw_text)
                if not text_chunks:
                    # e.g. scanned PDFs with no text layer -- nothing to index.
                    st.warning("No extractable text was found in the uploaded PDFs.")
                else:
                    vectorstore = create_vector_store_from_text_chunks(text_chunks)
                    st.session_state.conversation = create_conversation_chain(vectorstore)