awacke1 committed on
Commit 722702e
Parent: 602202c

Update app.py

Files changed (1)
  1. app.py +65 -59
app.py CHANGED
@@ -185,7 +185,56 @@ def chat_with_file_contents(prompt, file_content, model_choice='gpt-3.5-turbo'):
     conversation.append({'role': 'assistant', 'content': file_content})
     response = openai.ChatCompletion.create(model=model_choice, messages=conversation)
     return response['choices'][0]['message']['content']
-
+
+def extract_text_from_pdfs(pdf_docs):
+    text = ""
+    for pdf in pdf_docs:
+        pdf_reader = PdfReader(pdf)
+        for page in pdf_reader.pages:
+            text += page.extract_text()
+    return text
+
+def split_text_into_chunks(text):
+    text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len)
+    return text_splitter.split_text(text)
+
+def create_vector_store_from_text_chunks(text_chunks):
+    key = os.getenv('OPENAI_KEY')
+    embeddings = OpenAIEmbeddings(openai_api_key=key)
+    return FAISS.from_texts(texts=text_chunks, embedding=embeddings)
+
+def create_conversation_chain(vectorstore):
+    llm = ChatOpenAI()
+    memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
+    return ConversationalRetrievalChain.from_llm(llm=llm, retriever=vectorstore.as_retriever(), memory=memory)
+
+def process_user_input(user_question):
+    response = st.session_state.conversation({'question': user_question})
+    st.session_state.chat_history = response['chat_history']
+
+    for i, message in enumerate(st.session_state.chat_history):
+        template = user_template if i % 2 == 0 else bot_template
+        st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
+
 
 def main():
     # Sidebar and global
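
Note: the relocated helpers reference PdfReader, CharacterTextSplitter, OpenAIEmbeddings, FAISS, ChatOpenAI, ConversationBufferMemory, and ConversationalRetrievalChain, none of which are visible in this hunk. A sketch of the import block app.py would need for them, assuming the PyPDF2 package and the legacy (pre-0.1) langchain module layout these class names match:

    # Assumed imports for the relocated helpers (PyPDF2 plus the legacy
    # langchain layout; the paths differ in langchain >= 0.1).
    import os
    from PyPDF2 import PdfReader
    from langchain.text_splitter import CharacterTextSplitter
    from langchain.embeddings import OpenAIEmbeddings
    from langchain.vectorstores import FAISS
    from langchain.chat_models import ChatOpenAI
    from langchain.memory import ConversationBufferMemory
    from langchain.chains import ConversationalRetrievalChain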
@@ -306,63 +355,20 @@ if __name__ == "__main__":
 
 
 
-def extract_text_from_pdfs(pdf_docs):
-    text = ""
-    for pdf in pdf_docs:
-        pdf_reader = PdfReader(pdf)
-        for page in pdf_reader.pages:
-            text += page.extract_text()
-    return text
-
-def split_text_into_chunks(text):
-    text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len)
-    return text_splitter.split_text(text)
-
-def create_vector_store_from_text_chunks(text_chunks):
-    key = os.getenv('OPENAI_KEY')
-    embeddings = OpenAIEmbeddings(openai_api_key=key)
-    return FAISS.from_texts(texts=text_chunks, embedding=embeddings)
-
-def create_conversation_chain(vectorstore):
-    llm = ChatOpenAI()
-    memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
-    return ConversationalRetrievalChain.from_llm(llm=llm, retriever=vectorstore.as_retriever(), memory=memory)
-
-def process_user_input(user_question):
-    response = st.session_state.conversation({'question': user_question})
-    st.session_state.chat_history = response['chat_history']
-
-    for i, message in enumerate(st.session_state.chat_history):
-        template = user_template if i % 2 == 0 else bot_template
-        st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
-
-#def main():
-load_dotenv()
-#st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
-st.write(css, unsafe_allow_html=True)
-
-st.header("Chat with multiple PDFs :books:")
-user_question = st.text_input("Ask a question about your documents:")
-if user_question:
-    process_user_input(user_question)
-
-with st.sidebar:
-    st.subheader("Your documents")
-    pdf_docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
-    if st.button("Process"):
-        with st.spinner("Processing"):
-            raw_text = extract_text_from_pdfs(pdf_docs)
-            text_chunks = split_text_into_chunks(raw_text)
-            vectorstore = create_vector_store_from_text_chunks(text_chunks)
-            st.session_state.conversation = create_conversation_chain(vectorstore)
-
-#if __name__ == '__main__':
-#    main()
+    load_dotenv()
+    st.write(css, unsafe_allow_html=True)
+
+    st.header("Chat with multiple PDFs :books:")
+    user_question = st.text_input("Ask a question about your documents:")
+    if user_question:
+        process_user_input(user_question)
+
+    with st.sidebar:
+        st.subheader("Your documents")
+        pdf_docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
+        if st.button("Process"):
+            with st.spinner("Processing"):
+                raw_text = extract_text_from_pdfs(pdf_docs)
+                text_chunks = split_text_into_chunks(raw_text)
+                vectorstore = create_vector_store_from_text_chunks(text_chunks)
+                st.session_state.conversation = create_conversation_chain(vectorstore)
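
Note: read together, the five relocated helpers form a small retrieval-augmented chat pipeline: PDF text is extracted, split into 1000-character chunks with 200 characters of overlap, embedded into a FAISS index, and wrapped in a memory-backed ConversationalRetrievalChain. A minimal sketch of that flow outside Streamlit, assuming the helpers above are in scope, a hypothetical sample.pdf exists, and both the OPENAI_KEY variable read by create_vector_store_from_text_chunks and the OPENAI_API_KEY variable read by ChatOpenAI() are set:

    # Sketch: run the moved pipeline end to end without the Streamlit UI.
    with open("sample.pdf", "rb") as pdf_file:         # hypothetical input file
        raw_text = extract_text_from_pdfs([pdf_file])  # 1. PDFs -> plain text
    text_chunks = split_text_into_chunks(raw_text)     # 2. text -> overlapping chunks
    vectorstore = create_vector_store_from_text_chunks(text_chunks)  # 3. chunks -> FAISS index
    conversation = create_conversation_chain(vectorstore)            # 4. index -> chat chain

    result = conversation({'question': 'What is this document about?'})
    print(result['answer'])  # the memory also accumulates result['chat_history']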
 
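
Note: one caveat the new layout keeps from the old one: the question input is evaluated before any documents are processed, so a question typed before clicking 'Process' reaches process_user_input with no st.session_state.conversation set and raises. A guarded variant, as a sketch (only the membership check is new):

    # Sketch: only query the chain once 'Process' has populated session state.
    if user_question:
        if 'conversation' in st.session_state:
            process_user_input(user_question)
        else:
            st.warning("Upload your PDFs and click 'Process' first.")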