Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -17,6 +17,18 @@ from bs4 import BeautifulSoup
|
|
17 |
from collections import deque
|
18 |
from audio_recorder_streamlit import audio_recorder
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
def generate_filename(prompt, file_type):
|
21 |
central = pytz.timezone('US/Central')
|
22 |
safe_date_time = datetime.now(central).strftime("%m%d_%I%M")
|
@@ -290,4 +302,67 @@ def main():
|
|
290 |
st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
|
291 |
|
292 |
if __name__ == "__main__":
|
293 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
from collections import deque
|
18 |
from audio_recorder_streamlit import audio_recorder
|
19 |
|
20 |
+
from dotenv import load_dotenv
|
21 |
+
from PyPDF2 import PdfReader
|
22 |
+
from langchain.text_splitter import CharacterTextSplitter
|
23 |
+
from langchain.embeddings import OpenAIEmbeddings
|
24 |
+
from langchain.vectorstores import FAISS
|
25 |
+
from langchain.chat_models import ChatOpenAI
|
26 |
+
from langchain.memory import ConversationBufferMemory
|
27 |
+
from langchain.chains import ConversationalRetrievalChain
|
28 |
+
from htmlTemplates import css, bot_template, user_template
|
29 |
+
|
30 |
+
|
31 |
+
|
32 |
def generate_filename(prompt, file_type):
|
33 |
central = pytz.timezone('US/Central')
|
34 |
safe_date_time = datetime.now(central).strftime("%m%d_%I%M")
|
|
|
302 |
st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
|
303 |
|
304 |
if __name__ == "__main__":
|
305 |
+
main()
|
306 |
+
|
307 |
+
|
308 |
+
|
309 |
+
|
310 |
+
|
311 |
+
|
312 |
+
|
313 |
+
|
314 |
+
|
315 |
+
|
316 |
+
|
317 |
+
def extract_text_from_pdfs(pdf_docs):
    """Concatenate the extracted text of every page of every PDF.

    Args:
        pdf_docs: Iterable of PDF sources, each handed to ``PdfReader``
            (the script passes the files returned by ``st.file_uploader``).
            ``None`` or an empty iterable yields an empty string.

    Returns:
        One string holding the page texts in document order, then page
        order, with no separator inserted between pages.
    """
    page_texts = []
    for pdf in pdf_docs or ():
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # A page without a text layer may produce an empty or missing
            # result; normalise to "" so concatenation can never fail.
            page_texts.append(page.extract_text() or "")
    return "".join(page_texts)
|
324 |
+
|
325 |
+
def split_text_into_chunks(text, chunk_size=1000, chunk_overlap=200):
    """Split ``text`` into overlapping chunks on newline boundaries.

    Args:
        text: The raw text to split.
        chunk_size: Target maximum chunk length, measured with ``len``.
            Defaults to the previously hard-coded 1000.
        chunk_overlap: Number of characters shared between neighbouring
            chunks. Defaults to the previously hard-coded 200.

    Returns:
        A list of text chunks; an empty list when ``text`` is empty.
    """
    if not text:
        # Nothing to split; skip constructing the splitter.
        return []
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return text_splitter.split_text(text)
|
328 |
+
|
329 |
+
def create_vector_store_from_text_chunks(text_chunks):
    """Embed ``text_chunks`` with OpenAI and index them in a FAISS store.

    The API key is read from the ``OPENAI_KEY`` environment variable,
    falling back to ``OPENAI_API_KEY`` when the former is unset or empty.

    Args:
        text_chunks: Non-empty list of strings to embed.

    Returns:
        The vector store built by ``FAISS.from_texts``.

    Raises:
        ValueError: If ``text_chunks`` is empty, since there is nothing to
            index.
    """
    if not text_chunks:
        raise ValueError(
            "No text chunks to index; the uploaded documents produced no text."
        )
    key = os.getenv('OPENAI_KEY') or os.getenv('OPENAI_API_KEY')
    embeddings = OpenAIEmbeddings(openai_api_key=key)
    return FAISS.from_texts(texts=text_chunks, embedding=embeddings)
|
333 |
+
|
334 |
+
def create_conversation_chain(vectorstore):
    """Build a conversational retrieval chain over ``vectorstore``.

    Args:
        vectorstore: A vector store exposing ``as_retriever()`` (the script
            passes the FAISS store built from the uploaded PDFs).

    Returns:
        A ``ConversationalRetrievalChain`` whose buffer memory keeps the
        running dialogue under the ``chat_history`` key.
    """
    import os

    # The embeddings helper reads its key from OPENAI_KEY; hand the same key
    # to the chat model so both components authenticate consistently. When
    # it is not set, leave ChatOpenAI to its own default key lookup.
    key = os.getenv('OPENAI_KEY')
    llm = ChatOpenAI(openai_api_key=key) if key else ChatOpenAI()
    memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory,
    )
|
338 |
+
|
339 |
+
def process_user_input(user_question):
    """Send ``user_question`` to the conversation chain and render the chat.

    Stores the chain's ``chat_history`` in ``st.session_state.chat_history``
    and writes each message using ``user_template`` for even positions and
    ``bot_template`` for odd positions.

    Args:
        user_question: The question text entered by the user.

    Returns:
        None. If no conversation chain has been created yet, a warning is
        shown and nothing else happens.
    """
    import html

    conversation = st.session_state.get("conversation")
    if conversation is None:
        # The chain only exists after PDFs have been processed; calling it
        # before that would raise instead of guiding the user.
        st.warning("Please upload and process your PDFs before asking a question.")
        return

    response = conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    for i, message in enumerate(st.session_state.chat_history):
        template = user_template if i % 2 == 0 else bot_template
        # The templates are rendered as raw HTML, so escape message text to
        # keep markup in questions or answers from being interpreted.
        st.write(
            template.replace("{{MSG}}", html.escape(message.content)),
            unsafe_allow_html=True,
        )
|
346 |
+
|
347 |
+
# "Chat with multiple PDFs" page. These statements run at module level on
# every Streamlit rerun; they are not wrapped in a function.
# NOTE(review): the `main()` call in the `__main__` guard earlier in the file
# appears to run before this section -- confirm that ordering is intended.
load_dotenv()
# st.set_page_config(...) is deliberately not called here.
# NOTE(review): presumably the page is configured elsewhere -- confirm.
st.write(css, unsafe_allow_html=True)

# Make sure the session keys exist before anything reads them, so asking a
# question before processing documents cannot hit a missing attribute.
if "conversation" not in st.session_state:
    st.session_state.conversation = None
if "chat_history" not in st.session_state:
    st.session_state.chat_history = None

st.header("Chat with multiple PDFs :books:")
user_question = st.text_input("Ask a question about your documents:")
if user_question:
    if st.session_state.conversation is None:
        st.warning("Please upload and process your PDFs before asking a question.")
    else:
        process_user_input(user_question)

with st.sidebar:
    st.subheader("Your documents")
    pdf_docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
    if st.button("Process"):
        if not pdf_docs:
            # Nothing uploaded: there is no text to embed or index.
            st.warning("Please upload at least one PDF before processing.")
        else:
            with st.spinner("Processing"):
                raw_text = extract_text_from_pdfs(pdf_docs)
                text_chunks = split_text_into_chunks(raw_text)
                if not text_chunks:
                    # No extractable text means nothing to index.
                    st.error("No extractable text was found in the uploaded PDFs.")
                else:
                    vectorstore = create_vector_store_from_text_chunks(text_chunks)
                    st.session_state.conversation = create_conversation_chain(vectorstore)
|