Spaces:

awacke1
/

ChatGPT-QA-Translation-Summary-14

Runtime error

App Files Files Community

awacke1 committed on Jul 2, 2023

Commit

602202c

1 Parent(s): 3e0ae36

Update app.py

Browse files

Files changed (1) hide show

app.py +76 -1

app.py CHANGED Viewed

@@ -17,6 +17,18 @@ from bs4 import BeautifulSoup
 from collections import deque
 from audio_recorder_streamlit import audio_recorder
 def generate_filename(prompt, file_type):
     central = pytz.timezone('US/Central')
     safe_date_time = datetime.now(central).strftime("%m%d_%I%M")
@@ -290,4 +302,67 @@ def main():
             st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
 if __name__ == "__main__":
-    main()

 from collections import deque
 from audio_recorder_streamlit import audio_recorder
+from dotenv import load_dotenv
+from PyPDF2 import PdfReader
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.chat_models import ChatOpenAI
+from langchain.memory import ConversationBufferMemory
+from langchain.chains import ConversationalRetrievalChain
+from htmlTemplates import css, bot_template, user_template
 def generate_filename(prompt, file_type):
     central = pytz.timezone('US/Central')
     safe_date_time = datetime.now(central).strftime("%m%d_%I%M")
             st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
 if __name__ == "__main__":
+    main()
+def extract_text_from_pdfs(pdf_docs):
+    text = ""
+    for pdf in pdf_docs:
+        pdf_reader = PdfReader(pdf)
+        for page in pdf_reader.pages:
+            text += page.extract_text()
+    return text
+def split_text_into_chunks(text):
+    text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len)
+    return text_splitter.split_text(text)
+def create_vector_store_from_text_chunks(text_chunks):
+    key = os.getenv('OPENAI_KEY')
+    embeddings = OpenAIEmbeddings(openai_api_key=key)
+    return FAISS.from_texts(texts=text_chunks, embedding=embeddings)
+def create_conversation_chain(vectorstore):
+    llm = ChatOpenAI()
+    memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
+    return ConversationalRetrievalChain.from_llm(llm=llm, retriever=vectorstore.as_retriever(), memory=memory)
+def process_user_input(user_question):
+    response = st.session_state.conversation({'question': user_question})
+    st.session_state.chat_history = response['chat_history']
+    for i, message in enumerate(st.session_state.chat_history):
+        template = user_template if i % 2 == 0 else bot_template
+        st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
+#def main():
+    load_dotenv()
+    #st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
+    st.write(css, unsafe_allow_html=True)
+    st.header("Chat with multiple PDFs :books:")
+    user_question = st.text_input("Ask a question about your documents:")
+    if user_question:
+        process_user_input(user_question)
+    with st.sidebar:
+        st.subheader("Your documents")
+        pdf_docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
+        if st.button("Process"):
+            with st.spinner("Processing"):
+                raw_text = extract_text_from_pdfs(pdf_docs)
+                text_chunks = split_text_into_chunks(raw_text)
+                vectorstore = create_vector_store_from_text_chunks(text_chunks)
+                st.session_state.conversation = create_conversation_chain(vectorstore)
+#if __name__ == '__main__':
+#    main()