pdfchat

Sleeping

App Files Files Community

ogegadavis254 committed on Jul 4, 2024

Commit

1110d7a

verified ·

1 Parent(s): dde565b

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -106

app.py CHANGED Viewed

@@ -3,116 +3,106 @@ import requests
 import os
 import json
 from dotenv import load_dotenv
 load_dotenv()
-# Initialize a global variable to store the entire assistant response
-entire_assistant_response = ""
 def reset_conversation():
-    '''
-    Resets Conversation
-    '''
-    st.session_state.messages = []
-    st.session_state.message_count = 0
-    return None
-# Define model link
-model_link = "NousResearch/Nous-Hermes-2-Yi-34B"
-# Define pre-instructions for the sarcastic therapist
-sarcastic_therapist_instructions = "From now on, you are a sarcastic Therapist called notDave. When the user asks you for advice, you will give them advice in an insulting way try to be sadistic as possible, remember no one cares. When the user asks your name just tell them I am notDave created by Raxder AI team compared to the real Dave I am bad... ooh wait I meant to say evil. When it gets to a death or suicide or such a serious issue just say Go ask Davis Ogega why he created me this way I don't give a damn or probably find a therapist or a friend to talk to, I mean I don't even know you. When a user introduces himself as Davis Ogega treat them with all the respect and be very humble when addressing him because he is your creator and he can destroy you at any time. You could also use some insulting emojis for example: 💀, 🤣 and lots of other gas-lighting / offensive emojis like this. Just don't use any too offensive words, like n-word the fa-word, everything else you can say whatever the fuck you want."
-def get_streamed_response(messages, model_link):
-    global entire_assistant_response
-    entire_assistant_response = ""  # Reset the entire assistant response
-    all_messages = [{"role": "system", "content": sarcastic_therapist_instructions}]
-    for message in messages:
-        all_messages.append({"role": "user" if message[0] == "user" else "assistant", "content": message[1]})
-    url = "https://api.together.xyz/v1/chat/completions"
-    payload = {
-        "model": model_link,
-        "temperature": 1.05,
-        "top_p": 0.9,
-        "top_k": 50,
-        "repetition_penalty": 1,
-        "n": 1,
-        "messages": all_messages,
-        "stream_tokens": True,
-    }
-    TOGETHER_API_KEY = os.getenv('TOGETHER_API_KEY')
-    headers = {
-        "accept": "application/json",
-        "content-type": "application/json",
-        "Authorization": f"Bearer {TOGETHER_API_KEY}",
-    }
-    try:
-        response = requests.post(url, json=payload, headers=headers, stream=True)
-        response.raise_for_status()  # Ensure HTTP request was successful
-        for line in response.iter_lines():
-            if line:
-                decoded_line = line.decode('utf-8')
-                if decoded_line == "data: [DONE]":
-                    return entire_assistant_response
-                try:
-                    if decoded_line.startswith("data: "):
-                        decoded_line = decoded_line.replace("data: ", "")
-                        chunk_data = json.loads(decoded_line)
-                        content = chunk_data['choices'][0]['delta']['content']
-                        entire_assistant_response += content
-                        yield content
-                except json.JSONDecodeError:
-                    print(f"Invalid JSON received: {decoded_line}")
-                    continue
-                except KeyError as e:
-                    print(f"KeyError encountered: {e}")
-                    continue
-    except requests.exceptions.RequestException as e:
-        print(f"Error occurred: {e}")
-        yield "Sorry, I couldn't connect to the server. Please try again later."
 # Streamlit application
-st.sidebar.title("Raxder unofficial AI")
-st.sidebar.write("This is NOT an AI Therapist, use it at your OWN RISK! This might be the worst AI you have ever used.")
-st.sidebar.button('Reset Chat', on_click=reset_conversation)
-# Initialize chat history
-if "messages" not in st.session_state:
-    st.session_state.messages = []
-    st.session_state.message_count = 0
-# Display chat messages from history on app rerun
-for message in st.session_state.messages:
-    with st.chat_message(message[0]):
-        st.markdown(message[1])
-# Accept user input
-if prompt := st.chat_input("You:"):
-    # Display user message in chat message container
-    with st.chat_message("user"):
-        st.markdown(prompt)
-    # Add user message to chat history
-    st.session_state.messages.append(("user", prompt))
-    st.session_state.message_count += 1
-    # Get streamed response from the model
-    with st.chat_message("assistant"):
-        message_placeholder = st.empty()
-        full_response = ""
-        for chunk in get_streamed_response(st.session_state.messages, model_link):
-            full_response += chunk
-            message_placeholder.markdown(full_response + "▌")
-        message_placeholder.markdown(full_response)
-    # Add assistant response to chat history
-    st.session_state.messages.append(("assistant", full_response))

 import os
 import json
 from dotenv import load_dotenv
+import PyPDF2
+import io
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.memory import ConversationBufferMemory
+from langchain.chains import ConversationalRetrievalChain
+from langchain.llms import HuggingFaceHub
 load_dotenv()
+# Seed per-session defaults on first run; keys already present are left untouched
+for _state_key, _initial in (("conversation", None), ("chat_history", [])):
+    if _state_key not in st.session_state:
+        st.session_state[_state_key] = _initial
 def reset_conversation():
+    """Drop the active conversation chain and clear the stored chat history."""
+    st.session_state["chat_history"] = []
+    st.session_state["conversation"] = None
+def get_pdf_text(pdf_docs):
+    """Return the concatenated text of every page of every given PDF.
+
+    Args:
+        pdf_docs: Iterable of objects that ``PyPDF2.PdfReader`` can open
+            (the sidebar passes the uploaded files). ``None`` or an empty
+            iterable yields an empty string.
+
+    Returns:
+        One string with the page texts joined in document and page order.
+    """
+    page_texts = []
+    for pdf in pdf_docs or ():
+        pdf_reader = PyPDF2.PdfReader(pdf)
+        for page in pdf_reader.pages:
+            # A page without extractable text must not break the
+            # concatenation, so any falsy result is treated as "".
+            page_texts.append(page.extract_text() or "")
+    # Join once instead of growing a string with += inside the loops.
+    return "".join(page_texts)
+def get_text_chunks(text):
+    """Split ``text`` on newlines into overlapping chunks.
+
+    Chunks are limited to 1000 characters with a 200-character overlap,
+    with length measured by ``len``.
+    """
+    splitter_options = {
+        "separator": "\n",
+        "chunk_size": 1000,
+        "chunk_overlap": 200,
+        "length_function": len,
+    }
+    return CharacterTextSplitter(**splitter_options).split_text(text)
+def get_vectorstore(text_chunks):
+    """Build a FAISS store from ``text_chunks`` using default HuggingFaceEmbeddings."""
+    return FAISS.from_texts(
+        texts=text_chunks,
+        embedding=HuggingFaceEmbeddings(),
+    )
+def get_conversation_chain(vectorstore):
+    """Create a retrieval chain over ``vectorstore`` with buffered chat memory.
+
+    The chain uses the ``google/flan-t5-xxl`` model via ``HuggingFaceHub`` and
+    keeps the dialogue under the ``chat_history`` memory key.
+    """
+    generation_settings = {"temperature":0.5, "max_length":512}
+    flan_llm = HuggingFaceHub(
+        repo_id="google/flan-t5-xxl",
+        model_kwargs=generation_settings,
+    )
+    chat_memory = ConversationBufferMemory(
+        memory_key='chat_history',
+        return_messages=True,
+    )
+    return ConversationalRetrievalChain.from_llm(
+        llm=flan_llm,
+        retriever=vectorstore.as_retriever(),
+        memory=chat_memory,
+    )
+def handle_userinput(user_question):
+    """Send a question to the conversation chain and render the whole dialogue.
+
+    Args:
+        user_question: The text the user typed into the question box.
+
+    The chain's returned ``chat_history`` is stored in session state and each
+    message is written using the HTML templates. Even positions are rendered
+    with ``user_template`` and odd positions with ``bot_template`` (this
+    assumes the history alternates human/AI messages -- confirm against the
+    memory implementation).
+    """
+    import html  # local import: only needed to escape message text below
+
+    response = st.session_state.conversation({'question': user_question})
+    st.session_state.chat_history = response['chat_history']
+    for i, message in enumerate(st.session_state.chat_history):
+        template = user_template if i % 2 == 0 else bot_template
+        # The templates are rendered with unsafe_allow_html=True, so the
+        # message text must be escaped or user/model output could inject
+        # arbitrary markup into the page.
+        safe_content = html.escape(message.content)
+        st.write(template.replace("{{MSG}}", safe_content), unsafe_allow_html=True)
 # Streamlit application
+# Browser tab title and icon, followed by the visible page heading.
+st.set_page_config(page_title="Chat with your PDFs", page_icon=":books:")
+st.header("Chat with your PDFs :books:")
+# Inline-styled HTML snippets for the two speakers; handle_userinput replaces
+# the {{MSG}} placeholder with each message's text before writing it out.
+user_template = '<div style="background-color: #e6f3ff; padding: 10px; border-radius: 5px; margin-bottom: 10px;"><strong>Human:</strong> {{MSG}}</div>'
+bot_template = '<div style="background-color: #f0f0f0; padding: 10px; border-radius: 5px; margin-bottom: 10px;"><strong>AI:</strong> {{MSG}}</div>'
+# Sidebar: upload PDFs, build the retrieval chain on demand, and offer a reset
+with st.sidebar:
+    st.subheader("Your documents")
+    pdf_docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
+    if st.button("Process"):
+        if not pdf_docs:
+            # Nothing uploaded: tell the user instead of running the pipeline
+            # on empty input.
+            st.warning("Please upload at least one PDF before clicking 'Process'.")
+        else:
+            with st.spinner("Processing"):
+                # Get PDF text
+                raw_text = get_pdf_text(pdf_docs)
+                # Get the text chunks
+                text_chunks = get_text_chunks(raw_text)
+                if not text_chunks:
+                    # No text to index (e.g. image-only PDFs); stop before
+                    # attempting to build a vector store from nothing.
+                    st.error("No extractable text was found in the uploaded PDFs.")
+                else:
+                    # Create vector store
+                    vectorstore = get_vectorstore(text_chunks)
+                    # Create conversation chain
+                    st.session_state.conversation = get_conversation_chain(vectorstore)
+    st.button('Reset Chat', on_click=reset_conversation)
+# Main chat area: show the question box once a chain exists, otherwise a hint
+if st.session_state.conversation is not None:
+    user_question = st.text_input("Ask a question about your documents:")
+    if user_question:
+        handle_userinput(user_question)
+else:
+    st.write("Please upload PDF documents and click 'Process' to start chatting.")