Commit
·
4b16400
0
Parent(s):
Duplicate from somuch4subtlety/pogcastGPT
Browse filesCo-authored-by: SoMuch4Subtlety <[email protected]>
- .gitattributes +34 -0
- README.md +25 -0
- app.py +108 -0
- requirements.txt +3 -0
.gitattributes
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: PogcastGPT
|
3 |
+
emoji: 💻
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: indigo
|
6 |
+
sdk: streamlit
|
7 |
+
sdk_version: 1.10.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
duplicated_from: somuch4subtlety/pogcastGPT
|
11 |
+
---
|
12 |
+
|
13 |
+
|
14 |
+
This app uses semantic search to find and summarize relevant sections of the Pogcast to answer a user's question.
|
15 |
+
|
16 |
+
The process began by downloading and transcribing Pogcast episodes using [OpenAI’s Whisper](https://github.com/openai/whisper).
|
17 |
+
The transcriptions were then chunked into sections of ~500 words and each chunk was vectorized using [OpenAI’s embedding endpoint](https://beta.openai.com/docs/guides/embeddings).
|
18 |
+
The embeddings and text are then stored in a [vector database](https://www.pinecone.io).
|
19 |
+
|
20 |
+
When you ask a question, the text is run through the embedding endpoint and then is compared to all of the vectorized sections using cosine similarity.
|
21 |
+
The top results are used as context and passed to [OpenAI’s GPT-3 completion endpoint](https://beta.openai.com/docs/api-reference/completions) along with your question and an explanation of how GPT-3 should answer the question.
|
22 |
+
Lastly, the summary answer and top matching sections are displayed.
|
23 |
+
|
24 |
+
Note
|
25 |
+
The parameters and completion prompt are set loosely, and the bot is likely to hallucinate in its answers.
|
app.py
ADDED
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pinecone
|
3 |
+
import openai
|
4 |
+
import uuid
|
5 |
+
|
6 |
+
@st.experimental_singleton
def init_pinecone():
    """Connect to Pinecone once per session and return the index handle.

    The singleton decorator caches the returned Index so repeated queries
    reuse one connection instead of re-initializing the client each rerun.
    Credentials and the index name come from Streamlit secrets.
    """
    api_key = st.secrets["PINECONE_KEY"]
    pinecone.init(api_key=api_key, environment="us-west1-gcp")
    index_name = st.secrets["PINECONE_INDEX"]
    return pinecone.Index(index_name)
|
10 |
+
|
11 |
+
# Authenticate the OpenAI client from Streamlit secrets.  These module-level
# assignments run once at app start-up, before any endpoint is called.
openai.organization = st.secrets["OPENAI_ORG"]
openai.api_key = st.secrets["OPENAI_KEY"]
|
13 |
+
|
14 |
+
|
15 |
+
def modCheck(query):
    """Return True if OpenAI's moderation endpoint flags *query* as disallowed.

    Used as a gate before spending tokens on embedding/completion calls.
    """
    result = openai.Moderation.create(input=query)["results"][0]
    return result["flagged"]
|
18 |
+
|
19 |
+
def promptMaker(query, matchtext, prompt_type=None):
    """Build the GPT-3 completion prompt for a user inquiry.

    Args:
        query: the user's question/inquiry text.
        matchtext: concatenated transcript snippets retrieved by semantic search.
        prompt_type: optional content type (e.g. "poem") taken from a leading
            "/word" command; when falsy, the default Q&A/summarization
            instructions are used instead.

    Returns:
        The full prompt string: a fixed podcast description followed by
        task-specific instructions, the snippets, and the inquiry.
    """
    intro = "The Pogcast is a weekly podcast co-hosted by Veritas and Jesse Kazam. They are both twitch streamers and on the podcast they discuss all the poggers things in life like the first-person shooter Escape from Tarkov, chess, speed-running, and everyday activities relevant to being a twitch streamer.\n"
    if prompt_type:
        # Command mode: write a specific kind of content about the query topic.
        task = f"Use the following snippets from the podcast to write a {prompt_type} about {query}\nSnippets: {matchtext}\nResult:"
    else:
        # Default mode: answer a question or summarize, grounded in the snippets.
        task = (
            "You will be given relevant snippets from the Pogcast that should help you answer or provide context to an inquiry. \n"
            "If the inquiry is in the form of a question, answer it in a verbose manner, provide a quote from the snippets to support your answer, and provide a deep summarization of the relevant portions of the snippets.\n"
            "If the inquiry is not in the form of a question, summarize the parts of the snippets most relevant to the inquiry.\n"
            f"Snippets:\n{matchtext} \nInquiry: {query}\nResult:"
        )
    return intro + task
|
29 |
+
|
30 |
+
def runInquiry(query):
    """Answer a user inquiry end-to-end: moderate, embed, retrieve, complete, render.

    A leading "/word " in the query (e.g. "/poem topic") selects a content
    type passed to promptMaker; the rest of the query becomes the inquiry.
    Returns (completion response, list of matched sections), or None when the
    query is rejected (too short or flagged by moderation).

    NOTE(review): the original indentation was lost in the diff rendering;
    the nesting of statements under the st.spinner blocks below is a
    reconstruction — confirm against the original file.
    """
    prompt_type = None
    if query.startswith("/"):
        # "/poem some topic" -> prompt_type="poem", query="some topic"
        prompt_type = query.split(" ")[0][1:]
        query = " ".join(query.split(" ")[1:]).strip()

    # Reject trivially short inquiries before spending any API calls.
    if len(query)< 6:
        st.error("Please ask a question with at least 6 characters")
        return
    with st.spinner('Checking query...'):
        # Moderation gate: refuse flagged content outright.
        flagged = modCheck(query)
        if flagged:
            st.error("You know what you did. I ain't answering that.")
            return

    with st.spinner('Embedding query...'):
        # Embed the query and fetch the 5 nearest transcript chunks from Pinecone.
        xq = openai.Embedding.create(input=query, engine="text-embedding-ada-002")['data'][0]['embedding']
        index = init_pinecone()
        res = index.query(xq, namespace=st.secrets["PINECONE_NAMESPACE"], top_k=5, include_metadata=True)
    with st.spinner('Thinking...'):
        # Only the top 3 matches are used as completion context; all 5 are shown below.
        matchtext = "\n".join(match['metadata']['content'] for match in res['matches'][:3])

        # Stable per-session id passed to OpenAI as the abuse-tracking `user` field.
        if 'uid' not in st.session_state:
            st.session_state.uid = str(uuid.uuid4())

        comp = openai.Completion.create(
            model="text-davinci-003",
            prompt=promptMaker(query, matchtext, prompt_type),
            max_tokens=2000,
            temperature=0.9,
            user = st.session_state.uid
        )
        st.markdown(f"""
        <div>
            <p class="lead">{comp['choices'][0]['text']}</p>
        </div>
        """, unsafe_allow_html=True)

    # Render every retrieved match as a card with an embedded video player.
    for context in res['matches']:
        card(
            context['metadata']['episode_num'],
            context['metadata']['episode_id'],
            context['metadata']['start_second'],
            context['metadata']['end_second'],
            context['metadata']['content']
        )
    return (comp, res['matches'])
|
77 |
+
|
78 |
+
def card(episode, episode_id, start_second, end_second, context):
    """Render one matched transcript section as a Bootstrap card.

    Shows an embedded YouTube player starting at the match's timestamp, a
    deep link to the episode, and the first ~200 characters of the snippet.
    `end_second` is accepted for interface compatibility but not displayed.
    Returns whatever st.markdown returns.
    """
    start = int(start_second)
    preview = context[:200].capitalize() + "...."
    html = f"""
    <div class="container-fluid mb-2">
        <div class="row align-items-start">
            <div class="col-md-4 col-sm-4">
                <div class="position-relative">
                    <iframe width="220" height="124" src="https://www.youtube.com/embed/{episode_id}?start={start}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
                </div>
            </div>
            <div class="col-md-8 col-sm-8">
                <a href=https://www.youtube.com/watch?v={episode_id}&t={start}s>Episode {int(episode)}</a>
                <br>
                <span style="color: #808080;">
                    <small>{preview}</small>
                </span>
            </div>
        </div>
    </div>
    """
    return st.markdown(html, unsafe_allow_html=True)
|
97 |
+
|
98 |
+
# --- Page layout: title, description, Bootstrap stylesheet, and the query box. ---
st.markdown("<h1 style='text-align: center;'>PogcastGPT</h1>", unsafe_allow_html=True)
st.write("""
This app uses semantic search to find and summarize relevant sections of the Pogcast to answer your question
""")
# Bootstrap CSS is required by the card() layout classes.
st.markdown("""
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
""", unsafe_allow_html=True)

query = st.text_input(label="Ask me a question about the Pogcast!", max_chars=200, value="", key="inquiryBox", type='default')
# Streamlit reruns the script on every keystroke commit; only act on non-empty input.
if query:
    runInquiry(query)
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
pinecone-client
|
2 |
+
openai
|
3 |
+
streamlit
|