ismot somuch4subtlety commited on
Commit
4b16400
·
0 Parent(s):

Duplicate from somuch4subtlety/pogcastGPT

Browse files

Co-authored-by: SoMuch4Subtlety <[email protected]>

Files changed (4) hide show
  1. .gitattributes +34 -0
  2. README.md +25 -0
  3. app.py +108 -0
  4. requirements.txt +3 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: PogcastGPT
3
+ emoji: 💻
4
+ colorFrom: blue
5
+ colorTo: indigo
6
+ sdk: streamlit
7
+ sdk_version: 1.10.0
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: somuch4subtlety/pogcastGPT
11
+ ---
12
+
13
+
14
+ This app uses semantic search to find and summarize relevant sections of the Pogcast to answer a user's question.
15
+
16
+ The process began by downloading and transcribing Pogcast episodes using [OpenAI’s Whisper](https://github.com/openai/whisper).
17
+ The transcriptions were then chunked into sections of ~500 words and each chunk was vectorized using [OpenAI’s embedding endpoint](https://beta.openai.com/docs/guides/embeddings).
18
+ The embeddings and text are then stored in a [vector database](https://www.pinecone.io).
19
+
20
+ When you ask a question, the text is run through the embedding endpoint and then is compared to all of the vectorized sections using cosine similarity.
21
+ The top results are used as context and passed to [OpenAI’s GPT-3 completion endpoint](https://beta.openai.com/docs/api-reference/completions) along with your question and an explanation of how GPT-3 should answer the question.
22
+ Lastly, the summary answer and top matching sections are displayed.
23
+
24
+ Note
25
+ The parameters and completion prompt are set loosely and the bot is likely to hallucinate during its answers.
app.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pinecone
3
+ import openai
4
+ import uuid
5
+
6
@st.experimental_singleton
def init_pinecone():
    """Initialise the Pinecone client and return a handle to the app's index.

    The API key (``PINECONE_KEY``) and index name (``PINECONE_INDEX``) are
    read from ``st.secrets``; the Pinecone environment is fixed to
    ``us-west1-gcp``.  The function is wrapped in
    ``st.experimental_singleton``.

    Returns:
        The ``pinecone.Index`` object for the configured index.
    """
    secrets = st.secrets
    pinecone.init(environment="us-west1-gcp", api_key=secrets["PINECONE_KEY"])
    index_name = secrets["PINECONE_INDEX"]
    return pinecone.Index(index_name)
10
+
11
# Configure the OpenAI client at import time; both values come from the
# Streamlit secrets store rather than from the source tree.
openai.api_key = st.secrets["OPENAI_KEY"]
openai.organization = st.secrets["OPENAI_ORG"]
13
+
14
+
15
def modCheck(query):
    """Report whether OpenAI's moderation endpoint flags *query*.

    Args:
        query: The text to submit to ``openai.Moderation.create``.

    Returns:
        The ``flagged`` field of the first entry in the response's
        ``results`` list.
    """
    first_result = openai.Moderation.create(input=query)["results"][0]
    return first_result["flagged"]
18
+
19
def promptMaker(query, matchtext, prompt_type=None):
    """Assemble the completion prompt for an inquiry.

    Every prompt opens with a fixed description of the Pogcast.  What follows
    depends on ``prompt_type``:

    * falsy (``None`` or ``""``): instructions to answer or summarise,
      followed by the snippets, the inquiry and a ``Result:`` cue;
    * otherwise: a request to write a ``prompt_type`` about ``query`` using
      the snippets, followed by a ``Result:`` cue.

    Args:
        query: The user's inquiry text.
        matchtext: The transcript snippets to offer as context.
        prompt_type: Optional kind of text to write (e.g. taken from a
            leading ``/word`` in the user's input).

    Returns:
        The full prompt as a single string.
    """
    # Collected as a list and joined once; ``str.join`` also keeps the
    # original behaviour of raising ``TypeError`` for non-string pieces.
    pieces = [
        "The Pogcast is a weekly podcast co-hosted by Veritas and Jesse Kazam. They are both twitch streamers and on the podcast they discuss all the poggers things in life like the first-person shooter Escape from Tarkov, chess, speed-running, and everyday activities relevant to being a twitch streamer.\n",
    ]
    if prompt_type:
        pieces += [
            "Use the following snippets from the podcast to write a ",
            prompt_type,
            " about ",
            query,
            "\nSnippets: ",
            matchtext,
            "\nResult:",
        ]
    else:
        pieces += [
            "You will be given relevant snippets from the Pogcast that should help you answer or provide context to an inquiry. \n",
            "If the inquiry is in the form of a question, answer it in a verbose manner, provide a quote from the snippets to support your answer, and provide a deep summarization of the relevant portions of the snippets.\n",
            "If the inquiry is not in the form of a question, summarize the parts of the snippets most relevant to the inquiry.\n",
            "Snippets:\n",
            matchtext,
            " \nInquiry: ",
            query,
            "\nResult:",
        ]
    return "".join(pieces)
29
+
30
def runInquiry(query):
    """Answer *query* from Pogcast transcript snippets and render the result.

    Steps, in order:

    1. If the input starts with ``/``, the first word (minus the slash) is
       taken as ``prompt_type`` and the rest becomes the query.
    2. Queries shorter than 6 characters are rejected with ``st.error``.
    3. The query is sent through ``modCheck``; flagged queries are rejected.
    4. The query is embedded with ``text-embedding-ada-002`` and the five
       nearest matches are fetched from the Pinecone index.
    5. The content of the first three matches is passed, via
       ``promptMaker``, to ``text-davinci-003``.
    6. The completion is rendered, followed by one ``card`` per match.

    Args:
        query: Raw text typed by the user.

    Returns:
        ``None`` when the query is rejected; otherwise a tuple
        ``(completion_response, matches)``.
    """
    # Local import so this function stays self-contained.
    import html

    prompt_type = None
    if query.startswith("/"):
        # "/poem about chess" -> prompt_type "poem", query "about chess".
        command, _, remainder = query.partition(" ")
        prompt_type = command[1:]
        query = remainder.strip()

    if len(query) < 6:
        st.error("Please ask a question with at least 6 characters")
        return None

    with st.spinner('Checking query...'):
        flagged = modCheck(query)
    if flagged:
        st.error("You know what you did. I ain't answering that.")
        return None

    with st.spinner('Embedding query...'):
        embedding = openai.Embedding.create(
            input=query, engine="text-embedding-ada-002"
        )['data'][0]['embedding']
        index = init_pinecone()
        res = index.query(
            embedding,
            namespace=st.secrets["PINECONE_NAMESPACE"],
            top_k=5,
            include_metadata=True,
        )

    with st.spinner('Thinking...'):
        # Only the first three matches are used as prompt context; all five
        # are still shown as cards below.
        matchtext = "\n".join(
            match['metadata']['content'] for match in res['matches'][:3]
        )

        # One id per browser session, passed to OpenAI as the ``user`` field.
        if 'uid' not in st.session_state:
            st.session_state.uid = str(uuid.uuid4())

        comp = openai.Completion.create(
            model="text-davinci-003",
            prompt=promptMaker(query, matchtext, prompt_type),
            max_tokens=2000,
            temperature=0.9,
            user=st.session_state.uid,
        )

    # The completion is steered by user input and is rendered with
    # unsafe_allow_html=True, so escape it to keep any markup the model
    # emits from being interpreted as HTML.
    answer = html.escape(comp['choices'][0]['text'], quote=False)
    answer_html = (
        "\n"
        "<div>\n"
        f'<p class="lead">{answer}</p>\n'
        "</div>\n"
    )
    st.markdown(answer_html, unsafe_allow_html=True)

    for context in res['matches']:
        meta = context['metadata']
        card(
            meta['episode_num'],
            meta['episode_id'],
            meta['start_second'],
            meta['end_second'],
            meta['content'],
        )
    return (comp, res['matches'])
77
+
78
def card(episode, episode_id, start_second, end_second, context):
    """Render one search hit: an embedded YouTube player plus an excerpt.

    Args:
        episode: Episode number; converted with ``int`` for the link label.
        episode_id: YouTube video id used in the embed and watch URLs.
        start_second: Offset at which playback starts; converted with ``int``.
        end_second: Accepted for interface compatibility; not used here.
        context: Transcript text; its first 200 characters are shown.

    Returns:
        Whatever ``st.markdown`` returns for the rendered block.
    """
    # Local import so this function stays self-contained.
    import html

    start = int(start_second)
    # Escape everything interpolated into the markup: the id lands inside
    # quoted attributes, the excerpt inside element content.
    video_id = html.escape(str(episode_id))
    # Truncate first, then escape, so an entity is never cut in half.
    excerpt = html.escape(context[:200].capitalize() + "....", quote=False)

    markup = "\n".join((
        "",
        '<div class="container-fluid mb-2">',
        '<div class="row align-items-start">',
        '<div class="col-md-4 col-sm-4">',
        '<div class="position-relative">',
        f'<iframe width="220" height="124" src="https://www.youtube.com/embed/{video_id}?start={start}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>',
        "</div>",
        "</div>",
        '<div class="col-md-8 col-sm-8">',
        # The href must be quoted: an unquoted attribute value may not
        # contain "=", and the "&" separator is written as "&amp;".
        f'<a href="https://www.youtube.com/watch?v={video_id}&amp;t={start}s">Episode {int(episode)}</a>',
        "<br>",
        '<span style="color: #808080;">',
        f"<small>{excerpt}</small>",
        "</span>",
        "</div>",
        "</div>",
        "</div>",
        "",
    ))
    return st.markdown(markup, unsafe_allow_html=True)
97
+
98
# Page header and one-line description.
st.markdown("<h1 style='text-align: center;'>PogcastGPT</h1>", unsafe_allow_html=True)
st.write("""
This app uses semantic search to find and summarize relevant sections of the Pogcast to answer your question
""")

# Bootstrap stylesheet providing the classes used in the rendered HTML
# (container-fluid, row, col-md-*, lead).  The package must be addressed as
# "bootstrap@4.0.0": the integrity hash below is tied to that exact file.
st.markdown("""
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@4.0.0/dist/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
""", unsafe_allow_html=True)

# Run an inquiry once the box is non-empty.
query = st.text_input(label="Ask me a question about the Pogcast!", max_chars=200, value="", key="inquiryBox", type='default')
if query:
    runInquiry(query)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# app.py uses the pre-1.0 OpenAI SDK (openai.Completion / Embedding / Moderation)
# and the pre-3.0 Pinecone client (pinecone.init); cap both below the releases
# that removed those entry points.
pinecone-client<3.0.0
openai<1.0.0
streamlit