wendru18 committed on
Commit
3cc6acd
·
1 Parent(s): e7c64ec

showing references as embedded YouTube video segments

Browse files
Files changed (1) hide show
  1. app.py +33 -11
app.py CHANGED
@@ -19,6 +19,11 @@ title_counter = {}
19
  # One to one mapping from titles to urls
20
  titles_to_urls = {}
21
 
 
 
 
 
 
22
  def get_youtube_data(url):
23
 
24
  video_id = url.split("=")[1]
@@ -107,18 +112,31 @@ def refencify(text):
107
  timestamp_pattern = r"Timestamp: \((.+)\)"
108
 
109
  title = re.search(title_pattern, text).group(1)
110
- start_timestamp = re.search(timestamp_pattern, text).group(1).split(",")[0]
 
111
 
112
  url = titles_to_urls[title]
113
  start_seconds = to_seconds(start_timestamp)
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
- return f"Segment URL: {url}&t={start_seconds}\n" + text
116
 
117
  def form_query(question, model, token_budget):
118
 
119
  results = searcher(question)
120
 
121
- introduction = 'Use the below segments from multiple youtube videos to answer the subsequent question. If the answer cannot be found in the articles, write "I could not find an answer." Cite each references using the [title, author, timestamp] notation. Every sentence should have a citation at the end.'
122
 
123
  message = introduction
124
 
@@ -135,7 +153,8 @@ def form_query(question, model, token_budget):
135
  break
136
  else:
137
  message += result
138
- references += f"### Segment {i+1}:\n" + refencify(result)
 
139
 
140
  # Remove the last extra two newlines
141
  message = message[:-2]
@@ -160,6 +179,7 @@ def generate_answer(question, model, token_budget, temperature):
160
  )
161
 
162
  response_message = response["choices"][0]["message"]["content"]
 
163
  return response_message, references
164
 
165
  def add_to_dict(title, url):
@@ -186,10 +206,12 @@ def add_to_dict(title, url):
186
  titles_to_urls[new_title] = url
187
  return new_title
188
 
189
- def main(urls_text, question, split_by_topic, segment_length, n_neighbours, model, token_budget, temperature):
190
 
191
- global title_counter
192
- title_counter = {}
 
 
193
 
194
  urls = list(set(urls_text.split("\n")))
195
  segments = []
@@ -216,7 +238,7 @@ title = "Ask YouTube GPT 📺"
216
  with gr.Blocks() as demo:
217
 
218
  gr.Markdown(f'<center><h1>{title}</h1></center>')
219
- gr.Markdown(f'Ask YouTube GPT allows you to ask questions about a set of Youtube Videos using Universal Sentence Encoder and Open AI. The returned response cites the video title, author and timestamp in square brackets where the information is located, adding credibility to the responses and helping you locate incorrect information. If you need one, get your Open AI API key <a href="https://platform.openai.com/account/api-keys">here</a>.</p>')
220
 
221
  with gr.Row():
222
 
@@ -230,7 +252,7 @@ with gr.Blocks() as demo:
230
  question = gr.Textbox(label='Enter your question here')
231
 
232
  with gr.Accordion("Advanced Settings", open=False):
233
- split_by_topic = gr.Checkbox(label="Split segments by topic", value=True, info="Whether the video transcripts are to be segmented by topic or by word count. Splitting by topic may result in a more coherent response, but results in a slower response time, especially for lengthy videos.")
234
  segment_length = gr.Slider(label="Segment word count", minimum=50, maximum=500, step=50, value=200, visible=False)
235
 
236
  def fn(split_by_topic):
@@ -239,7 +261,7 @@ with gr.Blocks() as demo:
239
  # If the user wants to split by topic, allow them to set the maximum segment length. (Make segment_length visible)
240
  split_by_topic.change(fn, split_by_topic, segment_length)
241
 
242
- n_neighbours = gr.Slider(label="Number of segments to retrieve", minimum=1, maximum=20, step=1, value=5, info="The number of segments to retrieve from each video and feed to the GPT model for answering.")
243
  model = gr.Dropdown(label="Model", value="gpt-3.5-turbo", choices=["gpt-3.5-turbo", "gpt-4"])
244
  token_budget = gr.Slider(label="Prompt token budget", minimum=100, maximum=4000, step=100, value=1000, info="The maximum number of tokens the prompt can take.")
245
  temperature = gr.Slider(label="Temperature", minimum=0, maximum=1, step=0.1, value=0, info="The GPT model's temperature. Recommended to use a low temperature to decrease the likelihood of hallucinations.")
@@ -255,7 +277,7 @@ with gr.Blocks() as demo:
255
  with gr.TabItem("References"):
256
  references = gr.Markdown()
257
 
258
- btn.click(main, inputs=[urls_text, question, split_by_topic, segment_length, n_neighbours, model, token_budget, temperature], outputs=[answer, references])
259
 
260
  #openai.api_key = os.getenv('Your_Key_Here')
261
  demo.launch()
 
19
  # One to one mapping from titles to urls
20
  titles_to_urls = {}
21
 
22
+ def set_openai_key(key):
23
+ if key == "":
24
+ return
25
+ openai.api_key = key
26
+
27
  def get_youtube_data(url):
28
 
29
  video_id = url.split("=")[1]
 
112
  timestamp_pattern = r"Timestamp: \((.+)\)"
113
 
114
  title = re.search(title_pattern, text).group(1)
115
+ timestamp = re.search(timestamp_pattern, text).group(1).split(",")
116
+ start_timestamp, end_timestamp = timestamp
117
 
118
  url = titles_to_urls[title]
119
  start_seconds = to_seconds(start_timestamp)
120
+ end_seconds = to_seconds(end_timestamp)
121
+
122
+ video_iframe = f'''<iframe
123
+ width="320"
124
+ height="240"
125
+ src="{url.replace("watch?v=", "embed/")}?start={start_seconds}&end={end_seconds}"
126
+ frameborder="0"
127
+ allow="autoplay; encrypted-media"
128
+ allowfullscreen
129
+ controls="0"
130
+ >
131
+ </iframe>'''
132
 
133
+ return start_timestamp, end_timestamp, f"{video_iframe}\n\n"
134
 
135
  def form_query(question, model, token_budget):
136
 
137
  results = searcher(question)
138
 
139
+ introduction = 'Use the below segments from multiple youtube videos to answer the subsequent question. If the answer cannot be found in the articles, write "I could not find an answer." Cite each sentence using the [title, author, timestamp] notation.'
140
 
141
  message = introduction
142
 
 
153
  break
154
  else:
155
  message += result
156
+ start_timestamp, end_timestamp, iframe = refencify(result)
157
+ references += f"### Segment {i+1} ({start_timestamp} - {end_timestamp}):\n" + iframe
158
 
159
  # Remove the last extra two newlines
160
  message = message[:-2]
 
179
  )
180
 
181
  response_message = response["choices"][0]["message"]["content"]
182
+
183
  return response_message, references
184
 
185
  def add_to_dict(title, url):
 
206
  titles_to_urls[new_title] = url
207
  return new_title
208
 
209
+ def main(openAI_key, urls_text, question, split_by_topic, segment_length, n_neighbours, model, token_budget, temperature):
210
 
211
+ set_openai_key(openAI_key)
212
+
213
+ global titles_to_urls
214
+ titles_to_urls = {}
215
 
216
  urls = list(set(urls_text.split("\n")))
217
  segments = []
 
238
  with gr.Blocks() as demo:
239
 
240
  gr.Markdown(f'<center><h1>{title}</h1></center>')
241
+ gr.Markdown(f'Ask YouTube GPT allows you to ask questions about a set of Youtube Videos using Topic Segmentation, Universal Sentence Encoding, and Open AI. The returned response cites the video title, author and timestamp in square brackets where the information is located, adding credibility to the responses and helping you locate incorrect information. If you need one, get your Open AI API key <a href="https://platform.openai.com/account/api-keys">here</a>.</p>')
242
 
243
  with gr.Row():
244
 
 
252
  question = gr.Textbox(label='Enter your question here')
253
 
254
  with gr.Accordion("Advanced Settings", open=False):
255
+ split_by_topic = gr.Checkbox(label="Split segments by topic", value=True, info="Whether the video transcripts are to be segmented by topic or by word count. Topically-coherent segments may be more useful for question answering, but results in a slower response time, especially for lengthy videos.")
256
  segment_length = gr.Slider(label="Segment word count", minimum=50, maximum=500, step=50, value=200, visible=False)
257
 
258
  def fn(split_by_topic):
 
261
  # If the user wants to split by topic, allow them to set the maximum segment length. (Make segment_length visible)
262
  split_by_topic.change(fn, split_by_topic, segment_length)
263
 
264
+ n_neighbours = gr.Slider(label="Number of segments to retrieve", minimum=1, maximum=20, step=1, value=5, info="The number of segments to retrieve and feed to the GPT model for answering.")
265
  model = gr.Dropdown(label="Model", value="gpt-3.5-turbo", choices=["gpt-3.5-turbo", "gpt-4"])
266
  token_budget = gr.Slider(label="Prompt token budget", minimum=100, maximum=4000, step=100, value=1000, info="The maximum number of tokens the prompt can take.")
267
  temperature = gr.Slider(label="Temperature", minimum=0, maximum=1, step=0.1, value=0, info="The GPT model's temperature. Recommended to use a low temperature to decrease the likelihood of hallucinations.")
 
277
  with gr.TabItem("References"):
278
  references = gr.Markdown()
279
 
280
+ btn.click(main, inputs=[openAI_key, urls_text, question, split_by_topic, segment_length, n_neighbours, model, token_budget, temperature], outputs=[answer, references])
281
 
282
  #openai.api_key = os.getenv('Your_Key_Here')
283
  demo.launch()