{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "from youtube_transcript_api import YouTubeTranscriptApi\n",
    "from nltk.tokenize import TextTilingTokenizer \n",
    "from youtubesearchpython import VideosSearch\n",
    "from semantic_search import SemanticSearch \n",
    "import pandas as pd\n",
    "import gradio as gr\n",
    "import numpy as np\n",
    "import requests\n",
    "import tiktoken\n",
    "import openai\n",
    "import json\n",
    "import nltk\n",
    "import re\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "def set_openai_key(key):\n",
    "    if key == \"env\":\n",
    "        key = os.environ.get(\"OPENAI_API_KEY\")\n",
    "    openai.api_key = key\n",
    "\n",
    "def get_youtube_data(url):\n",
    "\n",
    "    video_id = url.split(\"=\")[1]\n",
    "\n",
    "    try:\n",
    "        raw = YouTubeTranscriptApi.get_transcript(video_id)\n",
    "    except:\n",
    "        try:\n",
    "            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)\n",
    "            for transcript in transcript_list:\n",
    "                raw = transcript.translate('en').fetch()\n",
    "                break\n",
    "        except:\n",
    "            print(f\"No transcript found for {url}\") # Usually because the video itself disabled captions\n",
    "            return False\n",
    "\n",
    "    response = requests.get(f\"https://noembed.com/embed?dataType=json&url={url}\")\n",
    "    data = json.loads(response.content)\n",
    "\n",
    "    title, author = data[\"title\"], data[\"author_name\"]\n",
    "\n",
    "    # Remove any \"'\" from title\n",
    "    title = title.replace(\"'\", \"\")\n",
    "    author = author.replace(\"'\", \"\")\n",
    "\n",
    "    df = pd.DataFrame(raw)\n",
    "\n",
    "    df['end'] = df['start'] + df['duration']\n",
    "    df['total_words'] = df['text'].apply(lambda x: len(x.split())).cumsum()\n",
    "    df[\"text\"] = df[\"text\"] + \"\\n\\n\"\n",
    "\n",
    "    return df, title, author\n",
    "\n",
    "def to_timestamp(seconds):\n",
    "    seconds = int(seconds)\n",
    "\n",
    "    hours = seconds // 3600\n",
    "    minutes = (seconds % 3600) // 60\n",
    "    seconds_remaining = seconds % 60\n",
    "    \n",
    "    if seconds >= 3600:\n",
    "        return f\"{hours:02d}:{minutes:02d}:{seconds_remaining:02d}\"\n",
    "    else:\n",
    "        return f\"{minutes:02d}:{seconds_remaining:02d}\"\n",
    "\n",
    "def to_seconds(timestamp):\n",
    "    time_list = timestamp.split(':')\n",
    "    total_seconds = 0\n",
    "    if len(time_list) == 2:  # Minutes:Seconds format\n",
    "        total_seconds = int(time_list[0]) * 60 + int(time_list[1])\n",
    "    elif len(time_list) == 3:  # Hours:Minutes:Seconds format\n",
    "        total_seconds = int(time_list[0]) * 3600 + int(time_list[1]) * 60 + int(time_list[2])\n",
    "    else:\n",
    "        raise ValueError(\"Invalid timestamp format\")\n",
    "    return total_seconds\n",
    "\n",
    "def get_segments(df, title, author, split_by_topic, segment_length = 200):\n",
    "\n",
    "    transcript = df['text'].str.cat(sep=' ')\n",
    "\n",
    "    if not split_by_topic:\n",
    "        words = transcript.split()\n",
    "        segments = [' '.join(words[i:i+segment_length]) for i in range(0, len(words), segment_length)]\n",
    "    else:\n",
    "        try:\n",
    "            segments = tt.tokenize(transcript)\n",
    "        except:\n",
    "            return \"\"\n",
    "\n",
    "    segments = [segment.replace('\\n','').strip() for segment in segments]\n",
    "\n",
    "    segments_wc = [len(segment.split()) for segment in segments]\n",
    "    segments_wc = np.cumsum(segments_wc)\n",
    "\n",
    "    idx = [np.argmin(np.abs(df['total_words'] - total_words)) for total_words in segments_wc]\n",
    "\n",
    "    segments_end_times = df['end'].iloc[idx].values\n",
    "    segments_end_times = np.insert(segments_end_times, 0, 0.0)\n",
    "\n",
    "    segments_times = [f\"({to_timestamp(segments_end_times[i-1])}, {to_timestamp(segments_end_times[i])})\" for i in range(1,len(segments_end_times))]\n",
    "\n",
    "    segments_text = [f\"Segment from '{title}' by {author}\\nTimestamp: {segment_time}\\n\\n{segment}\\n\" for segment, segment_time in zip(segments, segments_times)]\n",
    "\n",
    "    return segments_text\n",
    "\n",
    "def fit_searcher(segments, n_neighbours):\n",
    "    global searcher\n",
    "    searcher.fit(segments, n_neighbors=n_neighbours)\n",
    "    return True\n",
    "\n",
    "def num_tokens(text, model):\n",
    "    encoding = tiktoken.encoding_for_model(model)\n",
    "    return len(encoding.encode(text))\n",
    "\n",
    "def refencify(text):\n",
    "    title_pattern = r\"Segment from '(.+)'\"\n",
    "    timestamp_pattern = r\"Timestamp: \\((.+)\\)\"\n",
    "\n",
    "    print(text)\n",
    "\n",
    "    title = re.search(title_pattern, text).group(1)\n",
    "    timestamp = re.search(timestamp_pattern, text).group(1).split(\",\")\n",
    "    start_timestamp, end_timestamp = timestamp\n",
    "\n",
    "    url = titles_to_urls[title]\n",
    "    start_seconds = to_seconds(start_timestamp)\n",
    "    end_seconds = to_seconds(end_timestamp)\n",
    "\n",
    "    video_iframe = f'''<iframe\n",
    "    width=\"400\"\n",
    "    height=\"240\"\n",
    "    src=\"{url.replace(\"watch?v=\", \"embed/\")}?start={start_seconds}&end={end_seconds}&controls=0\"\n",
    "    frameborder=\"0\"\n",
    "    allow=\"accelerometer; autoplay; modestbranding; encrypted-media; gyroscope; picture-in-picture\"\n",
    "    allowfullscreen\n",
    "    >\n",
    "    </iframe>'''\n",
    "\n",
    "    return start_timestamp, end_timestamp, f\"{video_iframe}\\n\\n\"\n",
    "\n",
    "def form_query(question, model, token_budget):\n",
    "\n",
    "    results = searcher(question)\n",
    "\n",
    "    introduction = 'Use the below segments from multiple youtube videos to answer the subsequent question. If the answer cannot be found in the articles, write \"I could not find an answer.\" Cite each sentence using the [title, author, timestamp] notation. Every sentence MUST have a citation!'\n",
    "\n",
    "    message = introduction\n",
    "\n",
    "    question = f\"\\n\\nQuestion: {question}\"\n",
    "\n",
    "    references = \"\"\n",
    "\n",
    "    for i, result in enumerate(results):\n",
    "        result = result + \"\\n\\n\"\n",
    "        if (\n",
    "            num_tokens(message + result + question, model=model)\n",
    "            > token_budget\n",
    "        ):\n",
    "            break\n",
    "        else:\n",
    "            message += result\n",
    "            start_timestamp, end_timestamp, iframe = refencify(result)\n",
    "            references += f\"### Segment {i+1} ({start_timestamp} - {end_timestamp}):\\n\" + iframe\n",
    "\n",
    "    # Remove the last extra two newlines\n",
    "    message = message[:-2]\n",
    "\n",
    "    references = \"Segments that might have been used to answer your question: (If you specified more segments than shown here, consider increasing your token budget)\\n\\n\" + references\n",
    "\n",
    "    return message + question, references\n",
    "\n",
    "def generate_answer(question, model, token_budget, temperature):\n",
    "    \n",
    "    message, references = form_query(question, model, token_budget)\n",
    "\n",
    "    messages = [\n",
    "        {\"role\": \"system\", \"content\": \"You answer questions about YouTube videos.\"},\n",
    "        {\"role\": \"user\", \"content\": message},\n",
    "    ]\n",
    "\n",
    "    response = openai.ChatCompletion.create(\n",
    "        model=model,\n",
    "        messages=messages,\n",
    "        temperature=temperature\n",
    "    )\n",
    "    \n",
    "    response_message = response[\"choices\"][0][\"message\"][\"content\"]\n",
    "\n",
    "    return response_message, references\n",
    "\n",
    "def add_to_dict(title, url):\n",
    "    global title_counter\n",
    "\n",
    "    if title not in titles_to_urls:\n",
    "        # This is the first occurrence of this title\n",
    "        titles_to_urls[title] = url\n",
    "        return title\n",
    "    else:\n",
    "        # This title has already been seen, so we need to add a number suffix to it\n",
    "        # First, check if we've already seen this title before\n",
    "        if title in title_counter:\n",
    "            # If we have, increment the counter\n",
    "            title_counter[title] += 1\n",
    "        else:\n",
    "            # If we haven't, start the counter at 1\n",
    "            title_counter[title] = 1\n",
    "        \n",
    "        # Add the suffix to the title\n",
    "        new_title = f\"{title} ({title_counter[title]})\"\n",
    "        \n",
    "        # Add the new title to the dictionary\n",
    "        titles_to_urls[new_title] = url\n",
    "        return new_title\n",
    "\n",
    "def search_youtube(question, n_videos):\n",
    "    videosSearch = VideosSearch(question, limit = n_videos)\n",
    "    urls = [\"https://www.youtube.com/watch?v=\" + video[\"id\"] for video in videosSearch.result()[\"result\"]]\n",
    "    print(urls)\n",
    "    return urls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "def main(openAI_key, question, n_videos, urls_text, split_by_topic, segment_length, n_neighbours, model, token_budget, temperature):\n",
    "\n",
    "    print(question)\n",
    "    print(urls_text)\n",
    "\n",
    "    set_openai_key(openAI_key)\n",
    "\n",
    "    if urls_text == \"\":\n",
    "        urls = search_youtube(question, n_videos)\n",
    "    else:\n",
    "        urls = list(set(urls_text.split(\"\\n\")))\n",
    "\n",
    "    global titles_to_urls\n",
    "    titles_to_urls = {}\n",
    "\n",
    "    segments = []\n",
    "\n",
    "    for url in urls:\n",
    "\n",
    "        if \"youtu.be\" in url:\n",
    "            url = url.replace(\"youtu.be/\", \"youtube.com/watch?v=\")\n",
    "\n",
    "        res = get_youtube_data(url)\n",
    "\n",
    "        if not res:\n",
    "            continue\n",
    "\n",
    "        df, title, author = res\n",
    "        \n",
    "        title = add_to_dict(title, url)\n",
    "\n",
    "        video_segments = get_segments(df, title, author, split_by_topic, segment_length)\n",
    "\n",
    "        segments.extend(video_segments)\n",
    "    \n",
    "    if segments == []:\n",
    "        return \"Something wrong happened! Try specifying the YouTube videos or changing the query.\", \"\"\n",
    "\n",
    "    print(\"Segments generated successfully!\")\n",
    "\n",
    "    if fit_searcher(segments, n_neighbours):\n",
    "        print(\"Searcher fit successfully!\")\n",
    "        answer, references = generate_answer(question, model, token_budget, temperature)\n",
    "\n",
    "    print(answer)\n",
    "\n",
    "    return answer, references"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[nltk_data] Downloading package stopwords to\n",
      "[nltk_data]     C:\\Users\\andrew\\AppData\\Roaming\\nltk_data...\n",
      "[nltk_data]   Package stopwords is already up-to-date!\n"
     ]
    }
   ],
   "source": [
    "nltk.download('stopwords')\n",
    "tt = TextTilingTokenizer()\n",
    "searcher = SemanticSearch()\n",
    "\n",
    "# Initialize a counter for duplicate titles\n",
    "title_counter = {}\n",
    "\n",
    "# One to one mapping from titles to urls\n",
    "titles_to_urls = {}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "What it does after extracting only the code\n",
      "https://youtu.be/l_r_b38VXmQ\n",
      "Segments generated successfully!\n",
      "Fitting with n=5...\n",
      "Searcher fit successfully!\n",
      "Segment from 'GPT-4 Auto Coder 2 reads docs from URL links and Auto Debugs' by echohive\n",
      "Timestamp: (12:01, 13:12)\n",
      "\n",
      "exact same thing we tried at first right after this neural network demonstration so look so it's finished it and ran it there we go and test input I have successfully created synthetic data set and it's given its final answer yeah it's pretty crazy so look let's just grab our instructions without any links and give it to gpt4 and see how far it goes so you didn't even know what to do but I have added return of python script for this let's see what it does now as we see here we got some code loading of the text is fine but there's nothing about link chain it just simply doesn't know what Lang chain is it doesn't know what a QA chain is it's just trying to come up with something cool as far as it knows what's best but this is not what we were looking for by the way we're gonna review the code here in a moment but all the necessary code will be available to my patreon supporters the link will be in the description now let's try it with web browsing with gpt4 this is Cape this uses gpt4 and it\n",
      "\n",
      "\n",
      "\n",
      "Segment from 'GPT-4 Auto Coder 2 reads docs from URL links and Auto Debugs' by echohive\n",
      "Timestamp: (07:31, 08:38)\n",
      "\n",
      "was mostly right it's 95 there all you have to do is just add Dot Page content over here and a good user question and you're good to go which would have taken somehow maybe longer uh to try to figure out from the documentation alone so as we see in the first interaction it actually received an error see set the sequence up so it actually did really well this time but it got the expected string because it had a problem with the how the character text splitter creates the document because it's a tuple so it actually decide to print it see as you see it actually prints it and it understands that it has to use page contents here let's say see says I can see that the text variable contains a list of document objects I need to extract the page content and here it's actually using that text first element of the text the page content versus up here when it was receiving the error it was only trying to let's see where is it it was only trying to use the first element which is the Tuple object right here right the list\n",
      "\n",
      "\n",
      "\n",
      "Segment from 'GPT-4 Auto Coder 2 reads docs from URL links and Auto Debugs' by echohive\n",
      "Timestamp: (08:38, 09:46)\n",
      "\n",
      "the Tuple within a list does because at the end there's some metadata actually on the third try see it was able to create a code that actually worked perfectly it actually was able to get the answer right now it's actually giving us the final answer but we can just copy this for example you do have to run this in an environment which you have all these packages installed we're going to talk about the requirements here in a moment but if you were to bring it into this test and just run this let me see we are loading our QA chain text splitter we are loading Isaac newton.text and getting first chunk is the first element's page content and we are input inputting that is the document and question is what did Isaac Newton discover me right oh I have to stop that and whenever you run this is actually a working code line chain code question over question answering over documents is you have split documents into three but we are only using the first document and Isaac Michigan discovered the laws of motion and universal gravitation so I found that this works really well\n",
      "\n",
      "\n",
      "\n",
      "Segment from 'GPT-4 Auto Coder 2 reads docs from URL links and Auto Debugs' by echohive\n",
      "Timestamp: (04:13, 05:20)\n",
      "\n",
      "lag format and if we take a look at the documentation as we see actually in the question answering stuff not in nowhere in the two pages stuff chain type is mentioned it's only using mapreduce and we're not going to be using summarize we're saying only question answering QA chain we're not even specifying the exact name and also with the characters text splitter we're saying chunk size to 500 and also the overlap of zero and this will of course split it into multiple pieces but we said that we only want to work with the first element of that first chunk or the first element of the list so as we see so we review it we have the summarize elements we have the QA elements and the text splitter once we know we have all our elements all we have to do is just put our instructions in here I have implemented multi-line input so you can actually paste code here as well because it's a multi-line input we do have to say done and click enter and now we're entering agent executor chain so this is the fun part as you see I have heavily\n",
      "\n",
      "\n",
      "\n",
      "Segment from 'GPT-4 Auto Coder 2 reads docs from URL links and Auto Debugs' by echohive\n",
      "Timestamp: (00:00, 01:07)\n",
      "\n",
      "hey everybody today we're gonna look at different implementation of Auto GPT which writes code debugs it automatically all while using documentation as its guidance what we're going to do is we're going to load this page this URL question answering and summarize analyze document URL and character text Splitter from blank chain and then we're going to ask and we will be using a python agent with the python Ripple which is able to execute code and gets feedback from the output and takes in user instructions to write code whatever you like let's just run this the best way to understand it is to see it when we run this code we are first greeted that the script this script will be Auto executing AI generator code this may be dangerous make sure you're observing the process we have to say accept here once we type in accept and click enter we are asked do you want to load any URLs we're going to say yes we don't have to load URLs if you don't want to you can just give it instructions without the urls I've set it up so that you can load up to three\n",
      "\n",
      "\n",
      "\n",
      "I could not find an answer.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "('I could not find an answer.',\n",
       " 'Segments that might have been used to answer your question: (If you specified more segments than shown here, consider increasing your token budget)\\n\\n### Segment 1 (12:01 -  13:12):\\n<iframe\\n    width=\"400\"\\n    height=\"240\"\\n    src=\"https://youtube.com/embed/l_r_b38VXmQ?start=721&end=792&controls=0\"\\n    frameborder=\"0\"\\n    allow=\"accelerometer; autoplay; modestbranding; encrypted-media; gyroscope; picture-in-picture\"\\n    allowfullscreen\\n    >\\n    </iframe>\\n\\n### Segment 2 (07:31 -  08:38):\\n<iframe\\n    width=\"400\"\\n    height=\"240\"\\n    src=\"https://youtube.com/embed/l_r_b38VXmQ?start=451&end=518&controls=0\"\\n    frameborder=\"0\"\\n    allow=\"accelerometer; autoplay; modestbranding; encrypted-media; gyroscope; picture-in-picture\"\\n    allowfullscreen\\n    >\\n    </iframe>\\n\\n### Segment 3 (08:38 -  09:46):\\n<iframe\\n    width=\"400\"\\n    height=\"240\"\\n    src=\"https://youtube.com/embed/l_r_b38VXmQ?start=518&end=586&controls=0\"\\n    frameborder=\"0\"\\n    allow=\"accelerometer; autoplay; modestbranding; encrypted-media; gyroscope; picture-in-picture\"\\n    allowfullscreen\\n    >\\n    </iframe>\\n\\n### Segment 4 (04:13 -  05:20):\\n<iframe\\n    width=\"400\"\\n    height=\"240\"\\n    src=\"https://youtube.com/embed/l_r_b38VXmQ?start=253&end=320&controls=0\"\\n    frameborder=\"0\"\\n    allow=\"accelerometer; autoplay; modestbranding; encrypted-media; gyroscope; picture-in-picture\"\\n    allowfullscreen\\n    >\\n    </iframe>\\n\\n### Segment 5 (00:00 -  01:07):\\n<iframe\\n    width=\"400\"\\n    height=\"240\"\\n    src=\"https://youtube.com/embed/l_r_b38VXmQ?start=0&end=67&controls=0\"\\n    frameborder=\"0\"\\n    allow=\"accelerometer; autoplay; modestbranding; encrypted-media; gyroscope; picture-in-picture\"\\n    allowfullscreen\\n    >\\n    </iframe>\\n\\n')"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "main(openAI_key=\"env\", \n",
    "    question=\"What it does after extracting only the code\", n_videos=5, \n",
    "    urls_text=\"https://youtu.be/l_r_b38VXmQ\", \n",
    "    split_by_topic=False, \n",
    "    segment_length=200, \n",
    "    n_neighbours=5, \n",
    "    model=\"gpt-3.5-turbo\", \n",
    "    token_budget=2000, \n",
    "    temperature=0)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.0"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}