jisaacso219 committed on
Commit
32cb19a
·
verified ·
1 Parent(s): 063a26c

Delete 2_lab2.ipynb

Browse files
Files changed (1) hide show
  1. 2_lab2.ipynb +0 -474
2_lab2.ipynb DELETED
@@ -1,474 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "metadata": {},
6
- "source": [
7
- "## Welcome to the Second Lab - Week 1, Day 3\n",
8
- "\n",
9
- "Today we will work with lots of models! This is a way to get comfortable with APIs."
10
- ]
11
- },
12
- {
13
- "cell_type": "markdown",
14
- "metadata": {},
15
- "source": [
16
- "<table style=\"margin: 0; text-align: left; width:100%\">\n",
17
- " <tr>\n",
18
- " <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
19
- " <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
20
- " </td>\n",
21
- " <td>\n",
22
- " <h2 style=\"color:#ff7800;\">Important point - please read</h2>\n",
23
- " <span style=\"color:#ff7800;\">The way I collaborate with you may be different to other courses you've taken. I prefer not to type code while you watch. Rather, I execute Jupyter Labs, like this, and give you an intuition for what's going on. My suggestion is that you carefully execute this yourself, <b>after</b> watching the lecture. Add print statements to understand what's going on, and then come up with your own variations.<br/><br/>If you have time, I'd love it if you submit a PR for changes in the community_contributions folder - instructions in the resources. Also, if you have a GitHub account, use this to showcase your variations. Not only is this essential practice, but it demonstrates your skills to others, including perhaps future clients or employers...\n",
24
- " </span>\n",
25
- " </td>\n",
26
- " </tr>\n",
27
- "</table>"
28
- ]
29
- },
30
- {
31
- "cell_type": "code",
32
- "execution_count": 1,
33
- "metadata": {},
34
- "outputs": [],
35
- "source": [
36
- "# Start with imports - ask ChatGPT to explain any package that you don't know\n",
37
- "\n",
38
- "import os\n",
39
- "import json\n",
40
- "from dotenv import load_dotenv\n",
41
- "from openai import OpenAI\n",
42
- "from anthropic import Anthropic\n",
43
- "from IPython.display import Markdown, display"
44
- ]
45
- },
46
- {
47
- "cell_type": "code",
48
- "execution_count": null,
49
- "metadata": {},
50
- "outputs": [],
51
- "source": [
52
- "# Always remember to do this!\n",
53
- "load_dotenv(override=True)"
54
- ]
55
- },
56
- {
57
- "cell_type": "code",
58
- "execution_count": null,
59
- "metadata": {},
60
- "outputs": [],
61
- "source": [
62
- "# Print the key prefixes to help with any debugging\n",
63
- "\n",
64
- "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
65
- "anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')\n",
66
- "google_api_key = os.getenv('GOOGLE_API_KEY')\n",
67
- "deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')\n",
68
- "groq_api_key = os.getenv('GROQ_API_KEY')\n",
69
- "\n",
70
- "if openai_api_key:\n",
71
- " print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
72
- "else:\n",
73
- " print(\"OpenAI API Key not set\")\n",
74
- " \n",
75
- "if anthropic_api_key:\n",
76
- " print(f\"Anthropic API Key exists and begins {anthropic_api_key[:7]}\")\n",
77
- "else:\n",
78
- " print(\"Anthropic API Key not set (and this is optional)\")\n",
79
- "\n",
80
- "if google_api_key:\n",
81
- " print(f\"Google API Key exists and begins {google_api_key[:2]}\")\n",
82
- "else:\n",
83
- " print(\"Google API Key not set (and this is optional)\")\n",
84
- "\n",
85
- "if deepseek_api_key:\n",
86
- " print(f\"DeepSeek API Key exists and begins {deepseek_api_key[:3]}\")\n",
87
- "else:\n",
88
- " print(\"DeepSeek API Key not set (and this is optional)\")\n",
89
- "\n",
90
- "if groq_api_key:\n",
91
- " print(f\"Groq API Key exists and begins {groq_api_key[:4]}\")\n",
92
- "else:\n",
93
- " print(\"Groq API Key not set (and this is optional)\")"
94
- ]
95
- },
96
- {
97
- "cell_type": "code",
98
- "execution_count": 4,
99
- "metadata": {},
100
- "outputs": [],
101
- "source": [
102
- "request = \"Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. \"\n",
103
- "request += \"Answer only with the question, no explanation.\"\n",
104
- "messages = [{\"role\": \"user\", \"content\": request}]"
105
- ]
106
- },
107
- {
108
- "cell_type": "code",
109
- "execution_count": null,
110
- "metadata": {},
111
- "outputs": [],
112
- "source": [
113
- "messages"
114
- ]
115
- },
116
- {
117
- "cell_type": "code",
118
- "execution_count": null,
119
- "metadata": {},
120
- "outputs": [],
121
- "source": [
122
- "openai = OpenAI()\n",
123
- "response = openai.chat.completions.create(\n",
124
- " model=\"gpt-4o-mini\",\n",
125
- " messages=messages,\n",
126
- ")\n",
127
- "question = response.choices[0].message.content\n",
128
- "print(question)\n"
129
- ]
130
- },
131
- {
132
- "cell_type": "code",
133
- "execution_count": 7,
134
- "metadata": {},
135
- "outputs": [],
136
- "source": [
137
- "competitors = []\n",
138
- "answers = []\n",
139
- "messages = [{\"role\": \"user\", \"content\": question}]"
140
- ]
141
- },
142
- {
143
- "cell_type": "code",
144
- "execution_count": null,
145
- "metadata": {},
146
- "outputs": [],
147
- "source": [
148
- "# The API we know well\n",
149
- "\n",
150
- "model_name = \"gpt-4o-mini\"\n",
151
- "\n",
152
- "response = openai.chat.completions.create(model=model_name, messages=messages)\n",
153
- "answer = response.choices[0].message.content\n",
154
- "\n",
155
- "display(Markdown(answer))\n",
156
- "competitors.append(model_name)\n",
157
- "answers.append(answer)"
158
- ]
159
- },
160
- {
161
- "cell_type": "code",
162
- "execution_count": null,
163
- "metadata": {},
164
- "outputs": [],
165
- "source": [
166
- "# Anthropic has a slightly different API, and Max Tokens is required\n",
167
- "\n",
168
- "model_name = \"claude-3-7-sonnet-latest\"\n",
169
- "\n",
170
- "claude = Anthropic()\n",
171
- "response = claude.messages.create(model=model_name, messages=messages, max_tokens=1000)\n",
172
- "answer = response.content[0].text\n",
173
- "\n",
174
- "display(Markdown(answer))\n",
175
- "competitors.append(model_name)\n",
176
- "answers.append(answer)"
177
- ]
178
- },
179
- {
180
- "cell_type": "code",
181
- "execution_count": null,
182
- "metadata": {},
183
- "outputs": [],
184
- "source": [
185
- "gemini = OpenAI(api_key=google_api_key, base_url=\"https://generativelanguage.googleapis.com/v1beta/openai/\")\n",
186
- "model_name = \"gemini-2.0-flash\"\n",
187
- "\n",
188
- "response = gemini.chat.completions.create(model=model_name, messages=messages)\n",
189
- "answer = response.choices[0].message.content\n",
190
- "\n",
191
- "display(Markdown(answer))\n",
192
- "competitors.append(model_name)\n",
193
- "answers.append(answer)"
194
- ]
195
- },
196
- {
197
- "cell_type": "code",
198
- "execution_count": null,
199
- "metadata": {},
200
- "outputs": [],
201
- "source": [
202
- "deepseek = OpenAI(api_key=deepseek_api_key, base_url=\"https://api.deepseek.com/v1\")\n",
203
- "model_name = \"deepseek-chat\"\n",
204
- "\n",
205
- "response = deepseek.chat.completions.create(model=model_name, messages=messages)\n",
206
- "answer = response.choices[0].message.content\n",
207
- "\n",
208
- "display(Markdown(answer))\n",
209
- "competitors.append(model_name)\n",
210
- "answers.append(answer)"
211
- ]
212
- },
213
- {
214
- "cell_type": "code",
215
- "execution_count": null,
216
- "metadata": {},
217
- "outputs": [],
218
- "source": [
219
- "groq = OpenAI(api_key=groq_api_key, base_url=\"https://api.groq.com/openai/v1\")\n",
220
- "model_name = \"llama-3.3-70b-versatile\"\n",
221
- "\n",
222
- "response = groq.chat.completions.create(model=model_name, messages=messages)\n",
223
- "answer = response.choices[0].message.content\n",
224
- "\n",
225
- "display(Markdown(answer))\n",
226
- "competitors.append(model_name)\n",
227
- "answers.append(answer)\n"
228
- ]
229
- },
230
- {
231
- "cell_type": "markdown",
232
- "metadata": {},
233
- "source": [
234
- "## For the next cell, we will use Ollama\n",
235
- "\n",
236
- "Ollama runs a local web service that gives an OpenAI compatible endpoint, \n",
237
- "and runs models locally using high performance C++ code.\n",
238
- "\n",
239
- "If you don't have Ollama, install it by visiting https://ollama.com, pressing Download, and following the instructions.\n",
240
- "\n",
241
- "After it's installed, you should be able to visit here: http://localhost:11434 and see the message \"Ollama is running\"\n",
242
- "\n",
243
- "You might need to restart Cursor (and maybe reboot). Then open a Terminal (control+\\`) and run `ollama serve`\n",
244
- "\n",
245
- "Useful Ollama commands (run these in the terminal, or with an exclamation mark in this notebook):\n",
246
- "\n",
247
- "`ollama pull <model_name>` downloads a model locally \n",
248
- "`ollama ls` lists all the models you've downloaded \n",
249
- "`ollama rm <model_name>` deletes the specified model from your downloads"
250
- ]
251
- },
252
- {
253
- "cell_type": "markdown",
254
- "metadata": {},
255
- "source": [
256
- "<table style=\"margin: 0; text-align: left; width:100%\">\n",
257
- " <tr>\n",
258
- " <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
259
- " <img src=\"../assets/stop.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
260
- " </td>\n",
261
- " <td>\n",
262
- " <h2 style=\"color:#ff7800;\">Super important - ignore me at your peril!</h2>\n",
263
- " <span style=\"color:#ff7800;\">The model called <b>llama3.3</b> is FAR too large for home computers - it's not intended for personal computing and will consume all your resources! Stick with the nicely sized <b>llama3.2</b> or <b>llama3.2:1b</b> and if you want larger, try llama3.1 or smaller variants of Qwen, Gemma, Phi or DeepSeek. See <a href=\"https://ollama.com/models\">the Ollama models page</a> for a full list of models and sizes.\n",
264
- " </span>\n",
265
- " </td>\n",
266
- " </tr>\n",
267
- "</table>"
268
- ]
269
- },
270
- {
271
- "cell_type": "code",
272
- "execution_count": null,
273
- "metadata": {},
274
- "outputs": [],
275
- "source": [
276
- "!ollama pull llama3.2"
277
- ]
278
- },
279
- {
280
- "cell_type": "code",
281
- "execution_count": null,
282
- "metadata": {},
283
- "outputs": [],
284
- "source": [
285
- "ollama = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')\n",
286
- "model_name = \"llama3.2\"\n",
287
- "\n",
288
- "response = ollama.chat.completions.create(model=model_name, messages=messages)\n",
289
- "answer = response.choices[0].message.content\n",
290
- "\n",
291
- "display(Markdown(answer))\n",
292
- "competitors.append(model_name)\n",
293
- "answers.append(answer)"
294
- ]
295
- },
296
- {
297
- "cell_type": "code",
298
- "execution_count": null,
299
- "metadata": {},
300
- "outputs": [],
301
- "source": [
302
- "# So where are we?\n",
303
- "\n",
304
- "print(competitors)\n",
305
- "print(answers)\n"
306
- ]
307
- },
308
- {
309
- "cell_type": "code",
310
- "execution_count": null,
311
- "metadata": {},
312
- "outputs": [],
313
- "source": [
314
- "# It's nice to know how to use \"zip\"\n",
315
- "for competitor, answer in zip(competitors, answers):\n",
316
- " print(f\"Competitor: {competitor}\\n\\n{answer}\")\n"
317
- ]
318
- },
319
- {
320
- "cell_type": "code",
321
- "execution_count": 20,
322
- "metadata": {},
323
- "outputs": [],
324
- "source": [
325
- "# Let's bring this together - note the use of \"enumerate\"\n",
326
- "\n",
327
- "together = \"\"\n",
328
- "for index, answer in enumerate(answers):\n",
329
- " together += f\"# Response from competitor {index+1}\\n\\n\"\n",
330
- " together += answer + \"\\n\\n\""
331
- ]
332
- },
333
- {
334
- "cell_type": "code",
335
- "execution_count": null,
336
- "metadata": {},
337
- "outputs": [],
338
- "source": [
339
- "print(together)"
340
- ]
341
- },
342
- {
343
- "cell_type": "code",
344
- "execution_count": 22,
345
- "metadata": {},
346
- "outputs": [],
347
- "source": [
348
- "judge = f\"\"\"You are judging a competition between {len(competitors)} competitors.\n",
349
- "Each model has been given this question:\n",
350
- "\n",
351
- "{question}\n",
352
- "\n",
353
- "Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.\n",
354
- "Respond with JSON, and only JSON, with the following format:\n",
355
- "{{\"results\": [\"best competitor number\", \"second best competitor number\", \"third best competitor number\", ...]}}\n",
356
- "\n",
357
- "Here are the responses from each competitor:\n",
358
- "\n",
359
- "{together}\n",
360
- "\n",
361
- "Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks.\"\"\"\n"
362
- ]
363
- },
364
- {
365
- "cell_type": "code",
366
- "execution_count": null,
367
- "metadata": {},
368
- "outputs": [],
369
- "source": [
370
- "print(judge)"
371
- ]
372
- },
373
- {
374
- "cell_type": "code",
375
- "execution_count": 29,
376
- "metadata": {},
377
- "outputs": [],
378
- "source": [
379
- "judge_messages = [{\"role\": \"user\", \"content\": judge}]"
380
- ]
381
- },
382
- {
383
- "cell_type": "code",
384
- "execution_count": null,
385
- "metadata": {},
386
- "outputs": [],
387
- "source": [
388
- "# Judgement time!\n",
389
- "\n",
390
- "openai = OpenAI()\n",
391
- "response = openai.chat.completions.create(\n",
392
- " model=\"o3-mini\",\n",
393
- " messages=judge_messages,\n",
394
- ")\n",
395
- "results = response.choices[0].message.content\n",
396
- "print(results)\n"
397
- ]
398
- },
399
- {
400
- "cell_type": "code",
401
- "execution_count": null,
402
- "metadata": {},
403
- "outputs": [],
404
- "source": [
405
- "# OK let's turn this into results!\n",
406
- "\n",
407
- "results_dict = json.loads(results)\n",
408
- "ranks = results_dict[\"results\"]\n",
409
- "for index, result in enumerate(ranks):\n",
410
- " competitor = competitors[int(result)-1]\n",
411
- " print(f\"Rank {index+1}: {competitor}\")"
412
- ]
413
- },
414
- {
415
- "cell_type": "markdown",
416
- "metadata": {},
417
- "source": [
418
- "<table style=\"margin: 0; text-align: left; width:100%\">\n",
419
- " <tr>\n",
420
- " <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
421
- " <img src=\"../assets/exercise.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
422
- " </td>\n",
423
- " <td>\n",
424
- " <h2 style=\"color:#ff7800;\">Exercise</h2>\n",
425
- " <span style=\"color:#ff7800;\">Which pattern(s) did this use? Try updating this to add another Agentic design pattern.\n",
426
- " </span>\n",
427
- " </td>\n",
428
- " </tr>\n",
429
- "</table>"
430
- ]
431
- },
432
- {
433
- "cell_type": "markdown",
434
- "metadata": {},
435
- "source": [
436
- "<table style=\"margin: 0; text-align: left; width:100%\">\n",
437
- " <tr>\n",
438
- " <td style=\"width: 150px; height: 150px; vertical-align: middle;\">\n",
439
- " <img src=\"../assets/business.png\" width=\"150\" height=\"150\" style=\"display: block;\" />\n",
440
- " </td>\n",
441
- " <td>\n",
442
- " <h2 style=\"color:#00bfff;\">Commercial implications</h2>\n",
443
- " <span style=\"color:#00bfff;\">These kinds of patterns - to send a task to multiple models, and evaluate results,\n",
444
- " are common where you need to improve the quality of your LLM response. This approach can be universally applied\n",
445
- " to business projects where accuracy is critical.\n",
446
- " </span>\n",
447
- " </td>\n",
448
- " </tr>\n",
449
- "</table>"
450
- ]
451
- }
452
- ],
453
- "metadata": {
454
- "kernelspec": {
455
- "display_name": ".venv",
456
- "language": "python",
457
- "name": "python3"
458
- },
459
- "language_info": {
460
- "codemirror_mode": {
461
- "name": "ipython",
462
- "version": 3
463
- },
464
- "file_extension": ".py",
465
- "mimetype": "text/x-python",
466
- "name": "python",
467
- "nbconvert_exporter": "python",
468
- "pygments_lexer": "ipython3",
469
- "version": "3.12.9"
470
- }
471
- },
472
- "nbformat": 4,
473
- "nbformat_minor": 2
474
- }