File size: 3,853 Bytes
5fdb69e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "6e9fa1fc-eac5-4d1d-9be4-541b3f2b3458",
   "metadata": {},
   "source": [
    "# Day 2 EXERCISE Solution:\n",
    "\n",
    "Upgraded day 1 project that scrapes and summarizes any webpage using an Open Source model running locally via Ollama instead of OpenAI\n",
    "\n",
    "## Note:-\n",
    "If Ollama is slow on your machine, try using `llama3.2:1b` as an alternative: \n",
    "1. Run `ollama pull llama3.2:1b` from a Terminal or Powershell\n",
    "2. **Ctrl + /** to comment this code line below: `MODEL = \"llama3.2\"`\n",
    "3. same **Ctrl + /** to uncomment: `MODEL = \"llama3.2:1b\"`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4e2a9393-7767-488e-a8bf-27c12dca35bd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# imports:-\n",
    "\n",
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "from IPython.display import Markdown, display\n",
    "import ollama"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "29ddd15d-a3c5-4f4e-a678-873f56162724",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Constants:-\n",
    "\n",
    "# MODEL = \"llama3.2\"\n",
    "MODEL = \"llama3.2:1b\"\n",
    "# MODEL = \"deepseek-r1:1.5b\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6de38216-6d1c-48c4-877b-86d403f4e0f8",
   "metadata": {},
   "outputs": [],
   "source": [
    "class Website:\n",
    "    def __init__(self, url):\n",
    "        self.url = url\n",
    "        response = requests.get(url)\n",
    "        soup = BeautifulSoup(response.content, 'html.parser')\n",
    "        self.title = soup.title.string if soup.title else \"No title found\"\n",
    "        for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
    "            irrelevant.decompose()\n",
    "        self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
    "\n",
    "\n",
    "system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
    "                and provides a short summary, ignoring text that might be navigation related. \\\n",
    "                Respond in markdown.\"\n",
    "\n",
    "\n",
    "def user_prompt_for(website):\n",
    "    user_prompt = f\"You are looking at a website titled {website.title}\"\n",
    "    user_prompt += \"\\nThe contents of this website is as follows; \\\n",
    "                    please provide a short summary of this website in markdown. \\\n",
    "                    If it includes news or announcements, then summarize these too.\\n\\n\"\n",
    "    user_prompt += website.text\n",
    "    return user_prompt\n",
    "\n",
    "\n",
    "def messages_for(website):\n",
    "    return [\n",
    "        {\"role\": \"system\", \"content\": system_prompt},\n",
    "        {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
    "    ]\n",
    "\n",
    "\n",
    "def summary(url):\n",
    "    website = Website(url)\n",
    "    response = ollama.chat(\n",
    "        model = MODEL,\n",
    "        messages = messages_for(website)\n",
    "    )\n",
    "    return display(Markdown(response['message']['content']))\n",
    "\n",
    "\n",
    "summary(\"https://edwarddonner.com\")\n",
    "# summary(\"https://cnn.com\")\n",
    "# summary(\"https://anthropic.com\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}