File size: 4,622 Bytes
5fdb69e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d25b0aef-3e5e-4026-90ee-2b373bf262b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summarize a website with a locally served Ollama model.\n",
    "# The page is scraped, reduced to its visible text, wrapped in a system\n",
    "# and user prompt, and sent to the model through one of three call styles.\n",
    "\n",
    "# Step 0: Import Libraries\n",
    "from bs4 import BeautifulSoup\n",
    "from IPython.display import Markdown, display\n",
    "import ollama\n",
    "from openai import OpenAI\n",
    "import requests\n",
    "\n",
    "# Step 1: Set Constants and Variables\n",
    "print(\"[INFO] Setting constants and variable ...\")\n",
    "WEBSITE_URL = \"https://arxiv.org/\"\n",
    "MODEL = \"llama3.2\"\n",
    "# Seconds to wait for the website before giving up instead of hanging.\n",
    "REQUEST_TIMEOUT = 30\n",
    "# Interchangeable ways of calling the model; `approach` selects one.\n",
    "approaches = [\"local-call\", \"python-package\", \"openai-python-library\"]\n",
    "approach = approaches[2]\n",
    "\n",
    "# Step 2: Scrape Website\n",
    "print(\"[INFO] Scraping website ...\")\n",
    "url_response = requests.get(\n",
    "    url=WEBSITE_URL,\n",
    "    # Browser-like User-Agent (presumably so the site does not reject the\n",
    "    # request as coming from a script).\n",
    "    headers={\"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"},\n",
    "    timeout=REQUEST_TIMEOUT\n",
    "    )\n",
    "# Fail fast on 4xx/5xx so an error page is never summarized as content.\n",
    "url_response.raise_for_status()\n",
    "soup = BeautifulSoup(\n",
    "    markup=url_response.content,\n",
    "    features=\"html.parser\"\n",
    "    )\n",
    "# .string is None when <title> is empty or wraps more than one child.\n",
    "if soup.title is not None and soup.title.string:\n",
    "    website_title = soup.title.string\n",
    "else:\n",
    "    website_title = \"No title found!!!\"\n",
    "# Fall back to the whole document when the page has no <body>, where\n",
    "# soup.body is None and calling it would raise TypeError.\n",
    "content_root = soup.body if soup.body is not None else soup\n",
    "# Drop elements that carry no readable text before extracting it.\n",
    "for irrelevant in content_root([\"script\", \"style\", \"img\", \"input\"]):\n",
    "    irrelevant.decompose()\n",
    "website_text = content_root.get_text(\n",
    "    separator=\"\\n\",\n",
    "    strip=True\n",
    "    )\n",
    "\n",
    "# Step 3: Create Prompts\n",
    "# Adjacent string literals are joined by the parser, so the indentation of\n",
    "# the continuation lines does not end up inside the prompt text.\n",
    "print(\"[INFO] Creating system prompt ...\")\n",
    "system_prompt = (\n",
    "    \"You are an assistant that analyzes the contents of a \"\n",
    "    \"website and provides a short summary, ignoring text that might be \"\n",
    "    \"navigation related. Respond in markdown.\"\n",
    ")\n",
    "\n",
    "print(\"[INFO] Creating user prompt ...\")\n",
    "user_prompt = f\"You are looking at a website titled {website_title}\"\n",
    "user_prompt += (\n",
    "    \"\\nBased on the contents of the website, please provide \"\n",
    "    \"a short summary of this website in markdown. If the website \"\n",
    "    \"includes news or announcements, summarize them, too. The contents \"\n",
    "    \"of this website are as follows:\\n\\n\"\n",
    ")\n",
    "user_prompt += website_text\n",
    "\n",
    "# Step 4: Make Messages List\n",
    "print(\"[INFO] Making messages list ...\")\n",
    "messages = [\n",
    "    {\"role\": \"system\", \"content\": system_prompt},\n",
    "    {\"role\": \"user\", \"content\": user_prompt}\n",
    "]\n",
    "\n",
    "# Step 5: Call Model and Print Results\n",
    "if approach == \"local-call\":\n",
    "    # Plain HTTP POST against the chat endpoint on localhost.\n",
    "    response = requests.post(\n",
    "        url=\"http://localhost:11434/api/chat\",\n",
    "        json={\n",
    "            \"model\": MODEL,\n",
    "            \"messages\": messages,\n",
    "            \"stream\": False\n",
    "            },\n",
    "        headers={\"Content-Type\": \"application/json\"}\n",
    "    )\n",
    "    # Surface HTTP errors here instead of failing later while reading\n",
    "    # the expected keys out of the response body.\n",
    "    response.raise_for_status()\n",
    "    print(\"[INFO] Printing result ...\")\n",
    "    display(Markdown(response.json()[\"message\"][\"content\"]))\n",
    "elif approach == \"python-package\":\n",
    "    # Same chat call through the ollama Python package.\n",
    "    response = ollama.chat(\n",
    "        model=MODEL,\n",
    "        messages=messages,\n",
    "        stream=False\n",
    "    )\n",
    "    print(\"[INFO] Printing result ...\")\n",
    "    display(Markdown(response[\"message\"][\"content\"]))\n",
    "elif approach == \"openai-python-library\":\n",
    "    # OpenAI client pointed at the local /v1 endpoint instead of OpenAI.\n",
    "    ollama_via_openai = OpenAI(\n",
    "        base_url=\"http://localhost:11434/v1\",\n",
    "        api_key=\"ollama\"\n",
    "    )\n",
    "    response = ollama_via_openai.chat.completions.create(\n",
    "        model=MODEL,\n",
    "        messages=messages\n",
    "    )\n",
    "    print(\"[INFO] Printing result ...\")\n",
    "    display(Markdown(response.choices[0].message.content))\n",
    "else:\n",
    "    raise ValueError(f\"[ERROR] Invalid approach! Please select an approach from {approaches} and try again.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b0a6676e-fb43-4725-9389-2acd74c13c4e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}