Spaces:

mamogasr
/

llm_engineering

Sleeping

File size: 4,622 Bytes

5fdb69e

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d25b0aef-3e5e-4026-90ee-2b373bf262b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Step 0: Import Libraries\n",
    "from bs4 import BeautifulSoup\n",
    "from IPython.display import Markdown, display\n",
    "import ollama\n",
    "from openai import OpenAI\n",
    "import requests\n",
    "\n",
    "# Step 1: Set Constants and Variables\n",
    "print(\"[INFO] Setting constants and variables ...\")\n",
    "WEBSITE_URL = \"https://arxiv.org/\"\n",
    "MODEL = \"llama3.2\"\n",
    "# requests applies no timeout by default, so every requests call below gets\n",
    "# an explicit limit in seconds. MODEL_TIMEOUT is used by the \"local-call\"\n",
    "# branch only and is larger because generation can be slow.\n",
    "SCRAPE_TIMEOUT = 30\n",
    "MODEL_TIMEOUT = 300\n",
    "approaches = [\"local-call\", \"python-package\", \"openai-python-library\"]\n",
    "approach = \"openai-python-library\"  # must be one of `approaches`\n",
    "\n",
    "# Step 2: Scrape Website\n",
    "print(\"[INFO] Scraping website ...\")\n",
    "url_response = requests.get(\n",
    "    url=WEBSITE_URL,\n",
    "    headers={\"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"},\n",
    "    timeout=SCRAPE_TIMEOUT\n",
    "    )\n",
    "# Fail on 4xx/5xx responses instead of summarizing an error page.\n",
    "url_response.raise_for_status()\n",
    "soup = BeautifulSoup(\n",
    "    markup=url_response.content,\n",
    "    features=\"html.parser\"\n",
    "    )\n",
    "# soup.title.string is None when <title> is empty or contains nested tags,\n",
    "# so check it as well as the tag itself before using it in the prompt.\n",
    "website_title = (\n",
    "    soup.title.string\n",
    "    if soup.title and soup.title.string\n",
    "    else \"No title found!!!\"\n",
    ")\n",
    "# soup.body is None for documents without a <body> tag; fall back to the\n",
    "# whole parsed document so the cleanup and text extraction still work.\n",
    "page_root = soup.body if soup.body is not None else soup\n",
    "# Drop elements that carry no readable text before extracting the content.\n",
    "for irrelevant in page_root([\"script\", \"style\", \"img\", \"input\"]):\n",
    "    irrelevant.decompose()\n",
    "website_text = page_root.get_text(\n",
    "    separator=\"\\n\",\n",
    "    strip=True\n",
    "    )\n",
    "\n",
    "# Step 3: Create Prompts\n",
    "# Adjacent string literals are joined by the parser. A backslash continuation\n",
    "# inside a single literal would instead embed the next line's leading\n",
    "# indentation in the prompt text.\n",
    "print(\"[INFO] Creating system prompt ...\")\n",
    "system_prompt = (\n",
    "    \"You are an assistant that analyzes the contents of a \"\n",
    "    \"website and provides a short summary, ignoring text that might be \"\n",
    "    \"navigation related. Respond in markdown.\"\n",
    ")\n",
    "\n",
    "print(\"[INFO] Creating user prompt ...\")\n",
    "user_prompt = f\"You are looking at a website titled {website_title}\"\n",
    "user_prompt += (\n",
    "    \"\\nBased on the contents of the website, please provide \"\n",
    "    \"a short summary of this website in markdown. If the website \"\n",
    "    \"includes news or announcements, summarize them, too. The contents \"\n",
    "    \"of this website are as follows:\\n\\n\"\n",
    ")\n",
    "user_prompt += website_text\n",
    "\n",
    "# Step 4: Make Messages List\n",
    "print(\"[INFO] Making messages list ...\")\n",
    "messages = [\n",
    "    {\"role\": \"system\", \"content\": system_prompt},\n",
    "    {\"role\": \"user\", \"content\": user_prompt}\n",
    "]\n",
    "\n",
    "# Step 5: Call Model and Print Results\n",
    "# Each branch only extracts the markdown text; it is rendered once below.\n",
    "if approach == \"local-call\":\n",
    "    # Plain HTTP request against the chat endpoint on localhost.\n",
    "    response = requests.post(\n",
    "        url=\"http://localhost:11434/api/chat\",\n",
    "        json={\n",
    "            \"model\": MODEL,\n",
    "            \"messages\": messages,\n",
    "            \"stream\": False\n",
    "            },\n",
    "        headers={\"Content-Type\": \"application/json\"},\n",
    "        timeout=MODEL_TIMEOUT\n",
    "    )\n",
    "    # Surface HTTP errors here; an error payload may lack the \"message\" key\n",
    "    # and would otherwise show up as a confusing KeyError.\n",
    "    response.raise_for_status()\n",
    "    summary_markdown = response.json()[\"message\"][\"content\"]\n",
    "elif approach == \"python-package\":\n",
    "    response = ollama.chat(\n",
    "        model=MODEL,\n",
    "        messages=messages,\n",
    "        stream=False\n",
    "    )\n",
    "    summary_markdown = response[\"message\"][\"content\"]\n",
    "elif approach == \"openai-python-library\":\n",
    "    # NOTE(review): api_key looks like a placeholder for the local server,\n",
    "    # not a real credential - confirm before pointing this at a hosted API.\n",
    "    ollama_via_openai = OpenAI(\n",
    "        base_url=\"http://localhost:11434/v1\",\n",
    "        api_key=\"ollama\"\n",
    "    )\n",
    "    response = ollama_via_openai.chat.completions.create(\n",
    "        model=MODEL,\n",
    "        messages=messages\n",
    "    )\n",
    "    summary_markdown = response.choices[0].message.content\n",
    "else:\n",
    "    raise ValueError(f\"[ERROR] Invalid approach! Please select an approach from {approaches} and try again.\")\n",
    "\n",
    "print(\"[INFO] Printing result ...\")\n",
    "display(Markdown(summary_markdown))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b0a6676e-fb43-4725-9389-2acd74c13c4e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}