# NOTE(review): the lines below are Hugging Face file-viewer page chrome
# (author, commit message, hash, size) captured when this file was scraped.
# They are not Python; kept as comments so the module stays importable.
# mjschock's picture
# Enhance agent functionality in main_v2.py by adding WikipediaSearchTool and updating DuckDuckGoSearchTool and VisitWebpageTool parameters. Modify agent initialization to accommodate new tools and increase max results and output length. Update requirements.txt to include Wikipedia-API dependency. Refactor imports for better organization across agent modules.
# e4c7240 unverified
# raw
# history blame
# 1.35 kB
from typing import Any, Dict
import requests
from bs4 import BeautifulSoup
from smolagents import tool
@tool
def browse_webpage(url: str) -> Dict[str, Any]:
    """
    Browse a webpage and extract its title, paragraph text, and absolute links.

    Args:
        url: URL of the webpage to browse

    Returns:
        On success, a dictionary with keys:
          - "title": the page's <title> text, or "No title found" if the
            tag is missing or empty
          - "content": newline-joined text of all <p> elements
          - "links": list of {"text", "href"} dicts for absolute http(s)
            links only (relative hrefs are skipped)
        On failure, a dictionary with a single "error" key holding the
        exception message.
    """
    try:
        # Present a browser-like User-Agent: some sites reject the default
        # python-requests client string.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        # Fix: requests has no default timeout, so an unresponsive host
        # would hang the agent forever. Cap the wait at 30 seconds.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        # Fix: soup.title.string can be None for an empty <title> tag;
        # fall back to the placeholder in that case too.
        if soup.title and soup.title.string:
            title = soup.title.string
        else:
            title = "No title found"

        # Main text content: the stripped text of every paragraph element.
        paragraphs = soup.find_all("p")
        text_content = "\n".join(p.get_text().strip() for p in paragraphs)

        # Collect absolute links only; relative hrefs are intentionally
        # skipped (no base-URL resolution is performed).
        links = []
        for link in soup.find_all("a", href=True):
            href = link["href"]
            text = link.get_text().strip()
            if href.startswith("http"):
                links.append({"text": text, "href": href})

        return {"title": title, "content": text_content, "links": links}
    except Exception as e:
        # Tools report failures as data rather than raising, so the calling
        # agent can observe the error message and recover.
        return {"error": str(e)}