Spaces:
Sleeping
Sleeping
# Web search and content tools
import re

import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
from smolagents import Tool
class WebSearchTool(Tool):
    """smolagents tool that runs a DuckDuckGo text search and returns the hits as readable text."""

    name = "web_search"
    description = "Search the web for information about a query using DuckDuckGo."
    inputs = {
        "query": {
            "type": "string",
            "description": "The search query."
        }
    }
    output_type = "string"

    def __init__(self, max_results: int = 3, **kwargs):
        """Create the tool.

        Args:
            max_results: Maximum number of search hits to return.
                Defaults to 3, matching the previously hard-coded value.
            **kwargs: Forwarded to the smolagents ``Tool`` base class.
        """
        super().__init__(**kwargs)
        self.max_results = max_results

    def forward(self, query: str) -> str:
        """Search DuckDuckGo for *query*.

        Args:
            query: The search query string.

        Returns:
            A numbered, human-readable block of results (title, snippet,
            source URL), or a message when there are no results. Search /
            network failures are reported in the returned string rather
            than raised, so the calling agent can react to them.

        Raises:
            TypeError: If *query* is not a string.
        """
        # Explicit raise instead of `assert`: asserts are stripped under -O.
        if not isinstance(query, str):
            raise TypeError("Query must be a string.")
        try:
            with DDGS() as ddgs:
                hits = list(ddgs.text(query, max_results=self.max_results))
            if not hits:
                return "No web search results found."
            # Build the report with join() rather than repeated += concatenation.
            parts = ["\nWeb Search Results:\n"]
            for i, hit in enumerate(hits, 1):
                parts.append(
                    f"\n{i}. {hit['title']}\n {hit['body']}\n Source: {hit['href']}\n"
                )
            return "".join(parts)
        except Exception as e:
            # Best-effort tool: surface the failure as text instead of crashing the agent.
            print(f"Error in web search: {str(e)}")
            return f"Error performing web search: {str(e)}"
class WebContentTool(Tool):
    """smolagents tool that fetches a web page and extracts its visible text content."""

    name = "web_content"
    description = "Fetch and extract content from a specific webpage."
    inputs = {
        "url": {
            "type": "string",
            "description": "The URL of the webpage to fetch content from."
        }
    }
    output_type = "string"

    # Maximum characters of extracted text returned before truncation.
    MAX_CHARS = 2000

    def forward(self, url: str) -> str:
        """Download *url* and return its cleaned, visible text.

        Args:
            url: The URL of the webpage to fetch.

        Returns:
            The page's visible text (scripts/styles removed, whitespace
            normalized), truncated to ``MAX_CHARS`` characters. Fetch or
            parse failures are reported in the returned string rather
            than raised, so the calling agent can react to them.

        Raises:
            TypeError: If *url* is not a string.
        """
        # Explicit raise instead of `assert`: asserts are stripped under -O.
        if not isinstance(url, str):
            raise TypeError("URL must be a string.")
        try:
            # A browser-like User-Agent avoids trivial bot blocking on some sites.
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            }
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            # Remove non-visible elements before extracting text.
            for tag in soup(["script", "style"]):
                tag.extract()
            text = soup.get_text(separator='\n')
            lines = (line.strip() for line in text.splitlines())
            # Split on DOUBLE spaces so multi-word phrases stay on one line;
            # splitting on a single space would put every word on its own line.
            chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
            text = '\n'.join(chunk for chunk in chunks if chunk)
            if len(text) > self.MAX_CHARS:
                text = text[:self.MAX_CHARS] + "... [content truncated]"
            return f"Content from {url}:\n\n{text}"
        except Exception as e:
            # Best-effort tool: surface the failure as text instead of crashing the agent.
            print(f"Error fetching web content: {str(e)}")
            return f"Error fetching content from {url}: {str(e)}"