Spaces:
Running
Running
""" | |
Web Search Tool using DuckDuckGo via smolagents with Mistral | |
""" | |
from .base_tool import BaseTool | |
from typing import Optional | |
from smolagents import CodeAgent, DuckDuckGoSearchTool, FinalAnswerTool, VisitWebpageTool, OpenAIServerModel | |
import os | |
class WebSearchTool(BaseTool): | |
"""Web search using DuckDuckGo via smolagents with Mistral model""" | |
def __init__(self): | |
super().__init__("Web Search", "Search the web for current information using DuckDuckGo") | |
self.rate_limit_delay = 2.0 | |
try: | |
# Create custom Mistral model for CodeAgent | |
mistral_key = os.getenv("MISTRAL_API_KEY") | |
if mistral_key: | |
mistral_model = OpenAIServerModel( | |
api_key=mistral_key, | |
api_base="https://api.mistral.ai/v1", | |
model_id="mistral-large-latest" | |
) | |
else: | |
# Fallback to InferenceClientModel if no Mistral key | |
from smolagents import InferenceClientModel | |
mistral_model = InferenceClientModel() | |
self.agent = CodeAgent( | |
tools=[ | |
DuckDuckGoSearchTool(), | |
VisitWebpageTool(), | |
FinalAnswerTool() | |
], | |
model=mistral_model, # Use Mistral instead of InferenceClientModel | |
max_steps=3, | |
verbosity_level=0 | |
) | |
except Exception as e: | |
print(f"Warning: Could not initialize web search agent: {e}") | |
self.agent = None | |
# Keep the rest of your original search method unchanged | |
def search(self, query: str, max_results: int = 5, **kwargs) -> str: | |
"""Use the CodeAgent to perform comprehensive web search and analysis""" | |
if not self.agent: | |
return self.format_error_response(query, "Web search agent not available. Please check dependencies.") | |
self.rate_limit() | |
try: | |
# Simplified prompt for better reliability | |
agent_prompt = f"Search the web for current information about: {query}. Provide a comprehensive summary of the most relevant and recent findings." | |
# Run the agent | |
result = self.agent.run(agent_prompt) | |
# Clean and validate the result | |
if result and isinstance(result, str) and len(result.strip()) > 0: | |
# Remove any code-like syntax that might cause parsing errors | |
cleaned_result = result.replace('```', '').replace('`', '').strip() | |
return f"**Web Search Results for: {query}**\n\n{cleaned_result}" | |
else: | |
return f"**Web Search for: {query}**\n\nNo clear results found. Please try a different search term." | |
except Exception as e: | |
# More robust fallback | |
error_msg = str(e) | |
if "max steps" in error_msg.lower(): | |
return f"**Web Search for: {query}**\n\nSearch completed but reached complexity limit. Basic analysis: This query relates to {query.lower()} and would benefit from further investigation." | |
elif "syntax" in error_msg.lower(): | |
return f"**Web Search for: {query}**\n\nSearch encountered formatting issues but found relevant information about {query.lower()}." | |
else: | |
return self.format_error_response(query, error_msg) | |
def should_use_for_query(self, query: str) -> bool: | |
"""Web search is good for current events, news, and general information""" | |
current_indicators = ['news', 'recent', 'latest', 'current', 'today', '2024', '2025'] | |
general_indicators = ['what is', 'how to', 'guide', 'tutorial', 'review'] | |
query_lower = query.lower() | |
return any(indicator in query_lower for indicator in current_indicators + general_indicators) | |
def extract_key_info(self, text: str) -> dict: | |
"""Extract key information from web search results""" | |
base_info = super().extract_key_info(text) | |
if text: | |
# Look for news-specific patterns | |
base_info.update({ | |
'has_news_keywords': bool(any(word in text.lower() for word in ['breaking', 'report', 'announced', 'according to'])), | |
'has_quotes': text.count('"') > 1, | |
'has_sources': bool(any(source in text.lower() for source in ['reuters', 'bloomberg', 'bbc', 'cnn', 'associated press'])) | |
}) | |
return base_info |