consilium_ofp

Running

App Files Files Community

consilium_ofp / research_tools /web_search.py

azettl

Update research_tools/web_search.py

c72543c verified about 2 months ago

raw

history blame contribute delete

4.6 kB

	"""
	Web Search Tool using DuckDuckGo via smolagents with Mistral
	"""
	from .base_tool import BaseTool
	from typing import Optional
	from smolagents import CodeAgent, DuckDuckGoSearchTool, FinalAnswerTool, VisitWebpageTool, OpenAIServerModel
	import os

	class WebSearchTool(BaseTool):
	    """Web search using DuckDuckGo via smolagents with Mistral model.

	    A smolagents ``CodeAgent`` equipped with DuckDuckGo search, web page
	    visiting and a final-answer tool is built once in ``__init__``. If the
	    agent cannot be constructed, ``self.agent`` is set to ``None`` and
	    ``search`` returns an error response instead of raising.
	    """

	    # Substrings (matched against the lower-cased query) that suggest the
	    # query is about recent or time-sensitive information.
	    _CURRENT_INDICATORS = ('news', 'recent', 'latest', 'current', 'today', '2024', '2025')
	    # Substrings that suggest a general informational / how-to query.
	    _GENERAL_INDICATORS = ('what is', 'how to', 'guide', 'tutorial', 'review')
	    # Substrings (matched against lower-cased result text) typical of news copy.
	    _NEWS_KEYWORDS = ('breaking', 'report', 'announced', 'according to')
	    # Names of news outlets looked for in lower-cased result text.
	    _NEWS_SOURCES = ('reuters', 'bloomberg', 'bbc', 'cnn', 'associated press')

	    def __init__(self):
	        """Register the tool with the base class and try to build the agent.

	        Any exception raised while creating the model or the agent is caught,
	        reported on stdout, and leaves ``self.agent`` as ``None``.
	        """
	        super().__init__("Web Search", "Search the web for current information using DuckDuckGo")
	        # NOTE(review): presumably read by BaseTool.rate_limit() -- confirm.
	        self.rate_limit_delay = 2.0

	        try:
	            self.agent = CodeAgent(
	                tools=[
	                    DuckDuckGoSearchTool(),
	                    VisitWebpageTool(),
	                    FinalAnswerTool(),
	                ],
	                model=self._build_model(),
	                max_steps=3,
	                verbosity_level=0,
	            )
	        except Exception as e:
	            print(f"Warning: Could not initialize web search agent: {e}")
	            self.agent = None

	    @staticmethod
	    def _build_model():
	        """Return the LLM backend for the agent.

	        Uses Mistral's OpenAI-compatible endpoint when ``MISTRAL_API_KEY`` is
	        set in the environment; otherwise falls back to a default
	        ``InferenceClientModel``.
	        """
	        mistral_key = os.getenv("MISTRAL_API_KEY")
	        if mistral_key:
	            return OpenAIServerModel(
	                api_key=mistral_key,
	                api_base="https://api.mistral.ai/v1",
	                model_id="mistral-large-latest",
	            )

	        # Imported lazily so the fallback class is only required when used.
	        from smolagents import InferenceClientModel
	        return InferenceClientModel()

	    def search(self, query: str, max_results: int = 5, **kwargs) -> str:
	        """Use the CodeAgent to perform comprehensive web search and analysis.

	        Args:
	            query: Topic to search for.
	            max_results: Currently unused by this implementation; kept in the
	                signature so existing callers keep working.
	            **kwargs: Ignored.

	        Returns:
	            A text report beginning with a "Web Search ..." header, or the
	            value of ``format_error_response`` when the agent is unavailable,
	            the query is empty, or the agent fails in an unrecognised way.
	            Exceptions raised by the agent are not propagated.
	        """
	        if not self.agent:
	            return self.format_error_response(query, "Web search agent not available. Please check dependencies.")

	        # Do not spend a rate-limited agent run on an empty query.
	        if not query or not query.strip():
	            return self.format_error_response(query, "Empty search query.")

	        self.rate_limit()

	        try:
	            # Simplified prompt for better reliability
	            agent_prompt = f"Search the web for current information about: {query}. Provide a comprehensive summary of the most relevant and recent findings."

	            result = self.agent.run(agent_prompt)

	            # The agent's final answer is not guaranteed to be a string, so
	            # coerce it instead of discarding non-string answers.
	            text = "" if result is None else str(result)

	            # Strip backticks (this also removes ``` fences) so the output
	            # carries no code-like syntax that might cause parsing errors.
	            cleaned_result = text.replace('`', '').strip()
	            if cleaned_result:
	                return f"Web Search Results for: {query}\n\n{cleaned_result}"
	            return f"Web Search for: {query}\n\nNo clear results found. Please try a different search term."

	        except Exception as e:
	            # Map known agent failure messages to softer fallbacks; anything
	            # else is reported through the standard error response.
	            error_msg = str(e)
	            error_lower = error_msg.lower()
	            if "max steps" in error_lower:
	                return f"Web Search for: {query}\n\nSearch completed but reached complexity limit. Basic analysis: This query relates to {query.lower()} and would benefit from further investigation."
	            if "syntax" in error_lower:
	                return f"Web Search for: {query}\n\nSearch encountered formatting issues but found relevant information about {query.lower()}."
	            return self.format_error_response(query, error_msg)

	    def should_use_for_query(self, query: str) -> bool:
	        """Web search is good for current events, news, and general information.

	        Returns True when the lower-cased query contains any of the
	        current-event or general-information indicator substrings.
	        """
	        query_lower = query.lower()
	        indicators = self._CURRENT_INDICATORS + self._GENERAL_INDICATORS
	        return any(indicator in query_lower for indicator in indicators)

	    def extract_key_info(self, text: str) -> dict:
	        """Extract key information from web search results.

	        Starts from the base class result and, for non-empty text, adds:
	        ``has_news_keywords`` (any news keyword present),
	        ``has_quotes`` (more than one double-quote character) and
	        ``has_sources`` (any known news outlet name present).
	        """
	        # NOTE(review): assumes the base implementation returns a dict.
	        base_info = super().extract_key_info(text)

	        if text:
	            # Lower-case once instead of once per keyword.
	            text_lower = text.lower()
	            base_info.update({
	                'has_news_keywords': any(word in text_lower for word in self._NEWS_KEYWORDS),
	                'has_quotes': text.count('"') > 1,
	                'has_sources': any(source in text_lower for source in self._NEWS_SOURCES),
	            })

	        return base_info