# assignment_agent/custom_tools.py
# Author: Arbnor Tefiki
# Custom LangChain tools for the assignment agent: web search (with
# multiple providers and a search engine fallback chain), calculator,
# Wikipedia lookup, dictionary definitions, and text utilities.
import requests
from duckduckgo_search import DDGS
from langchain_core.tools import tool
import time
import re
import json
from datetime import datetime, timedelta
import urllib.parse
# Rate limiting
# Module-level state shared by web_search: the timestamp (seconds since
# epoch, or None before the first search) used to space out provider
# requests, and the minimum gap to enforce between searches (seconds).
last_search_time = None
min_search_interval = 1.0
@tool
def reverse_text(input: str) -> str:
    """Reverse the characters in a text or string."""
    # Build the reversed string explicitly instead of using slice syntax.
    return "".join(reversed(input))
@tool
def web_search(query: str) -> str:
    """Perform web search using multiple providers for robustness.

    Providers are tried in order (Wikipedia, Google fallback, DuckDuckGo,
    Bing fallback) until at least 3 results have been collected. Calls are
    spaced at least min_search_interval seconds apart via module-level
    rate-limit state.

    Args:
        query: Free-text search query.

    Returns:
        Up to 8 results formatted as "title. content (Source: url)",
        separated by blank lines, or an error message when the query is
        empty or every provider fails.
    """
    global last_search_time
    # Rate limiting: sleep off the remainder of the minimum interval.
    if last_search_time:
        elapsed = time.time() - last_search_time
        if elapsed < min_search_interval:
            time.sleep(min_search_interval - elapsed)
    # BUG FIX: record this call's timestamp. Previously last_search_time
    # was read but never written anywhere, so the rate limiter above
    # could never engage.
    last_search_time = time.time()
    query = query.strip()
    if not query:
        return "Empty search query"
    results = []
    # Try multiple search methods in order
    search_methods = [
        ("Wikipedia", search_wikipedia),
        ("Google (via SerpAPI simulation)", search_google_fallback),
        ("DuckDuckGo", search_duckduckgo),
        ("Bing", search_bing_fallback),
    ]
    for method_name, method_func in search_methods:
        try:
            print(f"Trying {method_name} search...")
            method_results = method_func(query)
            if method_results:
                results.extend(method_results)
                print(f"{method_name} found {len(method_results)} results")
                if len(results) >= 3:  # Enough results
                    break
        except Exception as e:
            # Best-effort chain: a failing provider is logged and skipped.
            print(f"{method_name} search failed: {e}")
            continue
    if not results:
        return "No search results found. All search methods failed."
    # Format results (capped at 8 to keep tool output compact).
    formatted_results = []
    for i, result in enumerate(results[:8]):
        if isinstance(result, dict):
            title = result.get('title', '')
            content = result.get('content', '')
            url = result.get('url', '')
            formatted = f"{title}. {content}"
            if url:
                formatted += f" (Source: {url})"
            formatted_results.append(formatted)
        else:
            formatted_results.append(str(result))
    return "\n\n".join(formatted_results)
def search_wikipedia(query: str) -> list:
    """Search Wikipedia directly.

    Two-phase lookup against the MediaWiki API: first a full-text search
    for matching article titles, then a per-title extract fetch for the
    top 3 hits. Network or parsing failures are logged and an empty (or
    partial) list is returned — callers treat [] as "provider failed".

    Returns a list of dicts with keys "title", "content", "url".
    """
    results = []
    try:
        # Wikipedia API search
        search_url = "https://en.wikipedia.org/w/api.php"
        # First, search for articles
        search_params = {
            "action": "query",
            "list": "search",
            "srsearch": query,
            "format": "json",
            "srlimit": 5,
            "srprop": "snippet|titlesnippet|size|wordcount"
        }
        response = requests.get(search_url, params=search_params, timeout=10)
        if response.status_code == 200:
            data = response.json()
            search_results = data.get("query", {}).get("search", [])
            # Only the top 3 of the (up to 5) search hits are expanded.
            for item in search_results[:3]:
                title = item.get("title", "")
                # Strip the <span class="searchmatch"> markup the API
                # embeds in snippets.
                snippet = re.sub(r'<[^>]+>', '', item.get("snippet", ""))
                # Get more detailed content
                page_params = {
                    "action": "query",
                    "prop": "extracts|info",
                    "exintro": True,        # intro section only
                    "explaintext": True,    # plain text, no HTML
                    "inprop": "url",
                    "titles": title,
                    "format": "json",
                    "exsentences": 5
                }
                page_response = requests.get(search_url, params=page_params, timeout=10)
                if page_response.status_code == 200:
                    page_data = page_response.json()
                    # "pages" is keyed by numeric page id; for a single
                    # title query there is normally exactly one entry.
                    pages = page_data.get("query", {}).get("pages", {})
                    for page_id, page_info in pages.items():
                        extract = page_info.get("extract", "")
                        url = page_info.get("fullurl", "")
                        if extract:
                            results.append({
                                "title": f"Wikipedia: {title}",
                                # Truncate long intros to 500 chars.
                                "content": extract[:500],
                                "url": url
                            })
                            break
                        else:
                            # Use snippet if can't get extract
                            results.append({
                                "title": f"Wikipedia: {title}",
                                "content": snippet,
                                "url": f"https://en.wikipedia.org/wiki/{title.replace(' ', '_')}"
                            })
    except Exception as e:
        print(f"Wikipedia search error: {e}")
    return results
def search_duckduckgo(query: str) -> list:
    """Search using DuckDuckGo"""
    hits = []
    try:
        # Plain text search; extra parameters are deliberately avoided
        # because they have caused provider-side errors.
        with DDGS() as ddgs:
            for entry in ddgs.text(query, max_results=5):
                hits.append({
                    "title": entry.get("title", ""),
                    "content": entry.get("body", ""),
                    "url": entry.get("href", ""),
                })
    except Exception as e:
        print(f"DuckDuckGo error: {e}")
    return hits
def search_google_fallback(query: str) -> list:
    """Placeholder Google search fallback.

    A real implementation would use the Google Custom Search JSON API;
    scraping google.com/search is brittle (and against the ToS), so this
    stub returns no results and lets web_search continue down its
    provider chain.

    Args:
        query: Search query (currently unused by the placeholder).

    Returns:
        Always an empty list.
    """
    # Dead code removed: the original built an encoded query, a
    # User-Agent header, and a search URL but never issued any request,
    # so the observable behavior (empty result list) is unchanged.
    return []
def search_bing_fallback(query: str) -> list:
    """Placeholder Bing search fallback.

    The Bing Web Search API would be called here in production. Until an
    API key is wired in, this stub returns no results so web_search moves
    on to the next provider.

    Args:
        query: Search query (currently unused by the placeholder).

    Returns:
        Always an empty list.
    """
    # Simplified: the original wrapped a bare `pass` in try/except, which
    # could never raise. Behavior (empty result list) is unchanged.
    return []
@tool
def calculate(expression: str) -> str:
    """Evaluate mathematical expressions safely.

    Supports ×/÷ symbols, ^ for exponentiation, thousands separators
    (e.g. "1,000"), and percentage phrasing ("20% of 50", "20%").

    Args:
        expression: Arithmetic expression as text.

    Returns:
        The result as a string (whole-number floats are rendered without
        a trailing ".0"), or "Calculation error: ..." on failure.
    """
    try:
        # Clean the expression
        expression = expression.strip()
        # Normalize common math symbols to Python operators.
        expression = expression.replace("×", "*").replace("÷", "/")
        expression = expression.replace("^", "**")
        # BUG FIX: strip only commas that act as thousands separators
        # (a digit, then exactly 3 digits). The previous blanket
        # replace(",", "") corrupted multi-argument calls, e.g.
        # "min(1,2)" became "min(12)".
        expression = re.sub(r'(?<=\d),(?=\d{3}\b)', '', expression)
        # Handle percentages: "X% of Y" -> (Y * X / 100), then bare
        # "X%" -> (X/100).
        expression = re.sub(r'(\d+(?:\.\d+)?)\s*%\s*of\s*(\d+(?:\.\d+)?)', r'(\2 * \1 / 100)', expression)
        expression = re.sub(r'(\d+(?:\.\d+)?)\s*%', r'(\1/100)', expression)
        # Restricted eval: empty __builtins__ plus a small whitelist.
        # NOTE(review): eval on model-supplied text is still risky even
        # with restricted globals; an ast-based evaluator would be safer.
        allowed_names = {
            "abs": abs, "round": round, "min": min, "max": max,
            "pow": pow, "sum": sum, "__builtins__": {}
        }
        result = eval(expression, allowed_names)
        if isinstance(result, float) and result.is_integer():
            return str(int(result))
        return str(result)
    except Exception as e:
        return f"Calculation error: {e}"
@tool
def wikipedia_summary(query: str) -> str:
    """Get Wikipedia summary for a topic."""
    try:
        hits = search_wikipedia(query)
        if not hits:
            return f"No Wikipedia article found for '{query}'"
        # Merge the top two hits into one "title: content" summary block.
        return "\n\n".join(f"{hit['title']}: {hit['content']}" for hit in hits[:2])
    except Exception as e:
        return f"Wikipedia error: {e}"
@tool
def define_term(term: str) -> str:
    """Define a term using dictionary API.

    Queries dictionaryapi.dev first, then falls back to a Wikipedia
    search when no dictionary entry is found.

    Args:
        term: The word or phrase to define.

    Returns:
        The first dictionary definition, a 200-char Wikipedia extract,
        or an error/"not found" message.
    """
    try:
        term = term.strip().lower()
        # BUG FIX: percent-encode the term so multi-word or non-ASCII
        # inputs (e.g. "ad hoc", "naïve") form a valid request URL
        # instead of one with raw spaces/accents in the path.
        encoded_term = urllib.parse.quote(term)
        # Try dictionary API
        response = requests.get(
            f"https://api.dictionaryapi.dev/api/v2/entries/en/{encoded_term}",
            timeout=10
        )
        if response.status_code == 200:
            data = response.json()
            definitions = []
            # Response shape: list of entries -> meanings -> definitions.
            for entry in data:
                for meaning in entry.get("meanings", []):
                    for definition in meaning.get("definitions", []):
                        def_text = definition.get("definition", "")
                        if def_text:
                            definitions.append(def_text)
            if definitions:
                return definitions[0]  # Return first definition
        # Fallback to Wikipedia
        wiki_results = search_wikipedia(f"{term} definition meaning")
        if wiki_results:
            return wiki_results[0]['content'][:200]
        return f"No definition found for '{term}'"
    except Exception as e:
        return f"Definition error: {e}"
# Advanced search function for specific GAIA queries
@tool
def gaia_smart_search(query: str) -> str:
    """Smart search specifically optimized for GAIA questions."""
    # NOTE(review): this invokes the web_search tool object directly;
    # confirm direct __call__ is supported by the installed langchain
    # version (newer releases prefer .invoke()).
    lowered = query.lower()
    # Album / discography questions: extract the artist name.
    if 'album' in lowered or 'discography' in lowered:
        match = re.search(r'([\w\s]+?)(?:\s+album|\s+discography|\s+between)', query)
        if match:
            artist = match.group(1).strip()
            return web_search(f"{artist} discography albums list")
    # Olympic questions: extract the games year.
    if 'olympic' in lowered:
        match = re.search(r'(\d{4})\s+(?:summer|winter)?\s*olympics', lowered)
        if match:
            return web_search(f"{match.group(1)} Olympics participating countries athletes count")
    # Academic paper questions: extract the author name.
    if 'paper' in lowered or 'article' in lowered:
        match = re.search(r'by\s+([\w\s]+?)(?:\s+was|\s+published|\s+in)', query)
        if match:
            author = match.group(1).strip()
            return web_search(f"{author} research paper article")
    # No special pattern matched: fall through to a plain web search.
    return web_search(query)
# List of tools
# Registry of all @tool-decorated callables exported to the agent.
TOOLS = [web_search, calculate, wikipedia_summary, define_term, reverse_text, gaia_smart_search]