import re
import time
import urllib.parse

import requests
from duckduckgo_search import DDGS
from langchain_core.tools import tool

# Rate limiting state shared by web_search
last_search_time = None
min_search_interval = 1.0


@tool
def reverse_text(text: str) -> str:
    """Reverse the characters in a text or string."""
    return text[::-1]


@tool
def web_search(query: str) -> str:
    """Perform a web search using multiple providers for robustness."""
    global last_search_time

    # Rate limiting: ensure at least min_search_interval seconds between searches
    if last_search_time:
        elapsed = time.time() - last_search_time
        if elapsed < min_search_interval:
            time.sleep(min_search_interval - elapsed)
    last_search_time = time.time()

    query = query.strip()
    if not query:
        return "Empty search query"

    results = []

    # Try multiple search methods in order; the Google and Bing entries are
    # stubs that return no results until a proper API is wired in.
    search_methods = [
        ("Wikipedia", search_wikipedia),
        ("Google (fallback stub)", search_google_fallback),
        ("DuckDuckGo", search_duckduckgo),
        ("Bing (fallback stub)", search_bing_fallback),
    ]

    for method_name, method_func in search_methods:
        try:
            print(f"Trying {method_name} search...")
            method_results = method_func(query)
            if method_results:
                results.extend(method_results)
                print(f"{method_name} found {len(method_results)} results")
                if len(results) >= 3:  # Enough results
                    break
        except Exception as e:
            print(f"{method_name} search failed: {e}")
            continue

    if not results:
        return "No search results found. All search methods failed."

    # Format results
    formatted_results = []
    for result in results[:8]:
        if isinstance(result, dict):
            title = result.get("title", "")
            content = result.get("content", "")
            url = result.get("url", "")
            formatted = f"{title}. {content}"
            if url:
                formatted += f" (Source: {url})"
            formatted_results.append(formatted)
        else:
            formatted_results.append(str(result))

    return "\n\n".join(formatted_results)


def search_wikipedia(query: str) -> list:
    """Search Wikipedia directly via the MediaWiki API."""
    results = []
    try:
        search_url = "https://en.wikipedia.org/w/api.php"

        # First, search for matching articles
        search_params = {
            "action": "query",
            "list": "search",
            "srsearch": query,
            "format": "json",
            "srlimit": 5,
            "srprop": "snippet|titlesnippet|size|wordcount",
        }

        response = requests.get(search_url, params=search_params, timeout=10)
        if response.status_code == 200:
            data = response.json()
            search_results = data.get("query", {}).get("search", [])

            for item in search_results[:3]:
                title = item.get("title", "")
                snippet = re.sub(r"<[^>]+>", "", item.get("snippet", ""))

                # Fetch a fuller plain-text extract for the page
                page_params = {
                    "action": "query",
                    "prop": "extracts|info",
                    "exintro": True,
                    "explaintext": True,
                    "inprop": "url",
                    "titles": title,
                    "format": "json",
                    "exsentences": 5,
                }

                page_response = requests.get(search_url, params=page_params, timeout=10)
                if page_response.status_code == 200:
                    page_data = page_response.json()
                    pages = page_data.get("query", {}).get("pages", {})
                    for page_info in pages.values():
                        extract = page_info.get("extract", "")
                        url = page_info.get("fullurl", "")
                        if extract:
                            results.append({
                                "title": f"Wikipedia: {title}",
                                "content": extract[:500],
                                "url": url,
                            })
                            break
                    else:
                        # Use the search snippet if no extract was available
                        results.append({
                            "title": f"Wikipedia: {title}",
                            "content": snippet,
                            "url": f"https://en.wikipedia.org/wiki/{title.replace(' ', '_')}",
                        })
    except Exception as e:
        print(f"Wikipedia search error: {e}")

    return results


def search_duckduckgo(query: str) -> list:
    """Search using DuckDuckGo."""
    results = []
    try:
        with DDGS() as ddgs:
            # Simple text search without problematic extra parameters
            search_results = list(ddgs.text(query, max_results=5))
            for r in search_results:
                results.append({
                    "title": r.get("title", ""),
                    "content": r.get("body", ""),
                    "url": r.get("href", ""),
                })
    except Exception as e:
        print(f"DuckDuckGo error: {e}")

    return results


def search_google_fallback(query: str) -> list:
    """Fallback Google search (stub).

    Scraping Google result pages is brittle; in production, use the Google
    Custom Search JSON API. This stub only shows how the request would be
    shaped and always returns an empty list.
    """
    results = []
    try:
        encoded_query = urllib.parse.quote(query)
        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            )
        }
        search_url = f"https://www.google.com/search?q={encoded_query}&hl=en"
        # Intentionally no request is made here; headers and search_url are
        # placeholders for a proper API integration.
        _ = (headers, search_url)
    except Exception as e:
        print(f"Google fallback error: {e}")

    return results


def search_bing_fallback(query: str) -> list:
    """Fallback Bing search (stub).

    The Bing Web Search API would be called here in production; this
    placeholder always returns an empty list.
    """
    return []


@tool
def calculate(expression: str) -> str:
    """Evaluate mathematical expressions safely."""
    try:
        # Normalize the expression
        expression = expression.strip()
        expression = expression.replace("×", "*").replace("÷", "/")
        expression = expression.replace("^", "**")
        expression = expression.replace(",", "")

        # Handle percentages: "15% of 240" -> "(240 * 15 / 100)", "15%" -> "(15/100)"
        expression = re.sub(
            r"(\d+(?:\.\d+)?)\s*%\s*of\s*(\d+(?:\.\d+)?)",
            r"(\2 * \1 / 100)",
            expression,
        )
        expression = re.sub(r"(\d+(?:\.\d+)?)\s*%", r"(\1/100)", expression)

        # Evaluate with builtins disabled and only a small whitelist of names
        allowed_names = {
            "abs": abs,
            "round": round,
            "min": min,
            "max": max,
            "pow": pow,
            "sum": sum,
            "__builtins__": {},
        }
        result = eval(expression, allowed_names)

        if isinstance(result, float) and result.is_integer():
            return str(int(result))
        return str(result)
    except Exception as e:
        return f"Calculation error: {e}"


@tool
def wikipedia_summary(query: str) -> str:
    """Get a Wikipedia summary for a topic."""
    try:
        results = search_wikipedia(query)
        if results:
            # Combine the top results
            summaries = [f"{r['title']}: {r['content']}" for r in results[:2]]
            return "\n\n".join(summaries)
        return f"No Wikipedia article found for '{query}'"
    except Exception as e:
        return f"Wikipedia error: {e}"


@tool
def define_term(term: str) -> str:
    """Define a term using a dictionary API."""
    try:
        term = term.strip().lower()

        # Try the free dictionary API first
        response = requests.get(
            f"https://api.dictionaryapi.dev/api/v2/entries/en/{term}",
            timeout=10,
        )
        if response.status_code == 200:
            data = response.json()
            definitions = []
            for entry in data:
                for meaning in entry.get("meanings", []):
                    for definition in meaning.get("definitions", []):
                        def_text = definition.get("definition", "")
                        if def_text:
                            definitions.append(def_text)
            if definitions:
                return definitions[0]  # Return the first definition

        # Fall back to Wikipedia
        wiki_results = search_wikipedia(f"{term} definition meaning")
        if wiki_results:
            return wiki_results[0]["content"][:200]

        return f"No definition found for '{term}'"
    except Exception as e:
        return f"Definition error: {e}"


# Advanced search tool for specific GAIA query patterns
@tool
def gaia_smart_search(query: str) -> str:
    """Smart search specifically optimized for GAIA questions."""
    query_lower = query.lower()

    # Album/discography queries
    if "album" in query_lower or "discography" in query_lower:
        artist_match = re.search(
            r"([\w\s]+?)(?:\s+album|\s+discography|\s+between)", query
        )
        if artist_match:
            artist = artist_match.group(1).strip()
            # @tool-decorated functions must be called via .invoke(), not directly
            return web_search.invoke(f"{artist} discography albums list")

    # Olympic queries
    if "olympic" in query_lower:
        year_match = re.search(r"(\d{4})\s+(?:summer|winter)?\s*olympics", query_lower)
        if year_match:
            year = year_match.group(1)
            return web_search.invoke(
                f"{year} Olympics participating countries athletes count"
            )

    # Academic paper queries
    if "paper" in query_lower or "article" in query_lower:
        author_match = re.search(r"by\s+([\w\s]+?)(?:\s+was|\s+published|\s+in)", query)
        if author_match:
            author = author_match.group(1).strip()
            return web_search.invoke(f"{author} research paper article")

    # Default to a regular search
    return web_search.invoke(query)


# List of tools exposed to the agent
TOOLS = [
    web_search,
    calculate,
    wikipedia_summary,
    define_term,
    reverse_text,
    gaia_smart_search,
]
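

# Minimal usage sketch (an addition, not part of the original module). The
# smoke test below uses only the tools defined above; the agent wiring in the
# comments is an assumption: it requires langchain_openai and langgraph plus
# an OPENAI_API_KEY, and the model name "gpt-4o-mini" is a placeholder.
if __name__ == "__main__":
    # @tool-decorated functions are Runnables; call them via .invoke()
    print(calculate.invoke("15% of 240"))  # -> 36
    print(reverse_text.invoke("hello"))    # -> olleh

    # Hypothetical agent wiring (uncomment if the dependencies are available):
    # from langchain_openai import ChatOpenAI
    # from langgraph.prebuilt import create_react_agent
    #
    # llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    # agent = create_react_agent(llm, TOOLS)
    # reply = agent.invoke({"messages": [("user", "What is 15% of 240?")]})
    # print(reply["messages"][-1].content)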