import re
import time
import urllib.parse

import requests
from duckduckgo_search import DDGS
from langchain_core.tools import tool

# Rate limiting: enforce a minimum delay between consecutive web searches
last_search_time = None
min_search_interval = 1.0  # seconds
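# Example: if the previous search finished 0.3 s ago, the next web_search()
# call sleeps for the remaining 0.7 s before contacting a provider.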

@tool
def reverse_text(text: str) -> str:
    """Reverse the characters in a text or string."""
    return text[::-1]
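# Example: some GAIA questions embed their instructions backwards.
#   reverse_text.invoke(".elppa :drow siht esreveR")
#   -> 'Reverse this word: apple.'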

@tool
def web_search(query: str) -> str:
    """Perform a web search using multiple providers for robustness."""
    global last_search_time

    # Rate limiting: wait out the remainder of the minimum interval
    if last_search_time:
        elapsed = time.time() - last_search_time
        if elapsed < min_search_interval:
            time.sleep(min_search_interval - elapsed)
    last_search_time = time.time()  # record this search for the next call

    query = query.strip()
    if not query:
        return "Empty search query"

    results = []

    # Try multiple search methods in order until enough results accumulate
    search_methods = [
        ("Wikipedia", search_wikipedia),
        ("Google (fallback)", search_google_fallback),
        ("DuckDuckGo", search_duckduckgo),
        ("Bing (fallback)", search_bing_fallback),
    ]

    for method_name, method_func in search_methods:
        try:
            print(f"Trying {method_name} search...")
            method_results = method_func(query)
            if method_results:
                results.extend(method_results)
                print(f"{method_name} found {len(method_results)} results")
                if len(results) >= 3:  # Enough results
                    break
        except Exception as e:
            print(f"{method_name} search failed: {e}")
            continue

    if not results:
        return "No search results found. All search methods failed."

    # Format results as "Title. Content (Source: url)" blocks
    formatted_results = []
    for result in results[:8]:
        if isinstance(result, dict):
            title = result.get('title', '')
            content = result.get('content', '')
            url = result.get('url', '')
            formatted = f"{title}. {content}"
            if url:
                formatted += f" (Source: {url})"
            formatted_results.append(formatted)
        else:
            formatted_results.append(str(result))

    return "\n\n".join(formatted_results)

def search_wikipedia(query: str) -> list:
    """Search Wikipedia directly via the MediaWiki API."""
    results = []
    try:
        search_url = "https://en.wikipedia.org/w/api.php"

        # First, search for matching articles
        search_params = {
            "action": "query",
            "list": "search",
            "srsearch": query,
            "format": "json",
            "srlimit": 5,
            "srprop": "snippet|titlesnippet|size|wordcount"
        }

        response = requests.get(search_url, params=search_params, timeout=10)
        if response.status_code == 200:
            data = response.json()
            search_results = data.get("query", {}).get("search", [])

            for item in search_results[:3]:
                title = item.get("title", "")
                snippet = re.sub(r'<[^>]+>', '', item.get("snippet", ""))  # strip HTML markup

                # Fetch a plain-text intro extract for the article
                page_params = {
                    "action": "query",
                    "prop": "extracts|info",
                    "exintro": True,
                    "explaintext": True,
                    "inprop": "url",
                    "titles": title,
                    "format": "json",
                    "exsentences": 5
                }

                page_response = requests.get(search_url, params=page_params, timeout=10)
                if page_response.status_code == 200:
                    page_data = page_response.json()
                    pages = page_data.get("query", {}).get("pages", {})
                    for page_info in pages.values():
                        extract = page_info.get("extract", "")
                        url = page_info.get("fullurl", "")
                        if extract:
                            results.append({
                                "title": f"Wikipedia: {title}",
                                "content": extract[:500],
                                "url": url
                            })
                            break
                else:
                    # Use the search snippet if the extract request fails
                    results.append({
                        "title": f"Wikipedia: {title}",
                        "content": snippet,
                        "url": f"https://en.wikipedia.org/wiki/{title.replace(' ', '_')}"
                    })
    except Exception as e:
        print(f"Wikipedia search error: {e}")
    return results
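# The search request above is equivalent to:
#   https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch=QUERY&format=json
# See https://www.mediawiki.org/wiki/API:Search for the full parameter reference.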

def search_duckduckgo(query: str) -> list:
    """Search using DuckDuckGo."""
    results = []
    try:
        with DDGS() as ddgs:
            # Simple text search without version-dependent parameters
            search_results = list(ddgs.text(query, max_results=5))
            for r in search_results:
                results.append({
                    "title": r.get("title", ""),
                    "content": r.get("body", ""),
                    "url": r.get("href", "")
                })
    except Exception as e:
        print(f"DuckDuckGo error: {e}")
    return results
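# Each raw DDGS result is a dict of the form
#   {"title": "...", "href": "https://...", "body": "..."}
# which is normalized above to the {title, content, url} shape used by web_search.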

def search_google_fallback(query: str) -> list:
    """Fallback Google search (placeholder).

    Scraping Google result pages directly is brittle and easily blocked;
    in production, use the Google Custom Search JSON API instead. The
    variables below document the pattern but no request is made, so this
    stub returns no results and web_search falls through to the next provider.
    """
    results = []
    try:
        encoded_query = urllib.parse.quote(query)
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        search_url = f"https://www.google.com/search?q={encoded_query}&hl=en"
    except Exception as e:
        print(f"Google fallback error: {e}")
    return results
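# A real implementation could use the Google Custom Search JSON API
# (sketch; GOOGLE_API_KEY and SEARCH_ENGINE_ID are hypothetical names you
# would provision yourself):
#
#   params = {"key": GOOGLE_API_KEY, "cx": SEARCH_ENGINE_ID, "q": query, "num": 5}
#   resp = requests.get("https://www.googleapis.com/customsearch/v1",
#                       params=params, timeout=10)
#   for item in resp.json().get("items", []):
#       results.append({"title": item.get("title", ""),
#                       "content": item.get("snippet", ""),
#                       "url": item.get("link", "")})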

def search_bing_fallback(query: str) -> list:
    """Fallback Bing search (placeholder).

    A production version would call the Bing Web Search API with a
    subscription key; this stub returns no results so web_search moves on.
    """
    return []
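# Sketch using the Bing Web Search API v7 (BING_API_KEY is a hypothetical
# name for your own subscription key):
#
#   headers = {"Ocp-Apim-Subscription-Key": BING_API_KEY}
#   resp = requests.get("https://api.bing.microsoft.com/v7.0/search",
#                       headers=headers, params={"q": query, "count": 5}, timeout=10)
#   for page in resp.json().get("webPages", {}).get("value", []):
#       results.append({"title": page.get("name", ""),
#                       "content": page.get("snippet", ""),
#                       "url": page.get("url", "")})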

@tool
def calculate(expression: str) -> str:
    """Evaluate mathematical expressions safely."""
    try:
        # Clean the expression
        expression = expression.strip()

        # Normalize common notations
        expression = expression.replace("×", "*").replace("÷", "/")
        expression = expression.replace("^", "**")
        expression = expression.replace(",", "")  # drop thousands separators

        # Handle percentages: "15% of 80" -> "(80 * 15 / 100)", then bare "15%" -> "(15/100)"
        expression = re.sub(r'(\d+(?:\.\d+)?)\s*%\s*of\s*(\d+(?:\.\d+)?)', r'(\2 * \1 / 100)', expression)
        expression = re.sub(r'(\d+(?:\.\d+)?)\s*%', r'(\1/100)', expression)

        # Restricted evaluation: only the allow-listed names are available.
        # Note that eval() is still not a true sandbox; inputs come from the agent.
        allowed_names = {
            "abs": abs, "round": round, "min": min, "max": max,
            "pow": pow, "sum": sum, "__builtins__": {}
        }
        result = eval(expression, allowed_names)

        if isinstance(result, float) and result.is_integer():
            return str(int(result))
        return str(result)
    except Exception as e:
        return f"Calculation error: {e}"

@tool
def wikipedia_summary(query: str) -> str:
    """Get a Wikipedia summary for a topic."""
    try:
        results = search_wikipedia(query)
        if results:
            # Combine the top results
            summaries = []
            for r in results[:2]:
                summaries.append(f"{r['title']}: {r['content']}")
            return "\n\n".join(summaries)
        return f"No Wikipedia article found for '{query}'"
    except Exception as e:
        return f"Wikipedia error: {e}"


@tool
def define_term(term: str) -> str:
    """Define a term using a dictionary API."""
    try:
        term = term.strip().lower()

        # Try the free dictionary API first
        response = requests.get(
            f"https://api.dictionaryapi.dev/api/v2/entries/en/{term}",
            timeout=10
        )
        if response.status_code == 200:
            data = response.json()
            definitions = []
            for entry in data:
                for meaning in entry.get("meanings", []):
                    for definition in meaning.get("definitions", []):
                        def_text = definition.get("definition", "")
                        if def_text:
                            definitions.append(def_text)
            if definitions:
                return definitions[0]  # Return the first definition

        # Fall back to Wikipedia
        wiki_results = search_wikipedia(f"{term} definition meaning")
        if wiki_results:
            return wiki_results[0]['content'][:200]

        return f"No definition found for '{term}'"
    except Exception as e:
        return f"Definition error: {e}"

# Advanced search function for specific GAIA queries
@tool
def gaia_smart_search(query: str) -> str:
    """Smart search specifically optimized for GAIA questions."""
    # Parse the query for specific patterns
    query_lower = query.lower()

    # Album/discography queries
    if 'album' in query_lower or 'discography' in query_lower:
        artist_match = re.search(r'([\w\s]+?)(?:\s+album|\s+discography|\s+between)', query)
        if artist_match:
            artist = artist_match.group(1).strip()
            return web_search.invoke(f"{artist} discography albums list")

    # Olympic queries
    if 'olympic' in query_lower:
        year_match = re.search(r'(\d{4})\s+(?:summer|winter)?\s*olympics', query_lower)
        if year_match:
            year = year_match.group(1)
            return web_search.invoke(f"{year} Olympics participating countries athletes count")

    # Academic papers
    if 'paper' in query_lower or 'article' in query_lower:
        author_match = re.search(r'by\s+([\w\s]+?)(?:\s+was|\s+published|\s+in)', query)
        if author_match:
            author = author_match.group(1).strip()
            return web_search.invoke(f"{author} research paper article")

    # Default to a regular search
    return web_search.invoke(query)
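# Example rewrites (illustrative; the pattern matching is heuristic and can
# misfire when the artist or author name is not at the start of the query):
#   "Mercedes Sosa discography between 2000 and 2009"
#     -> web_search("Mercedes Sosa discography albums list")
#   "How many countries competed at the 1928 Summer Olympics?"
#     -> web_search("1928 Olympics participating countries athletes count")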

# List of tools exposed to the agent
TOOLS = [web_search, calculate, wikipedia_summary, define_term, reverse_text, gaia_smart_search]
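
if __name__ == "__main__":
    # Minimal smoke test (illustrative). reverse_text and calculate run
    # offline; web_search needs network access and its output will vary.
    print(reverse_text.invoke("hello"))    # -> olleh
    print(calculate.invoke("15% of 80"))   # -> 12
    print(web_search.invoke("Eiffel Tower height"))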