Spaces:
Sleeping
Sleeping
# search_tool.py
from typing import List, Dict | |
import logging | |
from config import DUCKDUCKGO_MAX_RESULTS, DUCKDUCKGO_TIMEOUT, SEARCH_DEFAULT_ENGINE, LOG_LEVEL | |
from exceptions import NoResultsFound, SearchEngineUnavailable | |
# duckduckgo_search is a hard requirement of this module: if it is missing,
# re-raise the ImportError with an actionable install hint, keeping the
# original error chained as the cause.
try:
    from duckduckgo_search import DDGS
except ImportError as import_error:
    raise ImportError(
        "Missing dependency: install with `pip install duckduckgo-search`"
    ) from import_error

# Apply the project's LOG_LEVEL via logging.basicConfig and create the
# module-scoped logger used by the code in this file.
logging.basicConfig(level=LOG_LEVEL)
logger = logging.getLogger(__name__)
class DuckDuckGoSearchTool:
    """
    DuckDuckGo text-search wrapper that returns sanitized, structured results.

    Each result is a dict with the keys ``title``, ``link`` and ``snippet``.
    Backend failures are re-raised as ``SearchEngineUnavailable`` and an empty
    result set as ``NoResultsFound``.
    """

    # Translation table that deletes the characters forming markdown link
    # syntax ("[", "]", "(", ")") from user-facing text in a single pass.
    _MARKDOWN_STRIP_TABLE = str.maketrans("", "", "[]()")

    def __init__(self, max_results: int = None, timeout: int = None):
        """
        Create the underlying ``DDGS`` client.

        Args:
            max_results: Maximum number of results requested per search.
                ``None`` (or any other falsy value, e.g. 0) falls back to
                ``DUCKDUCKGO_MAX_RESULTS``.
            timeout: Timeout handed to ``DDGS``. ``None`` (or any other
                falsy value) falls back to ``DUCKDUCKGO_TIMEOUT``.

        Raises:
            SearchEngineUnavailable: If the ``DDGS`` client cannot be
                constructed.
        """
        self.max_results = max_results or DUCKDUCKGO_MAX_RESULTS
        self.timeout = timeout or DUCKDUCKGO_TIMEOUT
        try:
            self.ddgs = DDGS(timeout=self.timeout)
        except Exception as ex:
            logger.critical("Failed to initialize DDGS: %s", ex)
            raise SearchEngineUnavailable("Failed to initialize DuckDuckGo search engine.") from ex

    def search(self, query: str) -> List[Dict]:
        """
        Run a text search and return sanitized results.

        Args:
            query: Search terms; must be a non-blank string.

        Returns:
            A list of dicts with the keys ``title``, ``link`` and ``snippet``.

        Raises:
            ValueError: If ``query`` is not a string or is blank.
            SearchEngineUnavailable: If the backend call (or consuming its
                results) raises.
            NoResultsFound: If the backend yields no results.
        """
        if not isinstance(query, str) or not query.strip():
            logger.warning("Invalid search query provided: '%s'", query)
            raise ValueError("Query must be a non-empty string.")
        try:
            raw_results = self.ddgs.text(query, max_results=self.max_results)
            # Materialize inside the try: if the backend hands back a lazy
            # iterator (NOTE(review): some duckduckgo_search releases appear
            # to yield results - confirm), the emptiness check below would
            # otherwise never fire and iteration errors would escape the
            # SearchEngineUnavailable translation.
            results = list(raw_results) if raw_results is not None else []
        except Exception as ex:
            # Deliberately broad: this is the boundary where any backend
            # failure is translated into the project's own exception type.
            logger.error("Search failed: %s", ex)
            raise SearchEngineUnavailable("DuckDuckGo search failed.") from ex
        if not results:
            logger.info("No results found for query: '%s'", query)
            raise NoResultsFound(f"No results found for query: '{query}'")
        return [self._sanitize_result(res) for res in results]

    @staticmethod
    def _sanitize_result(result: Dict) -> Dict:
        """
        Map a raw backend result onto the public ``title``/``link``/``snippet`` shape.

        Declared as a static method because it takes no ``self``; without the
        decorator the ``self._sanitize_result(res)`` call in ``search`` fails
        with a ``TypeError``.

        Args:
            result: Raw result dict; the ``title``, ``href`` and ``body`` keys
                are read, and missing or ``None`` values become ``""``.

        Returns:
            Dict with ``title`` and ``snippet`` stripped of ``[]()`` characters
            and ``link`` copied from ``href`` unchanged.
        """
        def escape_md(text) -> str:
            # Very simple; improve as needed (real production code may need a markdown library)
            if text is None:
                return ""
            return str(text).translate(DuckDuckGoSearchTool._MARKDOWN_STRIP_TABLE)

        return {
            "title": escape_md(result.get("title", "")),
            "link": result.get("href") or "",
            "snippet": escape_md(result.get("body", "")),
        }
def format_markdown(results: List[Dict]) -> str:
    """
    Format results as markdown. Keep presentation separate from core logic.

    Args:
        results: Dicts that each provide the ``title``, ``link`` and
            ``snippet`` keys.

    Returns:
        ``"No results found."`` when ``results`` is empty; otherwise a
        ``## Search Results`` heading followed by one bullet per result in the
        form ``- [title](link)`` with the snippet on the following line.

    Raises:
        KeyError: If a result lacks one of the three keys.
    """
    if not results:
        return "No results found."
    # Percent-encode the characters that would terminate or corrupt a markdown
    # link destination; a raw ")" or space in the URL would otherwise cut the
    # link short. URLs without these characters are emitted unchanged.
    link_escapes = str.maketrans({"(": "%28", ")": "%29", " ": "%20"})
    lines = [
        f"- [{res['title']}]({str(res['link']).translate(link_escapes)})\n {res['snippet']}"
        for res in results
    ]
    return "## Search Results\n\n" + "\n\n".join(lines)