Tesvia's picture
Upload web_search.py
05e018f verified
raw
history blame
2.8 kB
# search_tool.py
import logging
from typing import Dict, List, Optional

from config import DUCKDUCKGO_MAX_RESULTS, DUCKDUCKGO_TIMEOUT, SEARCH_DEFAULT_ENGINE, LOG_LEVEL
from exceptions import NoResultsFound, SearchEngineUnavailable

try:
    from duckduckgo_search import DDGS
except ImportError as e:
    raise ImportError("Missing dependency: install with `pip install duckduckgo-search`") from e
# Configure root logging at import time, using the level imported from config.
# NOTE(review): this is a module-level side effect; per the logging docs,
# basicConfig does nothing if the root logger already has handlers.
logging.basicConfig(level=LOG_LEVEL)
# Module-scoped logger, named after this module.
logger = logging.getLogger(__name__)
class DuckDuckGoSearchTool:
    """
    Production-grade DuckDuckGo search tool.

    Wraps a ``DDGS`` client and returns each hit as a plain dictionary with
    ``title``, ``link`` and ``snippet`` keys. Client failures are re-raised
    as ``SearchEngineUnavailable``; an empty result set raises
    ``NoResultsFound``.
    """

    # Characters removed from titles/snippets so they cannot open or close
    # a markdown link when the result is rendered.
    _MD_STRIP_TABLE = str.maketrans("", "", "[]()")

    # Characters that would terminate a markdown link target early, mapped
    # to their percent-encoded form.
    _LINK_ENCODINGS = (("(", "%28"), (")", "%29"), (" ", "%20"))

    def __init__(self, max_results: Optional[int] = None, timeout: Optional[int] = None):
        """Create the underlying DDGS client.

        Args:
            max_results: Maximum number of hits requested per query. ``None``
                (or any falsy value such as ``0``) falls back to
                ``DUCKDUCKGO_MAX_RESULTS``.
            timeout: Timeout handed to ``DDGS``. ``None`` (or any falsy
                value) falls back to ``DUCKDUCKGO_TIMEOUT``.

        Raises:
            SearchEngineUnavailable: If the ``DDGS`` client cannot be built.
        """
        self.max_results = max_results or DUCKDUCKGO_MAX_RESULTS
        self.timeout = timeout or DUCKDUCKGO_TIMEOUT
        try:
            self.ddgs = DDGS(timeout=self.timeout)
        except Exception as ex:
            logger.critical("Failed to initialize DDGS: %s", ex)
            raise SearchEngineUnavailable("Failed to initialize DuckDuckGo search engine.") from ex

    def search(self, query: str) -> List[Dict]:
        """Run a text search and return sanitized results.

        Args:
            query: Non-empty (after stripping whitespace) search string.

        Returns:
            A list of dictionaries with ``title``, ``link`` and ``snippet``.

        Raises:
            ValueError: If ``query`` is not a non-empty string.
            SearchEngineUnavailable: If the DDGS call (or iterating its
                result) raises.
            NoResultsFound: If the engine returns no hits.
        """
        if not isinstance(query, str) or not query.strip():
            logger.warning("Invalid search query provided: '%s'", query)
            raise ValueError("Query must be a non-empty string.")
        try:
            raw = self.ddgs.text(query, max_results=self.max_results)
            # Materialize inside the try block: if the client hands back a
            # lazy iterator, network errors only surface while iterating and
            # an empty iterator would otherwise look truthy below.
            results = list(raw) if raw is not None else []
        except Exception as ex:
            logger.error("Search failed: %s", ex)
            raise SearchEngineUnavailable("DuckDuckGo search failed.") from ex
        if not results:
            logger.info("No results found for query: '%s'", query)
            raise NoResultsFound(f"No results found for query: '{query}'")
        return [self._sanitize_result(res) for res in results]

    @staticmethod
    def _sanitize_result(result: Dict) -> Dict:
        """Sanitize user-facing fields to prevent markdown injection, etc.

        Reads ``title``, ``href`` and ``body`` from ``result``. Square
        brackets and parentheses are stripped from the title and body;
        missing or ``None`` fields become empty strings. The link is passed
        through unchanged (apart from ``None`` becoming ``""``).
        """

        def strip_md(value) -> str:
            # Very simple; improve as needed (real production code may need
            # a markdown library). None/non-str values are coerced so a
            # malformed hit cannot raise AttributeError here.
            if value is None:
                return ""
            return str(value).translate(DuckDuckGoSearchTool._MD_STRIP_TABLE)

        return {
            "title": strip_md(result.get("title")),
            "link": result.get("href") or "",
            "snippet": strip_md(result.get("body")),
        }

    @staticmethod
    def format_markdown(results: List[Dict]) -> str:
        """Format results as markdown. Keep presentation separate from core logic.

        Args:
            results: Dictionaries with ``title``, ``link`` and ``snippet``
                keys; missing keys are rendered as empty strings.

        Returns:
            A markdown document with one bullet per result, or the literal
            ``"No results found."`` when ``results`` is empty.
        """
        if not results:
            return "No results found."
        lines = []
        for res in results:
            title = res.get("title") or ""
            snippet = res.get("snippet") or ""
            link = str(res.get("link") or "")
            # Percent-encode characters that would end the link target early
            # so a URL containing ')' or a space cannot break the markup.
            for char, encoded in DuckDuckGoSearchTool._LINK_ENCODINGS:
                link = link.replace(char, encoded)
            lines.append(f"- [{title}]({link})\n {snippet}")
        return "## Search Results\n\n" + "\n\n".join(lines)