Spaces:
Sleeping
Sleeping
Delete tools/web_search.py
Browse files- tools/web_search.py +0 -71
tools/web_search.py
DELETED
@@ -1,71 +0,0 @@
|
|
1 |
-
# web_search.py
|
2 |
-
|
3 |
-
from typing import List, Dict
|
4 |
-
import logging
|
5 |
-
|
6 |
-
from config import DUCKDUCKGO_MAX_RESULTS, DUCKDUCKGO_TIMEOUT, SEARCH_DEFAULT_ENGINE, LOG_LEVEL
|
7 |
-
from exceptions import NoResultsFound, SearchEngineUnavailable
|
8 |
-
|
9 |
-
try:
|
10 |
-
from duckduckgo_search import DDGS
|
11 |
-
except ImportError as e:
|
12 |
-
raise ImportError("Missing dependency: install with `pip install duckduckgo-search`") from e
|
13 |
-
|
14 |
-
logging.basicConfig(level=LOG_LEVEL)
|
15 |
-
logger = logging.getLogger(__name__)
|
16 |
-
|
17 |
-
|
18 |
-
class DuckDuckGoSearchTool:
    """
    DuckDuckGo text-search wrapper that returns structured results.

    Each result is a dictionary with ``title``, ``link`` and ``snippet``
    keys. Backend failures are reported as ``SearchEngineUnavailable`` and
    empty result sets as ``NoResultsFound``.
    """

    # Characters removed from titles/snippets so they cannot open or close a
    # markdown link when the result is rendered by ``format_markdown``.
    _MD_STRIP = str.maketrans("", "", "[]()")

    # Characters that would end a markdown link destination early; they are
    # percent-encoded only at render time so the stored link stays untouched.
    _LINK_ESCAPE = str.maketrans({"(": "%28", ")": "%29", " ": "%20"})

    def __init__(self, max_results: int = None, timeout: int = None):
        """
        Create the tool and its underlying ``DDGS`` client.

        Args:
            max_results: Maximum number of results requested per query.
                ``None`` (or any falsy value such as 0) falls back to
                ``DUCKDUCKGO_MAX_RESULTS``.
            timeout: Timeout handed to ``DDGS``. ``None`` (or any falsy
                value) falls back to ``DUCKDUCKGO_TIMEOUT``.

        Raises:
            SearchEngineUnavailable: If the ``DDGS`` client cannot be created.
        """
        self.max_results = max_results or DUCKDUCKGO_MAX_RESULTS
        self.timeout = timeout or DUCKDUCKGO_TIMEOUT
        try:
            self.ddgs = DDGS(timeout=self.timeout)
        except Exception as ex:
            logger.critical("Failed to initialize DDGS: %s", ex)
            raise SearchEngineUnavailable("Failed to initialize DuckDuckGo search engine.") from ex

    def search(self, query: str) -> List[Dict]:
        """
        Run a text search and return sanitized results.

        Args:
            query: Non-empty search string.

        Returns:
            A list of dictionaries with ``title``, ``link`` and ``snippet``.

        Raises:
            ValueError: If ``query`` is not a string or is blank.
            SearchEngineUnavailable: If the backend call fails.
            NoResultsFound: If the backend returns nothing for ``query``.
        """
        if not isinstance(query, str) or not query.strip():
            logger.warning("Invalid search query provided: '%s'", query)
            raise ValueError("Query must be a non-empty string.")

        try:
            raw = self.ddgs.text(query, max_results=self.max_results)
            # Materialize inside the try: if the backend hands back a lazy
            # iterator, errors surface here and the emptiness check below
            # stays meaningful (an iterator object is always truthy).
            results = list(raw) if raw is not None else []
        except Exception as ex:
            logger.error("Search failed: %s", ex)
            raise SearchEngineUnavailable("DuckDuckGo search failed.") from ex

        if not results:
            logger.info("No results found for query: '%s'", query)
            raise NoResultsFound(f"No results found for query: '{query}'")

        return [self._sanitize_result(res) for res in results]

    @staticmethod
    def _sanitize_result(result: Dict) -> Dict:
        """
        Map a raw backend result to the ``title``/``link``/``snippet`` shape.

        Square brackets and parentheses are stripped from the title and
        snippet to limit markdown injection. Missing or ``None`` fields
        become empty strings instead of raising.
        """
        def escape_md(text) -> str:
            # Very simple; a markdown library would be more thorough.
            if text is None:
                return ""
            return str(text).translate(DuckDuckGoSearchTool._MD_STRIP)

        return {
            "title": escape_md(result.get("title", "")),
            "link": result.get("href", "") or "",
            "snippet": escape_md(result.get("body", "")),
        }

    @staticmethod
    def format_markdown(results: List[Dict]) -> str:
        """
        Render sanitized results as a markdown bullet list.

        Presentation is kept separate from the search logic. Parentheses and
        spaces in each link are percent-encoded so the URL cannot terminate
        the markdown link destination early.

        Args:
            results: Dictionaries with ``title``, ``link`` and ``snippet``.

        Returns:
            The markdown document, or ``"No results found."`` when
            ``results`` is empty.
        """
        if not results:
            return "No results found."

        escape_link = DuckDuckGoSearchTool._LINK_ESCAPE
        lines = [
            f"- [{res['title']}]({str(res['link']).translate(escape_link)})\n {res['snippet']}"
            for res in results
        ]
        return "## Search Results\n\n" + "\n\n".join(lines)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|