Spaces:
Sleeping
Sleeping
Upload web_search.py
Browse files- tools/web_search.py +71 -0
tools/web_search.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# web_search.py
|
2 |
+
|
3 |
+
from typing import List, Dict
|
4 |
+
import logging
|
5 |
+
|
6 |
+
from config import DUCKDUCKGO_MAX_RESULTS, DUCKDUCKGO_TIMEOUT, SEARCH_DEFAULT_ENGINE, LOG_LEVEL
|
7 |
+
from exceptions import NoResultsFound, SearchEngineUnavailable
|
8 |
+
|
9 |
+
try:
|
10 |
+
from duckduckgo_search import DDGS
|
11 |
+
except ImportError as e:
|
12 |
+
raise ImportError("Missing dependency: install with `pip install duckduckgo-search`") from e
|
13 |
+
|
14 |
+
logging.basicConfig(level=LOG_LEVEL)
|
15 |
+
logger = logging.getLogger(__name__)
|
16 |
+
|
17 |
+
|
18 |
+
class DuckDuckGoSearchTool:
    """Wrapper around ``duckduckgo_search.DDGS`` text search.

    ``search`` returns a list of ``{"title", "link", "snippet"}`` dicts and
    converts backend failures into ``SearchEngineUnavailable`` and empty
    result sets into ``NoResultsFound``. ``format_markdown`` renders such a
    list as a markdown bullet list; it is kept separate from the search logic.
    """

    # Deletes the characters that make up markdown link syntax: [ ] ( )
    _MD_STRIP_TABLE = str.maketrans("", "", "[]()")
    # Percent-encodes characters that would end a markdown link target early.
    _LINK_ESCAPE_TABLE = str.maketrans({"(": "%28", ")": "%29", " ": "%20"})

    def __init__(self, max_results: int = None, timeout: int = None):
        """Create the underlying ``DDGS`` client.

        Args:
            max_results: Maximum number of results requested per search.
                Any falsy value (``None`` or ``0``) falls back to
                ``DUCKDUCKGO_MAX_RESULTS``.
            timeout: Timeout passed to ``DDGS``. Any falsy value falls back
                to ``DUCKDUCKGO_TIMEOUT``.

        Raises:
            SearchEngineUnavailable: If constructing ``DDGS`` raises.
        """
        self.max_results = max_results or DUCKDUCKGO_MAX_RESULTS
        self.timeout = timeout or DUCKDUCKGO_TIMEOUT
        try:
            self.ddgs = DDGS(timeout=self.timeout)
        except Exception as ex:
            logger.critical("Failed to initialize DDGS: %s", ex)
            raise SearchEngineUnavailable("Failed to initialize DuckDuckGo search engine.") from ex

    def search(self, query: str) -> List[Dict]:
        """Run a text search and return sanitized results.

        Args:
            query: Search terms; must be a non-blank string.

        Returns:
            One dict per hit with the keys ``title``, ``link`` and ``snippet``.

        Raises:
            ValueError: If ``query`` is not a string or is blank.
            SearchEngineUnavailable: If the backend call raises.
            NoResultsFound: If the backend returns nothing.
        """
        if not isinstance(query, str) or not query.strip():
            logger.warning("Invalid search query provided: '%s'", query)
            raise ValueError("Query must be a non-empty string.")
        try:
            raw = self.ddgs.text(query, max_results=self.max_results)
            # Materialize inside the try: if text() hands back a lazy iterator
            # (NOTE(review): believed true of some duckduckgo_search releases
            # - confirm), the emptiness check below would never fire and
            # errors raised while iterating would escape this handler.
            results = [] if raw is None else list(raw)
        except Exception as ex:
            logger.error("Search failed: %s", ex)
            raise SearchEngineUnavailable("DuckDuckGo search failed.") from ex

        if not results:
            logger.info("No results found for query: '%s'", query)
            raise NoResultsFound(f"No results found for query: '{query}'")

        return [self._sanitize_result(res) for res in results]

    @staticmethod
    def _sanitize_result(result: Dict) -> Dict:
        """Map a raw backend hit to ``title`` / ``link`` / ``snippet``.

        ``title`` and ``snippet`` have the characters ``[ ] ( )`` removed so
        they cannot form markdown links. Missing or ``None`` fields become
        empty strings. The link is returned unescaped for programmatic use.
        """
        def clean(value) -> str:
            # A key that is present but None would otherwise crash on
            # .translate(); treat it the same as a missing key.
            if value is None:
                return ""
            return str(value).translate(DuckDuckGoSearchTool._MD_STRIP_TABLE)

        href = result.get("href")
        return {
            "title": clean(result.get("title")),
            "link": "" if href is None else href,
            "snippet": clean(result.get("body")),
        }

    @staticmethod
    def format_markdown(results: List[Dict]) -> str:
        """Render results as a markdown bullet list under a heading.

        Args:
            results: Dicts with the keys ``title``, ``link`` and ``snippet``.

        Returns:
            The markdown text, or ``"No results found."`` for an empty list.
        """
        if not results:
            return "No results found."
        lines = []
        for res in results:
            # Encode parentheses and spaces so the URL cannot close the
            # "(...)" link target early (e.g. ".../Python_(language)").
            link = str(res["link"]).translate(DuckDuckGoSearchTool._LINK_ESCAPE_TABLE)
            lines.append(f"- [{res['title']}]({link})\n {res['snippet']}")
        return "## Search Results\n\n" + "\n\n".join(lines)
|