Tesvia committed on
Commit
05e018f
·
verified ·
1 Parent(s): 1872edd

Upload web_search.py

Browse files
Files changed (1) hide show
  1. tools/web_search.py +71 -0
tools/web_search.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # web_search.py
2
+
3
+ from typing import List, Dict
4
+ import logging
5
+
6
+ from config import DUCKDUCKGO_MAX_RESULTS, DUCKDUCKGO_TIMEOUT, SEARCH_DEFAULT_ENGINE, LOG_LEVEL
7
+ from exceptions import NoResultsFound, SearchEngineUnavailable
8
+
9
+ try:
10
+ from duckduckgo_search import DDGS
11
+ except ImportError as e:
12
+ raise ImportError("Missing dependency: install with `pip install duckduckgo-search`") from e
13
+
14
+ logging.basicConfig(level=LOG_LEVEL)
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class DuckDuckGoSearchTool:
    """
    DuckDuckGo text-search wrapper returning structured, sanitized results.

    Each result returned by :meth:`search` is a dict with the keys
    ``"title"``, ``"link"`` and ``"snippet"``. Presentation (markdown) is
    kept separate in :meth:`format_markdown`.
    """

    # Characters stripped from titles/snippets so they cannot open or close
    # a markdown link when interpolated into ``[title](link)``.
    _MD_STRIP = str.maketrans("", "", "[]()")

    # Characters that would terminate or corrupt a markdown link destination.
    # They are percent-encoded (or dropped, for line breaks) only at
    # formatting time, so the raw URL returned by ``search`` is untouched.
    _LINK_ESCAPES = str.maketrans({
        "(": "%28",
        ")": "%29",
        " ": "%20",
        "<": "%3C",
        ">": "%3E",
        "\n": "",
        "\r": "",
    })

    def __init__(self, max_results: int = None, timeout: int = None):
        """
        Create the tool and its underlying ``DDGS`` client.

        Args:
            max_results: Maximum number of results to request. Any falsy
                value (``None`` or ``0``) falls back to
                ``DUCKDUCKGO_MAX_RESULTS``.
            timeout: Timeout passed to ``DDGS``. Any falsy value falls back
                to ``DUCKDUCKGO_TIMEOUT``.

        Raises:
            SearchEngineUnavailable: If constructing ``DDGS`` fails.
        """
        self.max_results = max_results or DUCKDUCKGO_MAX_RESULTS
        self.timeout = timeout or DUCKDUCKGO_TIMEOUT
        try:
            self.ddgs = DDGS(timeout=self.timeout)
        except Exception as ex:
            logger.critical("Failed to initialize DDGS: %s", ex)
            raise SearchEngineUnavailable("Failed to initialize DuckDuckGo search engine.") from ex

    def search(self, query: str) -> List[Dict]:
        """
        Run a text search and return sanitized results.

        Args:
            query: Non-empty search string.

        Returns:
            A list of dicts with ``"title"``, ``"link"`` and ``"snippet"``.

        Raises:
            ValueError: If ``query`` is not a string or is blank.
            SearchEngineUnavailable: If the underlying search call fails.
            NoResultsFound: If the search yields no results.
        """
        if not isinstance(query, str) or not query.strip():
            logger.warning("Invalid search query provided: '%s'", query)
            raise ValueError("Query must be a non-empty string.")
        try:
            raw = self.ddgs.text(query, max_results=self.max_results)
            # Materialize inside the try: if the client hands back a lazy
            # iterator, errors surface during iteration and an un-consumed
            # iterator would always be truthy, hiding the empty case below.
            results = list(raw) if raw is not None else []
        except Exception as ex:
            logger.error("Search failed: %s", ex)
            raise SearchEngineUnavailable("DuckDuckGo search failed.") from ex

        if not results:
            logger.info("No results found for query: '%s'", query)
            raise NoResultsFound(f"No results found for query: '{query}'")

        return [self._sanitize_result(res) for res in results]

    @staticmethod
    def _sanitize_result(result: Dict) -> Dict:
        """
        Map a raw result (``title``/``href``/``body``) to the public shape.

        Square brackets and parentheses are removed from the title and
        snippet. Missing or ``None`` fields become empty strings instead of
        raising. The link is returned unmodified (as a string).
        """
        def escape_md(text) -> str:
            # Very simple stripping; a full markdown escaper may be needed
            # if other markup characters must be neutralized.
            return str(text or "").translate(DuckDuckGoSearchTool._MD_STRIP)

        return {
            "title": escape_md(result.get("title")),
            "link": str(result.get("href") or ""),
            "snippet": escape_md(result.get("body")),
        }

    @staticmethod
    def format_markdown(results: List[Dict]) -> str:
        """
        Format results as a markdown bullet list under a heading.

        Link destinations are percent-encoded for ``(``, ``)``, spaces and
        angle brackets (line breaks are dropped) so a crafted URL cannot
        close the link early. Missing keys render as empty strings.

        Returns:
            ``"No results found."`` for an empty input, otherwise the
            markdown document.
        """
        if not results:
            return "No results found."
        escapes = DuckDuckGoSearchTool._LINK_ESCAPES
        lines = []
        for res in results:
            title = res.get("title", "")
            link = str(res.get("link", "")).translate(escapes)
            snippet = res.get("snippet", "")
            lines.append(f"- [{title}]({link})\n {snippet}")
        return "## Search Results\n\n" + "\n\n".join(lines)