mjschock committed on
Commit
50aafe2
·
unverified ·
1 Parent(s): ea0c151

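Update requirements.txt: add kagglehub, llama-index (with llama-index-embeddings-huggingface and llama-index-readers-wikipedia), mlcroissant, and wikipedia; remove black and the duplicate Wikipedia-API entry; and move the existing langchain, langchain-community, and pandas entries into alphabetical order. Refactor SmartSearchTool in tools/smart_search/tool.py to replace the Wikipedia lookup with a webpage-visiting approach: every URL found in the web search result is fetched with VisitWebpageTool and its content is appended to the output, improving content retrieval from web search results. Update the tool description and log messages to match the new behavior.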

Browse files
Files changed (2) hide show
  1. requirements.txt +9 -5
  2. tools/smart_search/tool.py +23 -34
requirements.txt CHANGED
@@ -1,17 +1,21 @@
1
- black>=25.1.0
2
  duckduckgo-search>=8.0.1
3
  gradio[oauth]>=5.26.0
4
  isort>=6.0.1
 
 
 
5
  langgraph>=0.3.34
6
  litellm>=1.10.0
 
 
 
 
 
7
  pytest>=8.3.5
8
  pytest-cov>=6.1.1
9
  python-dotenv>=1.0.0
10
  requests>=2.32.3
11
  smolagents[litellm,telemetry]>=1.14.0
12
  typing-extensions>=4.5.0
 
13
  wikipedia-api>=0.8.1
14
- langchain>=0.1.0
15
- langchain-community>=0.0.10
16
- pandas>=2.0.0
17
- Wikipedia-API>=0.8.1
 
 
1
  duckduckgo-search>=8.0.1
2
  gradio[oauth]>=5.26.0
3
  isort>=6.0.1
4
+ kagglehub>=0.3.12
5
+ langchain>=0.1.0
6
+ langchain-community>=0.0.10
7
  langgraph>=0.3.34
8
  litellm>=1.10.0
9
+ llama-index>=0.12.33
10
+ llama-index-embeddings-huggingface>=0.5.3
11
+ llama-index-readers-wikipedia>=0.3.0
12
+ mlcroissant>=1.0.17
13
+ pandas>=2.0.0
14
  pytest>=8.3.5
15
  pytest-cov>=6.1.1
16
  python-dotenv>=1.0.0
17
  requests>=2.32.3
18
  smolagents[litellm,telemetry]>=1.14.0
19
  typing-extensions>=4.5.0
20
+ wikipedia>=1.4.0
21
  wikipedia-api>=0.8.1
 
 
 
 
tools/smart_search/tool.py CHANGED
@@ -1,60 +1,49 @@
1
  import logging
2
  import re
3
  from smolagents import Tool
4
- from smolagents.default_tools import DuckDuckGoSearchTool, WikipediaSearchTool
5
 
6
  logger = logging.getLogger(__name__)
7
 
8
 
9
  class SmartSearchTool(Tool):
10
  name = "smart_search"
11
- description = """A smart search tool that first performs a web search and then, if a Wikipedia article is found,
12
- uses Wikipedia search for more reliable information."""
13
  inputs = {"query": {"type": "string", "description": "The search query to find information"}}
14
  output_type = "string"
15
 
16
  def __init__(self):
17
  super().__init__()
18
  self.web_search_tool = DuckDuckGoSearchTool(max_results=1)
19
- self.wiki_tool = WikipediaSearchTool(
20
- user_agent="SmartSearchTool ([email protected])",
21
- language="en",
22
- # content_type="summary",
23
- content_type="text",
24
- extract_format="WIKI"
25
- )
26
 
27
  def forward(self, query: str) -> str:
28
  logger.info(f"Starting smart search for query: {query}")
29
 
30
- # First perform a web search with a single result
31
  web_result = self.web_search_tool.forward(query)
32
  logger.info(f"Web search result: {web_result[:100]}...")
33
 
34
- # Check if the result contains a Wikipedia link
35
- if "wikipedia.org" in web_result.lower():
36
- logger.info("Wikipedia link found in web search results")
37
- # Extract the Wikipedia page title from the URL using regex
38
- wiki_match = re.search(r'wikipedia\.org/wiki/([^)\s]+)', web_result)
39
- if wiki_match:
40
- wiki_title = wiki_match.group(1)
41
- logger.info(f"Extracted Wikipedia title: {wiki_title}")
42
-
43
- # Use Wikipedia search for more reliable information
44
- wiki_result = self.wiki_tool.forward(wiki_title)
45
- logger.info(f"Wikipedia search result: {wiki_result[:100]}...")
46
-
47
- if wiki_result and "No Wikipedia page found" not in wiki_result:
48
- logger.info("Successfully retrieved Wikipedia content")
49
- return f"Web search result:\n{web_result}\n\nWikipedia result:\n{wiki_result}"
50
- else:
51
- logger.warning("Wikipedia search failed or returned no results")
52
- else:
53
- logger.warning("Could not extract Wikipedia title from URL")
54
 
55
- # If no Wikipedia link was found or Wikipedia search failed, return the web search result
56
- logger.info("Returning web search result only")
57
- return f"Web search result:\n{web_result}"
 
 
 
 
 
 
 
 
 
 
 
58
 
59
 
60
  def main(query: str) -> str:
 
1
  import logging
2
  import re
3
  from smolagents import Tool
4
+ from smolagents.default_tools import DuckDuckGoSearchTool, VisitWebpageTool
5
 
6
  logger = logging.getLogger(__name__)
7
 
8
 
9
  class SmartSearchTool(Tool):
10
  name = "smart_search"
11
+ description = """A smart search tool that first performs a web search and then visits each URL to get its content."""
 
12
  inputs = {"query": {"type": "string", "description": "The search query to find information"}}
13
  output_type = "string"
14
 
15
  def __init__(self):
16
  super().__init__()
17
  self.web_search_tool = DuckDuckGoSearchTool(max_results=1)
18
+ self.visit_webpage_tool = VisitWebpageTool(max_output_length=-1)
 
 
 
 
 
 
19
 
20
  def forward(self, query: str) -> str:
21
  logger.info(f"Starting smart search for query: {query}")
22
 
23
+ # Get web search results
24
  web_result = self.web_search_tool.forward(query)
25
  logger.info(f"Web search result: {web_result[:100]}...")
26
 
27
+ # Extract URLs from the web search result
28
+ urls = re.findall(r'https?://[^\s)]+', web_result)
29
+ if not urls:
30
+ logger.info("No URLs found in web search result")
31
+ return f"Web search result:\n{web_result}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
+ # Visit each URL and get its content
34
+ contents = []
35
+ for url in urls:
36
+ logger.info(f"Visiting URL: {url}")
37
+ try:
38
+ content = self.visit_webpage_tool.forward(url)
39
+ if content:
40
+ contents.append(f"\nContent from {url}:\n{content}")
41
+ except Exception as e:
42
+ logger.warning(f"Error visiting {url}: {e}")
43
+ contents.append(f"\nError visiting {url}: {e}")
44
+
45
+ # Combine all results
46
+ return f"Web search result:\n{web_result}\n" + "\n".join(contents)
47
 
48
 
49
  def main(query: str) -> str: