Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files
examples/debate/agent/search/search_engine.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
from enum import Enum
|
3 |
+
from typing import List
|
4 |
+
|
5 |
+
|
6 |
+
class SearchAPI(Enum):
    """Identifiers for the search back-ends known to this module.

    Each member's value is the lower-case string name of the provider, so a
    member can be looked up from its string form, e.g. ``SearchAPI("tavily")``.
    """

    # General web search providers.
    PERPLEXITY = "perplexity"
    TAVILY = "tavily"
    EXA = "exa"

    # Providers named after scholarly sources.
    ARXIV = "arxiv"
    PUBMED = "pubmed"

    LINKUP = "linkup"
|
13 |
+
|
14 |
+
|
15 |
+
class SearchException(Exception):
    """Error raised when a search request cannot be completed.

    Attributes:
        message: Description of the failure; also used as the string form
            of the exception.
        error_code: Optional code supplied by the raiser, ``None`` if omitted.
    """

    def __init__(self, message, error_code=None):
        """Create the exception.

        Args:
            message: Description of the failure.
            error_code: Optional code identifying the failure.
        """
        # Pass the message up to Exception so that ``args``, ``repr()`` and
        # copy/pickle reconstruction (which re-calls the class with ``args``)
        # all see it instead of an empty argument tuple.
        super().__init__(message)
        self.message = message
        self.error_code = error_code

    def __str__(self):
        """Return the message as the string form of the exception."""
        return str(self.message)
|
24 |
+
|
25 |
+
|
26 |
+
class SearchEngine:
    """Base class for search back-ends.

    Subclasses implement :meth:`async_search` for a single query; the batch
    helpers defined here fan a list of queries out to that method.
    """

    def batch_search(self, queries: List[str], max_results=5, include_raw_content=False, **kwargs):
        """Run :meth:`async_batch_search` to completion from synchronous code.

        Args:
            queries: Search queries to execute.
            max_results: Passed through to :meth:`async_search` for each query.
            include_raw_content: Passed through to :meth:`async_search`.
            **kwargs: Extra keyword arguments passed through unchanged.

        Returns:
            list: One result per query, in the same order as ``queries``.

        Raises:
            SearchException: If starting or running the batch raises any
                ``Exception``. The original error is attached as
                ``__cause__``.
        """
        try:
            return asyncio.run(
                self.async_batch_search(
                    queries,
                    max_results=max_results,
                    include_raw_content=include_raw_content,
                    **kwargs,
                )
            )
        except Exception as err:
            # Chain explicitly so the underlying failure stays reachable via
            # ``__cause__`` and is reported as the direct cause in tracebacks.
            raise SearchException(f"search queries = {queries} failed.") from err

    async def async_batch_search(self, queries: List[str], max_results=5, include_raw_content=False, **kwargs):
        """Run one :meth:`async_search` per query concurrently.

        Args:
            queries: Search queries to execute.
            max_results: Passed positionally to :meth:`async_search`.
            include_raw_content: Passed positionally to :meth:`async_search`.
            **kwargs: Extra keyword arguments passed through unchanged.

        Returns:
            list: Results gathered in the same order as ``queries``; an empty
            list when ``queries`` is empty.
        """
        searches = [
            self.async_search(query, max_results, include_raw_content, **kwargs)
            for query in queries
        ]

        # gather() runs the searches concurrently and preserves input order.
        return await asyncio.gather(*searches)

    async def async_search(self, query: str, max_results=5, include_raw_content=False, **kwargs) -> dict:
        """Search for a single query; must be overridden by subclasses.

        Args:
            query: The search query.
            max_results: Maximum number of results requested.
            include_raw_content: Whether raw page content is requested.
            **kwargs: Back-end specific options.

        Returns:
            dict: The back-end's response for ``query``.

        Raises:
            NotImplementedError: Always, in this base class. Failing loudly
                avoids silently returning ``None`` where a ``dict`` is
                promised when a subclass forgets to override this method.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement async_search()"
        )
|
examples/debate/agent/search/tavily_search_engine.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
from examples.debate.agent.search.search_engine import SearchEngine
|
3 |
+
|
4 |
+
|
5 |
+
class TavilySearchEngine(SearchEngine):
    """Search engine backed by the Tavily API."""

    async def async_search(self, query: str, max_results=5, include_raw_content=False, **kwargs) -> dict:
        """
        Performs a single web search using the Tavily API.

        Args:
            query (str): The search query.
            max_results (int): Maximum number of results to request.
            include_raw_content (bool): Whether to request the full page
                content for each result.
            **kwargs: Accepted for signature compatibility with the base
                class; not forwarded to Tavily.

        Returns:
            dict: search response from the Tavily API for this query, with format:
                {
                    'query': str,  # The original search query
                    'follow_up_questions': None,
                    'answer': None,
                    'images': list,
                    'results': [  # List of search results
                        {
                            'title': str,  # Title of the webpage
                            'url': str,  # URL of the result
                            'content': str,  # Summary/snippet of content
                            'score': float,  # Relevance score
                            'raw_content': str|None  # Full page content if available
                        },
                        ...
                    ]
                }

        Raises:
            subprocess.CalledProcessError: If the fallback installation of
                ``tavily-python`` fails.
        """
        try:
            from tavily import AsyncTavilyClient
        except ImportError:
            # Best-effort fallback: install tavily-python on first use.
            # Run pip through the current interpreter so the package lands in
            # the environment this process actually imports from; a bare
            # ``pip`` on PATH may belong to a different Python.
            # NOTE(review): this call blocks the event loop while pip runs.
            import importlib
            import subprocess
            import sys

            subprocess.run(
                [sys.executable, "-m", "pip", "install", "tavily-python>=0.5.1"],
                check=True,
            )
            # Let the import system notice the freshly installed package.
            importlib.invalidate_caches()
            from tavily import AsyncTavilyClient

        # NOTE(review): the client is built without arguments, so it presumably
        # reads its credentials from the environment -- confirm against the SDK.
        tavily_async_client = AsyncTavilyClient()
        return await tavily_async_client.search(
            query,
            # Honor the caller's settings instead of hard-coded values.
            max_results=max_results,
            include_raw_content=include_raw_content,
            topic="general",
        )
|