Spaces:
Running
Running
File size: 3,259 Bytes
372531f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
import os
from ..utils import check_pkg
class ExaSearch:
    """
    Exa API Retriever

    Binds an ``exa_py.Exa`` client to a single query string and converts the
    result objects returned by the client into plain dictionaries.
    """

    def __init__(self, query):
        """
        Initializes the ExaSearch object.

        Args:
            query: The search query.

        Raises:
            Exception: If the EXA_API_KEY environment variable is missing
                or blank.
        """
        # This validation is necessary since exa_py is optional; the import
        # is deferred to this point so the module can be loaded without it.
        check_pkg("exa_py")
        from exa_py import Exa

        self.query = query
        self.api_key = self._retrieve_api_key()
        self.client = Exa(api_key=self.api_key)

    def _retrieve_api_key(self):
        """
        Retrieves the Exa API key from environment variables.

        Returns:
            The value of the EXA_API_KEY environment variable, unmodified.

        Raises:
            Exception: If the API key is not found, or is empty or
                whitespace-only.
        """
        api_key = os.environ.get("EXA_API_KEY")
        # A blank value is as unusable as a missing one: reject it here so
        # the caller gets the actionable message below instead of a later
        # failure from the client.
        if api_key is None or not api_key.strip():
            raise Exception(
                "Exa API key not found. Please set the EXA_API_KEY environment variable. "
                "You can obtain your key from https://exa.ai/"
            )
        return api_key

    def search(
        self, max_results=10, use_autoprompt=False, search_type="neural", **filters
    ):
        """
        Searches the query using the Exa API.

        Args:
            max_results: The maximum number of results to return; forwarded
                to the client as ``num_results``.
            use_autoprompt: Whether to use autoprompting.
            search_type: The type of search (e.g., "neural", "keyword");
                forwarded to the client as ``type``.
            **filters: Additional filters (e.g., date range, domains),
                passed through to the client unchanged.

        Returns:
            A list of dicts, one per result, with keys "href" (the result's
            ``url``) and "body" (the result's ``text``).
        """
        results = self.client.search(
            self.query,
            type=search_type,
            use_autoprompt=use_autoprompt,
            num_results=max_results,
            **filters,
        )
        # NOTE(review): "body" is whatever the SDK puts in ``result.text``;
        # confirm against the exa_py docs that a plain search populates it.
        return [
            {"href": result.url, "body": result.text} for result in results.results
        ]

    def find_similar(self, url, exclude_source_domain=False, **filters):
        """
        Finds similar documents to the provided URL using the Exa API.

        Args:
            url: The URL to find similar documents for.
            exclude_source_domain: Whether to exclude the source domain in
                the results.
            **filters: Additional filters, passed through to the client
                unchanged.

        Returns:
            A list of dicts, one per similar document, with keys "href"
            (the result's ``url``) and "body" (the result's ``text``).
        """
        results = self.client.find_similar(
            url, exclude_source_domain=exclude_source_domain, **filters
        )
        # NOTE(review): as in search(), "body" mirrors ``result.text``;
        # confirm the SDK fills it for this endpoint.
        return [
            {"href": result.url, "body": result.text} for result in results.results
        ]

    def get_contents(self, ids, **options):
        """
        Retrieves the contents of the specified IDs using the Exa API.

        Args:
            ids: The IDs of the documents to retrieve.
            **options: Additional options for content retrieval, passed
                through to the client unchanged.

        Returns:
            A list of dicts, one per document, with keys "id" (the result's
            ``id``) and "content" (the result's ``text``).
        """
        results = self.client.get_contents(ids, **options)
        return [
            {"id": result.id, "content": result.text} for result in results.results
        ]
|