# Spaces:
# Running
# Running
from typing import Dict, List | |
import requests | |
class SemanticScholarSearch:
    """
    Semantic Scholar API Retriever

    Sends a paper-search request to the Semantic Scholar Graph API and returns
    the open-access hits as plain ``title`` / ``href`` / ``body`` dictionaries.
    """

    BASE_URL = "https://api.semanticscholar.org/graph/v1/paper/search"
    VALID_SORT_CRITERIA = ["relevance", "citationCount", "publicationDate"]
    # Seconds to wait for the API. Without a timeout ``requests`` can block
    # indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30

    def __init__(self, query: str, sort: str = "relevance"):
        """
        Initialize the SemanticScholarSearch class with a query and sort criterion.

        The criterion is matched case-insensitively and stored in the exact
        spelling listed in ``VALID_SORT_CRITERIA`` (e.g. ``"citationCount"``),
        so the camelCase names are sent to the API unchanged.

        :param query: Search query string
        :param sort: Sort criterion ('relevance', 'citationCount', 'publicationDate')
        :raises AssertionError: if ``sort`` is not one of the valid criteria
        """
        self.query = query

        canonical_by_key = {name.lower(): name for name in self.VALID_SORT_CRITERIA}
        key = sort.lower() if isinstance(sort, str) else None
        if key not in canonical_by_key:
            # Raised explicitly (not via ``assert``) so the check survives
            # ``python -O``; the exception type is kept for existing callers.
            raise AssertionError("Invalid sort criterion")
        self.sort = canonical_by_key[key]

    def search(self, max_results: int = 20) -> List[Dict[str, str]]:
        """
        Perform the search on Semantic Scholar and return results.

        Only papers flagged as open access that also carry an open-access PDF
        entry are returned. Any request failure (network error, timeout, HTTP
        error status, undecodable body) is reported on stdout and yields an
        empty list rather than an exception.

        :param max_results: Maximum number of results to retrieve
        :return: List of dictionaries containing title, href, and body of each paper
        """
        params = {
            "query": self.query,
            "limit": max_results,
            "fields": "title,abstract,url,venue,year,authors,isOpenAccess,openAccessPdf",
            "sort": self.sort,
        }

        try:
            response = requests.get(
                self.BASE_URL, params=params, timeout=self.REQUEST_TIMEOUT
            )
            response.raise_for_status()
            # Decoding stays inside the ``try``: a non-JSON body raises a
            # ValueError subclass and should follow the same best-effort path.
            payload = response.json()
        except (requests.RequestException, ValueError) as e:
            print(f"An error occurred while accessing Semantic Scholar API: {e}")
            return []

        # ``data`` may be absent or explicitly null; treat both as "no results".
        papers = payload.get("data") if isinstance(payload, dict) else None
        return self._format_results(papers or [])

    @staticmethod
    def _format_results(papers) -> List[Dict[str, str]]:
        """Convert raw API paper records into title/href/body dictionaries."""
        formatted = []
        for paper in papers:
            pdf_info = paper.get("openAccessPdf")
            if not (paper.get("isOpenAccess") and pdf_info):
                continue
            # ``or`` instead of a ``.get`` default: a key that is present with
            # a null value would bypass the default and leak ``None``.
            formatted.append(
                {
                    "title": paper.get("title") or "No Title",
                    "href": pdf_info.get("url") or "No URL",
                    "body": paper.get("abstract") or "Abstract not available",
                }
            )
        return formatted