Shreyas094's picture
Upload 528 files
372531f verified
from typing import Dict, List
import requests
class SemanticScholarSearch:
    """
    Semantic Scholar API Retriever

    Sends a paper-search request to the Semantic Scholar Graph API and keeps
    only the papers that are flagged open access and carry an open-access PDF
    entry.
    """

    BASE_URL = "https://api.semanticscholar.org/graph/v1/paper/search"
    VALID_SORT_CRITERIA = ["relevance", "citationCount", "publicationDate"]
    # Seconds to wait for the API before giving up; without a timeout
    # ``requests.get`` can block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 10

    def __init__(self, query: str, sort: str = "relevance"):
        """
        Initialize the SemanticScholarSearch class with a query and sort criterion.

        :param query: Search query string
        :param sort: Sort criterion ('relevance', 'citationCount',
            'publicationDate'); matched case-insensitively and stored in the
            canonical spelling listed in ``VALID_SORT_CRITERIA``
        :raises ValueError: if ``sort`` is not one of the valid criteria
        """
        self.query = query
        # Map lower-cased spellings back to the canonical ones so that the
        # value sent to the API keeps its camelCase form.
        canonical = {name.lower(): name for name in self.VALID_SORT_CRITERIA}
        key = sort.lower() if isinstance(sort, str) else None
        if key not in canonical:
            # Explicit raise instead of ``assert``: assertions are removed
            # when Python runs with -O, which would disable the validation.
            raise ValueError(
                f"Invalid sort criterion: {sort!r}; "
                f"expected one of {self.VALID_SORT_CRITERIA}"
            )
        self.sort = canonical[key]

    def search(self, max_results: int = 20) -> List[Dict[str, str]]:
        """
        Perform the search on Semantic Scholar and return results.

        Request failures (network errors, HTTP error statuses, undecodable
        response bodies) are reported on stdout and yield an empty list
        rather than an exception.

        :param max_results: Maximum number of results to retrieve
        :return: List of dictionaries containing title, href, and body of each
            open-access paper
        """
        params = {
            "query": self.query,
            "limit": max_results,
            "fields": "title,abstract,url,venue,year,authors,isOpenAccess,openAccessPdf",
            # NOTE(review): confirm against the API docs that this endpoint
            # honours ``sort`` and which value format it expects.
            "sort": self.sort,
        }
        try:
            response = requests.get(
                self.BASE_URL, params=params, timeout=self.REQUEST_TIMEOUT
            )
            response.raise_for_status()
            # Decode inside the guarded section: a non-JSON body raises a
            # ValueError subclass and must not escape as a crash.
            payload = response.json()
        except (requests.RequestException, ValueError) as e:
            print(f"An error occurred while accessing Semantic Scholar API: {e}")
            return []

        papers = payload.get("data") if isinstance(payload, dict) else None
        return self._format_results(papers or [])

    @staticmethod
    def _format_results(papers) -> List[Dict[str, str]]:
        """Convert raw API paper records into title/href/body dictionaries.

        Records that are not open access or lack an ``openAccessPdf`` entry
        are skipped. Missing *or null* fields fall back to placeholder text
        so every value in the result is a string.
        """
        formatted = []
        for paper in papers:
            pdf_info = paper.get("openAccessPdf")
            if not (paper.get("isOpenAccess") and pdf_info):
                continue
            formatted.append(
                {
                    # ``or`` rather than a ``.get`` default: the key may be
                    # present with a null value, which a default would not
                    # replace.
                    "title": paper.get("title") or "No Title",
                    "href": pdf_info.get("url") or "No URL",
                    "body": paper.get("abstract") or "Abstract not available",
                }
            )
        return formatted