File size: 2,111 Bytes
372531f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from typing import Dict, List

import requests


class SemanticScholarSearch:
    """
    Semantic Scholar API Retriever

    Sends a query to the Semantic Scholar Graph API paper-search endpoint and
    returns the open-access papers of the response as dictionaries with the
    keys ``title``, ``href`` and ``body``.
    """

    BASE_URL = "https://api.semanticscholar.org/graph/v1/paper/search"
    VALID_SORT_CRITERIA = ["relevance", "citationCount", "publicationDate"]
    # Seconds to wait for the API before giving up. Without a timeout,
    # ``requests`` may block indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 10

    class InvalidSortError(ValueError, AssertionError):
        """
        Raised when an unsupported sort criterion is supplied.

        Also derives from ``AssertionError`` so that callers written against
        the earlier ``assert``-based validation keep working.
        """

    def __init__(self, query: str, sort: str = "relevance"):
        """
        Initialize the SemanticScholarSearch class with a query and sort criterion.

        :param query: Search query string
        :param sort: Sort criterion ('relevance', 'citationCount', 'publicationDate')
        :raises InvalidSortError: If ``sort`` is not in ``VALID_SORT_CRITERIA``
        """
        # Explicit check instead of ``assert``: assertions are removed when
        # Python runs with -O, which would let invalid values through.
        if sort not in self.VALID_SORT_CRITERIA:
            raise self.InvalidSortError("Invalid sort criterion")

        self.query = query
        # NOTE(review): lower-casing turns e.g. 'citationCount' into
        # 'citationcount'. Kept for backward compatibility -- confirm the API
        # treats the value case-insensitively.
        self.sort = sort.lower()

    def search(self, max_results: int = 20) -> List[Dict[str, str]]:
        """
        Perform the search on Semantic Scholar and return results.

        Request failures (connection errors, timeouts, HTTP error statuses)
        and undecodable response bodies are reported on stdout and yield an
        empty list rather than an exception.

        :param max_results: Maximum number of results to retrieve
        :return: List of dictionaries containing title, href, and body of each
            open-access paper
        """
        params = {
            "query": self.query,
            "limit": max_results,
            "fields": "title,abstract,url,venue,year,authors,isOpenAccess,openAccessPdf",
            "sort": self.sort,
        }

        try:
            response = requests.get(
                self.BASE_URL,
                params=params,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            # Decoding happens inside the try block so that a non-JSON body
            # is handled like any other API failure. ValueError covers the
            # decode errors raised by older ``requests`` releases.
            payload = response.json()
        except (requests.RequestException, ValueError) as e:
            print(f"An error occurred while accessing Semantic Scholar API: {e}")
            return []

        return self._extract_open_access(payload)

    @staticmethod
    def _extract_open_access(payload) -> List[Dict[str, str]]:
        """
        Convert a decoded API response into the list returned by ``search``.

        Only entries flagged ``isOpenAccess`` that carry a non-empty
        ``openAccessPdf`` mapping are kept.

        :param payload: Decoded JSON body of the search response
        :return: List of dictionaries containing title, href, and body
        """
        if not isinstance(payload, dict):
            return []

        papers = []
        # ``or []`` guards against an explicit null ``data`` field, which
        # ``.get("data", [])`` would pass through as None.
        for paper in payload.get("data") or []:
            pdf_info = paper.get("openAccessPdf")
            if not (
                paper.get("isOpenAccess")
                and pdf_info
                and isinstance(pdf_info, dict)
            ):
                continue

            # ``dict.get(key, default)`` only applies the default when the key
            # is absent; a key present with a null value must fall back too,
            # so that every value in the result is a string.
            papers.append(
                {
                    "title": paper.get("title") or "No Title",
                    "href": pdf_info.get("url") or "No URL",
                    "body": paper.get("abstract") or "Abstract not available",
                }
            )

        return papers