Testys committed on
Commit
b1812af
·
1 Parent(s): 265c29d

Update search_utils.py

Browse files
Files changed (1) hide show
  1. search_utils.py +12 -11
search_utils.py CHANGED
@@ -334,13 +334,9 @@ class MetadataManager:
334
  return None
335
 
336
 
 
337
  def _get_semantic_scholar_url(self, title):
338
- """Search Semantic Scholar API for a paper by title and return its URL.
339
-
340
- It queries the Semantic Scholar API for a matching paper.
341
- If the API provides a URL, that URL is returned.
342
- If not, but a paperId is provided, construct the URL from the paperId.
343
- """
344
  try:
345
  response = requests.get(
346
  "https://api.semanticscholar.org/graph/v1/paper/search",
@@ -351,22 +347,27 @@ class MetadataManager:
351
  },
352
  timeout=5
353
  )
354
- response.raise_for_status()
355
  data = response.json()
356
 
357
- # Check if we got at least one result
358
  if "data" in data and len(data["data"]) > 0:
359
  paper = data["data"][0]
360
- # Prefer the provided URL if available
361
  if paper.get("url"):
362
  return paper["url"]
363
- # Otherwise, build a URL using the paperId if available
364
  elif paper.get("paperId"):
365
  return f"https://www.semanticscholar.org/paper/{paper['paperId']}"
 
 
 
 
 
 
 
366
  except Exception as e:
367
- logger.error(f"Semantic Scholar API failed for '{title}': {str(e)}")
368
  return None
369
 
 
370
 
371
  class SemanticSearch:
372
  def __init__(self):
 
334
  return None
335
 
336
 
337
+
338
  def _get_semantic_scholar_url(self, title):
339
+ """Search Semantic Scholar API for a paper by title and return its URL."""
 
 
 
 
 
340
  try:
341
  response = requests.get(
342
  "https://api.semanticscholar.org/graph/v1/paper/search",
 
347
  },
348
  timeout=5
349
  )
350
+ response.raise_for_status() # This raises for 429 or other errors
351
  data = response.json()
352
 
 
353
  if "data" in data and len(data["data"]) > 0:
354
  paper = data["data"][0]
 
355
  if paper.get("url"):
356
  return paper["url"]
 
357
  elif paper.get("paperId"):
358
  return f"https://www.semanticscholar.org/paper/{paper['paperId']}"
359
+ except requests.exceptions.HTTPError as http_err:
360
+ if response.status_code == 429:
361
+ # logger.error(f"Rate limit exceeded for Semantic Scholar API for '{title}'. Falling back.")
362
+ # Optionally, add a sleep delay here for backoff
363
+ time.sleep(1) # simple backoff delay; consider exponential backoff
364
+ else:
365
+ # logger.error(f"Semantic Scholar API failed for '{title}': {http_err}")
366
  except Exception as e:
367
+ # logger.error(f"Semantic Scholar API failed for '{title}': {e}")
368
  return None
369
 
370
+
371
 
372
  class SemanticSearch:
373
  def __init__(self):