Spaces:
Running
Running
Update search_utils.py
Browse files- search_utils.py +12 -11
search_utils.py
CHANGED
@@ -334,13 +334,9 @@ class MetadataManager:
|
|
334 |
return None
|
335 |
|
336 |
|
|
|
337 |
def _get_semantic_scholar_url(self, title):
|
338 |
-
"""Search Semantic Scholar API for a paper by title and return its URL.
|
339 |
-
|
340 |
-
It queries the Semantic Scholar API for a matching paper.
|
341 |
-
If the API provides a URL, that URL is returned.
|
342 |
-
If not, but a paperId is provided, construct the URL from the paperId.
|
343 |
-
"""
|
344 |
try:
|
345 |
response = requests.get(
|
346 |
"https://api.semanticscholar.org/graph/v1/paper/search",
|
@@ -351,22 +347,27 @@ class MetadataManager:
|
|
351 |
},
|
352 |
timeout=5
|
353 |
)
|
354 |
-
response.raise_for_status()
|
355 |
data = response.json()
|
356 |
|
357 |
-
# Check if we got at least one result
|
358 |
if "data" in data and len(data["data"]) > 0:
|
359 |
paper = data["data"][0]
|
360 |
-
# Prefer the provided URL if available
|
361 |
if paper.get("url"):
|
362 |
return paper["url"]
|
363 |
-
# Otherwise, build a URL using the paperId if available
|
364 |
elif paper.get("paperId"):
|
365 |
return f"https://www.semanticscholar.org/paper/{paper['paperId']}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
366 |
except Exception as e:
|
367 |
-
logger.error(f"Semantic Scholar API failed for '{title}': {e}")
|
368 |
return None
|
369 |
|
|
|
370 |
|
371 |
class SemanticSearch:
|
372 |
def __init__(self):
|
|
|
334 |
return None
|
335 |
|
336 |
|
337 |
+
|
338 |
def _get_semantic_scholar_url(self, title):
|
339 |
+
"""Search Semantic Scholar API for a paper by title and return its URL."""
|
|
|
|
|
|
|
|
|
|
|
340 |
try:
|
341 |
response = requests.get(
|
342 |
"https://api.semanticscholar.org/graph/v1/paper/search",
|
|
|
347 |
},
|
348 |
timeout=5
|
349 |
)
|
350 |
+
response.raise_for_status() # This raises for 429 or other errors
|
351 |
data = response.json()
|
352 |
|
|
|
353 |
if "data" in data and len(data["data"]) > 0:
|
354 |
paper = data["data"][0]
|
|
|
355 |
if paper.get("url"):
|
356 |
return paper["url"]
|
|
|
357 |
elif paper.get("paperId"):
|
358 |
return f"https://www.semanticscholar.org/paper/{paper['paperId']}"
|
359 |
+
except requests.exceptions.HTTPError as http_err:
|
360 |
+
if response.status_code == 429:
|
361 |
+
# logger.error(f"Rate limit exceeded for Semantic Scholar API for '{title}'. Falling back.")
|
362 |
+
# Optionally, add a sleep delay here for backoff
|
363 |
+
time.sleep(1) # simple backoff delay; consider exponential backoff
|
364 |
+
else:
|
365 |
+
# logger.error(f"Semantic Scholar API failed for '{title}': {http_err}")
|
366 |
except Exception as e:
|
367 |
+
# logger.error(f"Semantic Scholar API failed for '{title}': {e}")
|
368 |
return None
|
369 |
|
370 |
+
|
371 |
|
372 |
class SemanticSearch:
|
373 |
def __init__(self):
|