Spaces:
Running
Running
DVampire
committed on
Commit
·
a878541
1
Parent(s):
49c88c9
update website
Browse files
src/crawl/huggingface_daily.py
CHANGED
@@ -18,10 +18,10 @@ class HuggingFaceDailyPapers:
|
|
18 |
"""Extract arXiv ID from a URL"""
|
19 |
if not url:
|
20 |
return None
|
21 |
-
#
|
22 |
-
m = re.search(r"
|
23 |
if m:
|
24 |
-
return m.group(
|
25 |
return None
|
26 |
|
27 |
def extract_json_data(self, html: str) -> Dict[str, Any]:
|
|
|
18 |
"""Extract arXiv ID from a URL"""
|
19 |
if not url:
|
20 |
return None
|
21 |
+
# matches https://huggingface.co/papers/2508.10711
|
22 |
+
m = re.search(r"huggingface\.co/papers/(\d{4,5}\.\d+)(v\d+)?", url)
|
23 |
if m:
|
24 |
+
return m.group(1)
|
25 |
return None
|
26 |
|
27 |
def extract_json_data(self, html: str) -> Dict[str, Any]:
|