DVampire committed on
Commit
a878541
·
1 Parent(s): 49c88c9

update website

Browse files
Files changed (1) hide show
  1. src/crawl/huggingface_daily.py +3 -3
src/crawl/huggingface_daily.py CHANGED
@@ -18,10 +18,10 @@ class HuggingFaceDailyPapers:
18
  """Extract arXiv ID from a URL"""
19
  if not url:
20
  return None
21
- # Matches /abs/2508.05629, /pdf/2508.05629.pdf
22
- m = re.search(r"arxiv\.org/(abs|pdf)/([0-9]{4}\.\d{4,5})(?:\.pdf)?", url)
23
  if m:
24
- return m.group(2)
25
  return None
26
 
27
  def extract_json_data(self, html: str) -> Dict[str, Any]:
 
18
  """Extract arXiv ID from a URL"""
19
  if not url:
20
  return None
21
+ # matches https://huggingface.co/papers/2508.10711
22
+ m = re.search(r"huggingface\.co/papers/(\d{4,5}\.\d+)(v\d+)?", url)
23
  if m:
24
+ return m.group(1)
25
  return None
26
 
27
  def extract_json_data(self, html: str) -> Dict[str, Any]: