boatbomber committed on
Commit
4c7c0d6
·
1 Parent(s): 3e5aa13

Retries for the HF snapshot download

Browse files
Files changed (1) hide show
  1. src/leaderboard/populate.py +26 -9
src/leaderboard/populate.py CHANGED
@@ -1,7 +1,10 @@
1
  import json
 
 
2
 
3
  import pandas as pd
4
  from huggingface_hub import snapshot_download
 
5
 
6
  from src.envs import RESULTS_REPO_ID, RESULTS_REPO_PATH, TOKEN
7
  from src.leaderboard.utils import COLUMNS
@@ -9,15 +12,29 @@ from src.leaderboard.utils import COLUMNS
9
 
10
  def download_result_data():
11
  print(f"Downloading {RESULTS_REPO_ID} to {RESULTS_REPO_PATH}")
12
- snapshot_download(
13
- repo_id=RESULTS_REPO_ID,
14
- local_dir=RESULTS_REPO_PATH,
15
- repo_type="dataset",
16
- tqdm_class=None,
17
- etag_timeout=30,
18
- token=TOKEN,
19
- allow_patterns=["results/*"],
20
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
 
23
  def load_results() -> pd.DataFrame:
 
1
  import json
2
+ import os
3
+ import time
4
 
5
  import pandas as pd
6
  from huggingface_hub import snapshot_download
7
+ from requests.exceptions import ConnectionError, ReadTimeout
8
 
9
  from src.envs import RESULTS_REPO_ID, RESULTS_REPO_PATH, TOKEN
10
  from src.leaderboard.utils import COLUMNS
 
12
 
def download_result_data(max_attempts: int = 5, retry_delay: float = 3.0) -> None:
    """Download the results snapshot from the Hugging Face Hub, retrying on network errors.

    Fetches the files matching ``results/*`` from the dataset repository
    ``RESULTS_REPO_ID`` into ``RESULTS_REPO_PATH``. A ``ReadTimeout`` or
    ``ConnectionError`` triggers a retry after ``retry_delay`` seconds; any
    other exception propagates immediately.

    Args:
        max_attempts: Total number of download attempts before giving up.
            Must be at least 1.
        retry_delay: Seconds to wait between attempts.

    Raises:
        ValueError: If ``max_attempts`` is less than 1.
        ReadTimeout, ConnectionError: If the final attempt still fails with
            a network error.
    """
    if max_attempts < 1:
        raise ValueError(f"max_attempts must be >= 1, got {max_attempts}")

    print(f"Downloading {RESULTS_REPO_ID} to {RESULTS_REPO_PATH}")

    # Set env vars to configure huggingface_hub.
    # NOTE(review): huggingface_hub appears to read these variables once, when
    # the package is imported, so assigning them here (after the module-level
    # import) likely has no effect on this process -- confirm, and if so move
    # them to process start-up. The explicit etag_timeout argument below does
    # not depend on them.
    os.environ["HF_HUB_ETAG_TIMEOUT"] = "30"
    os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "30"

    for attempt in range(1, max_attempts + 1):
        try:
            snapshot_download(
                repo_id=RESULTS_REPO_ID,
                local_dir=RESULTS_REPO_PATH,
                repo_type="dataset",
                tqdm_class=None,
                etag_timeout=30,
                token=TOKEN,
                allow_patterns=["results/*"],
            )
        except (ReadTimeout, ConnectionError) as e:
            # Bounded retries: surface the error instead of looping forever
            # when the Hub stays unreachable.
            if attempt == max_attempts:
                raise
            # Report before sleeping so the log reflects the failure promptly.
            print(f"Connection error: {e}. Retrying...")
            time.sleep(retry_delay)
        else:
            return
38
 
39
 
40
  def load_results() -> pd.DataFrame: