Commit
·
4c7c0d6
1
Parent(s):
3e5aa13
Retries for the HF snapshot download
Browse files- src/leaderboard/populate.py +26 -9
src/leaderboard/populate.py
CHANGED
@@ -1,7 +1,10 @@
|
|
1 |
import json
|
|
|
|
|
2 |
|
3 |
import pandas as pd
|
4 |
from huggingface_hub import snapshot_download
|
|
|
5 |
|
6 |
from src.envs import RESULTS_REPO_ID, RESULTS_REPO_PATH, TOKEN
|
7 |
from src.leaderboard.utils import COLUMNS
|
@@ -9,15 +12,29 @@ from src.leaderboard.utils import COLUMNS
|
|
9 |
|
10 |
def download_result_data():
|
11 |
print(f"Downloading {RESULTS_REPO_ID} to {RESULTS_REPO_PATH}")
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
|
23 |
def load_results() -> pd.DataFrame:
|
|
|
1 |
import json
|
2 |
+
import os
|
3 |
+
import time
|
4 |
|
5 |
import pandas as pd
|
6 |
from huggingface_hub import snapshot_download
|
7 |
+
from requests.exceptions import ConnectionError, ReadTimeout
|
8 |
|
9 |
from src.envs import RESULTS_REPO_ID, RESULTS_REPO_PATH, TOKEN
|
10 |
from src.leaderboard.utils import COLUMNS
|
|
|
12 |
|
13 |
def download_result_data(*, max_retries=None, retry_delay=3):
    """Download the results dataset snapshot, retrying on connection problems.

    Fetches the files matching ``results/*`` from the dataset repository
    ``RESULTS_REPO_ID`` into the local directory ``RESULTS_REPO_PATH`` using
    ``snapshot_download``. A ``ReadTimeout`` or ``ConnectionError`` (as
    imported at the top of this file) triggers another attempt after a pause;
    any other exception propagates immediately with its original traceback.

    Args:
        max_retries: Maximum number of retries after the first attempt.
            ``None`` (the default) keeps retrying until the download succeeds.
        retry_delay: Seconds to wait before each retry.

    Raises:
        ReadTimeout, ConnectionError: Only when ``max_retries`` is set and
            has been exhausted; the last error is re-raised.
    """
    print(f"Downloading {RESULTS_REPO_ID} to {RESULTS_REPO_PATH}")

    # Set env vars to configure huggingface_hub.
    # NOTE(review): huggingface_hub is already imported at module level by the
    # time this runs; it presumably reads these variables at import time, in
    # which case the assignments below only affect child processes -- confirm.
    # The ETag timeout is therefore also passed explicitly to the call below.
    os.environ["HF_HUB_ETAG_TIMEOUT"] = "30"
    os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "30"

    retries_done = 0
    while True:
        try:
            snapshot_download(
                repo_id=RESULTS_REPO_ID,
                local_dir=RESULTS_REPO_PATH,
                repo_type="dataset",
                tqdm_class=None,
                etag_timeout=30,
                token=TOKEN,
                allow_patterns=["results/*"],
            )
        except (ReadTimeout, ConnectionError) as e:
            if max_retries is not None and retries_done >= max_retries:
                # Retry budget exhausted: surface the last connection error.
                raise
            retries_done += 1
            # Report the failure before pausing so the log is not delayed.
            print(f"Connection error: {e}. Retrying...")
            time.sleep(retry_delay)
        else:
            return
|
38 |
|
39 |
|
40 |
def load_results() -> pd.DataFrame:
|