karimouda committed on
Commit
8475783
·
1 Parent(s): 11c7818

Fix handling of HTML files when loading results

Browse files
requests/HuggingFaceTB/SmolLM2-135M-Instruct_eval_request.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "model": "HuggingFaceTB/SmolLM2-135M-Instruct",
3
- "model_sha": "e2c3f7557efbdec707ae3a336371d169783f1da1",
4
- "status": "FINISHED",
5
- "submitted_time": "2025-04-21T17:18:59Z",
6
- "likes": 178,
7
- "params": 0.135,
8
- "license": "apache-2.0"
9
- }
 
 
 
 
 
 
 
 
 
 
requests/Qwen/Qwen2.5-0.5B-Instruct_eval_request.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "model": "Qwen/Qwen2.5-0.5B-Instruct",
3
- "model_sha": "7ae557604adf67be50417f59c2c2f167def9a775",
4
- "status": "FINISHED",
5
- "submitted_time": "2025-04-21T14:43:01Z",
6
- "likes": 310,
7
- "params": 0.494,
8
- "license": "apache-2.0"
9
- }
 
 
 
 
 
 
 
 
 
 
requests/open-ai/gpt-3.5-turbo_eval_request.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "model": "open-ai/gpt-3.5-turbo",
3
- "model_sha": "NA",
4
- "status": "FINISHED",
5
- "submitted_time": "2025-04-26 16:56:14",
6
- "likes": -1,
7
- "params": 999,
8
- "license": "closed"
9
- }
 
 
 
 
 
 
 
 
 
 
requests/openai-community/gpt2_eval_request.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "model": "openai-community/gpt2",
3
- "model_sha": "607a30d783dfa663caf39e06633721c8d4cfcd7e",
4
- "status": "FINISHED",
5
- "submitted_time": "2025-04-21T14:50:23Z",
6
- "likes": 2679,
7
- "params": 0.137,
8
- "license": "mit"
9
- }
 
 
 
 
 
 
 
 
 
 
results/HuggingFaceTB/SmolLM2-135M-Instruct_results_2025-04-21 17:27:52.203995.json DELETED
@@ -1,38 +0,0 @@
1
- {
2
- "results": {
3
- "average_score": 3.0,
4
- "speed": 0.8272417944325482,
5
- "contamination_score": 0,
6
- "execution_time": 386.321918,
7
- "errors": [],
8
- "scores_by_category": [
9
- {
10
- "category": "Function Calling",
11
- "average_score": 6.0,
12
- "count": 2
13
- },
14
- {
15
- "category": "Reasoning & Math",
16
- "average_score": 1.0,
17
- "count": 3
18
- }
19
- ],
20
- "scores_by_format": [
21
- {
22
- "format": "Generation",
23
- "average_score": 3.0,
24
- "count": 5
25
- }
26
- ]
27
- },
28
- "config": {
29
- "model": "HuggingFaceTB/SmolLM2-135M-Instruct",
30
- "model_sha": "e2c3f7557efbdec707ae3a336371d169783f1da1",
31
- "submitted_time": "2025-04-21T17:18:59Z",
32
- "likes": 178,
33
- "params": 0.135,
34
- "license": "apache-2.0",
35
- "model_source": "Hugging Face",
36
- "model_category": "Nano"
37
- }
38
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
results/Qwen/Qwen2.5-0.5B-Instruct_results_2025-04-21 16:50:28.595317.json DELETED
@@ -1,38 +0,0 @@
1
- {
2
- "results": {
3
- "average_score": 6.0,
4
- "speed": 5,
5
- "contamination_score": 0,
6
- "execution_time": 88.587424,
7
- "errors": [],
8
- "scores_by_category": [
9
- {
10
- "category": "Function Calling",
11
- "average_score": 5.5,
12
- "count": 2
13
- },
14
- {
15
- "category": "Reasoning & Math",
16
- "average_score": 1.6666666666666667,
17
- "count": 3
18
- }
19
- ],
20
- "scores_by_format": [
21
- {
22
- "format": "Generation",
23
- "average_score": 3.2,
24
- "count": 5
25
- }
26
- ]
27
- },
28
- "config": {
29
- "model": "Qwen/Qwen2.5-0.5B-Instruct",
30
- "model_sha": "7ae557604adf67be50417f59c2c2f167def9a775",
31
- "model_source": "Hugging Face",
32
- "model_category": "Nano",
33
- "submitted_time": "2025-04-21T14:43:01Z",
34
- "likes": 310,
35
- "params": 0.494,
36
- "license": "apache-2.0"
37
- }
38
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
results/open-ai/gpt-3.5-turbo_abb_benchmark_answers_2025-04-26 17:17:28.074158+00:00.html DELETED
The diff for this file is too large to render. See raw diff
 
results/open-ai/gpt-3.5-turbo_results_2025-04-26 17:17:26.272549+00:00.json DELETED
The diff for this file is too large to render. See raw diff
 
results/openai-community/gpt2_results_2025-04-21 16:59:47.547731.json DELETED
@@ -1,38 +0,0 @@
1
- {
2
- "results": {
3
- "average_score": 1.0,
4
- "speed": 1.1064065631691649,
5
- "contamination_score": 0,
6
- "execution_time": 516.691865,
7
- "errors": [],
8
- "scores_by_category": [
9
- {
10
- "category": "Function Calling",
11
- "average_score": 1.0,
12
- "count": 2
13
- },
14
- {
15
- "category": "Reasoning & Math",
16
- "average_score": 1.0,
17
- "count": 3
18
- }
19
- ],
20
- "scores_by_format": [
21
- {
22
- "format": "Generation",
23
- "average_score": 1.0,
24
- "count": 5
25
- }
26
- ]
27
- },
28
- "config": {
29
- "model": "openai-community/gpt2",
30
- "model_sha": "607a30d783dfa663caf39e06633721c8d4cfcd7e",
31
- "model_source": "Hugging Face",
32
- "model_category": "Nano",
33
- "submitted_time": "2025-04-21T14:50:23Z",
34
- "likes": 2679,
35
- "params": 0.137,
36
- "license": "mit"
37
- }
38
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/leaderboard/read_evals.py CHANGED
@@ -168,16 +168,19 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
168
  model_result_filepaths = []
169
 
170
  for root, _, files in os.walk(results_path):
171
- # We should only have json files in model results
172
- if len(files) == 0 or any([not f.endswith(".json") for f in files]):
173
- continue
 
 
174
 
175
  # Sort the files by date
176
  try:
177
  files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
178
- except dateutil.parser._parser.ParserError:
 
179
  files = [files[-1]]
180
-
181
  for file in files:
182
  model_result_filepaths.append(os.path.join(root, file))
183
 
 
168
  model_result_filepaths = []
169
 
170
  for root, _, files in os.walk(results_path):
171
+ print("HERE",files)
172
+ # We should only have json files in model results ##we allow HTML files
173
+ #if len(files) == 0 or any([not f.endswith(".json") for f in files]):
174
+ # continue
175
+ files = [f for f in files if f.endswith(".json")]
176
 
177
  # Sort the files by date
178
  try:
179
  files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
180
+ except dateutil.parser._parser.ParserError as e:
181
+ print("Error",e)
182
  files = [files[-1]]
183
+ print(files)
184
  for file in files:
185
  model_result_filepaths.append(os.path.join(root, file))
186