Yuxuan-Zhang-Dexter committed on
Commit
dafeb92
·
1 Parent(s): bbddc27

update model names

Browse files
assets/model_color.json CHANGED
@@ -13,8 +13,8 @@
13
  "o1-2024-12-17": "#4DB6AC",
14
  "o1-mini-2024-09-12": "#26A69A",
15
  "o3-mini-2025-01-31(medium)": "#80CBC4",
16
- "o3": "#26C6DA",
17
- "o4-mini": "#00ACC1",
18
  "grok-3-beta": "#FF7043",
19
  "deepseek-v3": "#FFC107",
20
  "deepseek-r1": "#FFA000",
 
13
  "o1-2024-12-17": "#4DB6AC",
14
  "o1-mini-2024-09-12": "#26A69A",
15
  "o3-mini-2025-01-31(medium)": "#80CBC4",
16
+ "o3-2025-04-16": "#26C6DA",
17
+ "o4-mini-2025-04-16": "#00ACC1",
18
  "grok-3-beta": "#FF7043",
19
  "deepseek-v3": "#FFC107",
20
  "deepseek-r1": "#FFA000",
rank_data_03_25_2025.json CHANGED
@@ -50,7 +50,7 @@
50
  "runs": 1,
51
  "results": [
52
  {
53
- "model": "o3",
54
  "score": 256,
55
  "steps": 108,
56
  "time": "58:09",
@@ -120,7 +120,7 @@
120
  "rank": 10
121
  },
122
  {
123
- "model": "o4-mini",
124
  "score": 128,
125
  "steps": "",
126
  "time": "",
@@ -218,14 +218,14 @@
218
  "runs": 3,
219
  "results": [
220
  {
221
- "model": "o4-mini",
222
  "score_runs": "123,131",
223
  "average_score": 127,
224
  "steps": 25,
225
  "rank": 1
226
  },
227
  {
228
- "model": "o3",
229
  "score_runs": "115, 122",
230
  "average_score": 118.5,
231
  "steps": 25,
@@ -321,7 +321,7 @@
321
  "runs": 3,
322
  "results": [
323
  {
324
- "model": "o3",
325
  "levels_cracked": "5",
326
  "steps": "[16, 40, 59, 110]",
327
  "rank": 1
@@ -345,7 +345,7 @@
345
  "rank": 4
346
  },
347
  {
348
- "model": "o4-mini",
349
  "levels_cracked": "2",
350
  "steps": "",
351
  "rank": 5
@@ -434,7 +434,7 @@
434
  "note": "stuck at the end not present evidence"
435
  },
436
  {
437
- "model": "o3",
438
  "levels_cracked": "3",
439
  "lives_left": "[5, 3, 3, 0]",
440
  "cracked_details": "4: 4/8",
@@ -506,7 +506,7 @@
506
  "note": "stuck in the 3rd evidence present"
507
  },
508
  {
509
- "model": "o4-mini",
510
  "levels_cracked": "0",
511
  "lives_left": "0",
512
  "cracked_details": "1:1/5",
 
50
  "runs": 1,
51
  "results": [
52
  {
53
+ "model": "o3-2025-04-16",
54
  "score": 256,
55
  "steps": 108,
56
  "time": "58:09",
 
120
  "rank": 10
121
  },
122
  {
123
+ "model": "o4-mini-2025-04-16",
124
  "score": 128,
125
  "steps": "",
126
  "time": "",
 
218
  "runs": 3,
219
  "results": [
220
  {
221
+ "model": "o4-mini-2025-04-16",
222
  "score_runs": "123,131",
223
  "average_score": 127,
224
  "steps": 25,
225
  "rank": 1
226
  },
227
  {
228
+ "model": "o3-2025-04-16",
229
  "score_runs": "115, 122",
230
  "average_score": 118.5,
231
  "steps": 25,
 
321
  "runs": 3,
322
  "results": [
323
  {
324
+ "model": "o3-2025-04-16",
325
  "levels_cracked": "5",
326
  "steps": "[16, 40, 59, 110]",
327
  "rank": 1
 
345
  "rank": 4
346
  },
347
  {
348
+ "model": "o4-mini-2025-04-16",
349
  "levels_cracked": "2",
350
  "steps": "",
351
  "rank": 5
 
434
  "note": "stuck at the end not present evidence"
435
  },
436
  {
437
+ "model": "o3-2025-04-16",
438
  "levels_cracked": "3",
439
  "lives_left": "[5, 3, 3, 0]",
440
  "cracked_details": "4: 4/8",
 
506
  "note": "stuck in the 3rd evidence present"
507
  },
508
  {
509
+ "model": "o4-mini-2025-04-16",
510
  "levels_cracked": "0",
511
  "lives_left": "0",
512
  "cracked_details": "1:1/5",