Your Name committed on
Commit
ad59b21
·
1 Parent(s): ab0bf39

update new leaderboard

Browse files
Files changed (6) hide show
  1. .DS_Store +0 -0
  2. app.py +0 -0
  3. assets/model_color.json +55 -28
  4. gallery_tab.py +284 -284
  5. leaderboard_utils.py +6 -0
  6. rank_data_03_25_2025.json +263 -127
.DS_Store ADDED
Binary file (6.15 kB). View file
 
app.py CHANGED
File without changes
assets/model_color.json CHANGED
@@ -4,6 +4,7 @@
4
  "claude-3-5-sonnet-20241022": "#1A4C7C",
5
  "claude-opus-4-20250514": "#3A80D2",
6
  "claude-sonnet-4-20250514": "#5A9FE2",
 
7
  "gemini-2.0-flash": "#FF4081",
8
  "gemini-2.0-flash-thinking-exp-1219": "#C2185B",
9
  "gemini-2.5-pro-exp-03-25": "#FF80AB",
@@ -11,47 +12,73 @@
11
  "gemini-2.5-flash-preview-05-20": "#F8BBD9",
12
  "gemini-2.5-pro-preview-05-06": "#AD1457",
13
  "gemini-2.5-pro-preview-06-05": "#EC407A",
 
14
  "gpt-4o-2024-11-20": "#00BFA5",
15
  "gpt-4.5-preview-2025-02-27": "#00796B",
16
  "gpt-4.1-2025-04-14": "#00897B",
 
 
 
17
  "o1-2024-12-17": "#4DB6AC",
18
  "o1-mini-2024-09-12": "#26A69A",
19
  "o3-mini-2025-01-31(medium)": "#80CBC4",
20
  "o3-2025-04-16": "#26C6DA",
21
  "o4-mini-2025-04-16": "#00ACC1",
 
22
  "grok-3-beta": "#FF7043",
23
  "grok-3-mini-beta": "#FF8A65",
 
 
24
  "deepseek-v3": "#FFC107",
25
  "deepseek-r1-0120": "#FFA000",
26
  "deepseek-r1-0528": "#FFB300",
 
27
  "llama-4-maverick-17b-128e-instruct-fp8": "#8E24AA",
28
  "qwen3-235B-A22B-fp8": "#6A1B9A",
 
 
 
 
29
  "random (x30)": "#9E9E9E",
30
- "🎮 claude-3-7-sonnet-20250219 (GamingAgent)": "#4A90E2",
31
- "🎮 claude-3-5-haiku-20241022 (GamingAgent)": "#7FB5E6",
32
- "🎮 claude-3-5-sonnet-20241022 (GamingAgent)": "#1A4C7C",
33
- "🎮 claude-opus-4-20250514 (GamingAgent)": "#3A80D2",
34
- "🎮 claude-sonnet-4-20250514 (GamingAgent)": "#5A9FE2",
35
- "🎮 gemini-2.0-flash (GamingAgent)": "#FF4081",
36
- "🎮 gemini-2.0-flash-thinking-exp-1219 (GamingAgent)": "#C2185B",
37
- "🎮 gemini-2.5-pro-exp-03-25 (GamingAgent)": "#FF80AB",
38
- "🎮 gemini-2.5-flash-preview-04-17 (GamingAgent)": "#F06292",
39
- "🎮 gemini-2.5-flash-preview-05-20 (GamingAgent)": "#F8BBD9",
40
- "🎮 gemini-2.5-pro-preview-05-06 (GamingAgent)": "#AD1457",
41
- "🎮 gemini-2.5-pro-preview-06-05 (GamingAgent)": "#EC407A",
42
- "🎮 gpt-4o-2024-11-20 (GamingAgent)": "#00BFA5",
43
- "🎮 gpt-4.5-preview-2025-02-27 (GamingAgent)": "#00796B",
44
- "🎮 gpt-4.1-2025-04-14 (GamingAgent)": "#00897B",
45
- "🎮 o1-2024-12-17 (GamingAgent)": "#4DB6AC",
46
- "🎮 o1-mini-2024-09-12 (GamingAgent)": "#26A69A",
47
- "🎮 o3-mini-2025-01-31(medium) (GamingAgent)": "#80CBC4",
48
- "🎮 o3-2025-04-16 (GamingAgent)": "#26C6DA",
49
- "🎮 o4-mini-2025-04-16 (GamingAgent)": "#00ACC1",
50
- "🎮 grok-3-beta (GamingAgent)": "#FF7043",
51
- "🎮 grok-3-mini-beta (GamingAgent)": "#FF8A65",
52
- "🎮 deepseek-v3 (GamingAgent)": "#FFC107",
53
- "🎮 deepseek-r1-0120 (GamingAgent)": "#FFA000",
54
- "🎮 deepseek-r1-0528 (GamingAgent)": "#FFB300",
55
- "🎮 llama-4-maverick-17b-128e-instruct-fp8 (GamingAgent)": "#8E24AA",
56
- "🎮 qwen3-235B-A22B-fp8 (GamingAgent)": "#6A1B9A"
57
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  "claude-3-5-sonnet-20241022": "#1A4C7C",
5
  "claude-opus-4-20250514": "#3A80D2",
6
  "claude-sonnet-4-20250514": "#5A9FE2",
7
+
8
  "gemini-2.0-flash": "#FF4081",
9
  "gemini-2.0-flash-thinking-exp-1219": "#C2185B",
10
  "gemini-2.5-pro-exp-03-25": "#FF80AB",
 
12
  "gemini-2.5-flash-preview-05-20": "#F8BBD9",
13
  "gemini-2.5-pro-preview-05-06": "#AD1457",
14
  "gemini-2.5-pro-preview-06-05": "#EC407A",
15
+
16
  "gpt-4o-2024-11-20": "#00BFA5",
17
  "gpt-4.5-preview-2025-02-27": "#00796B",
18
  "gpt-4.1-2025-04-14": "#00897B",
19
+ "gpt-oss-20b": "#4DD0E1",
20
+ "gpt-oss-120b": "#00838F",
21
+
22
  "o1-2024-12-17": "#4DB6AC",
23
  "o1-mini-2024-09-12": "#26A69A",
24
  "o3-mini-2025-01-31(medium)": "#80CBC4",
25
  "o3-2025-04-16": "#26C6DA",
26
  "o4-mini-2025-04-16": "#00ACC1",
27
+
28
  "grok-3-beta": "#FF7043",
29
  "grok-3-mini-beta": "#FF8A65",
30
+ "grok-4-0709": "#FF7043",
31
+
32
  "deepseek-v3": "#FFC107",
33
  "deepseek-r1-0120": "#FFA000",
34
  "deepseek-r1-0528": "#FFB300",
35
+
36
  "llama-4-maverick-17b-128e-instruct-fp8": "#8E24AA",
37
  "qwen3-235B-A22B-fp8": "#6A1B9A",
38
+ "kimi-k2-0711-preview": "#FA1B9A",
39
+
40
+ "glm-4.5": "#4CAF50",
41
+
42
  "random (x30)": "#9E9E9E",
43
+
44
+ "🎮 claude-3-7-sonnet-20250219": "#4A90E2",
45
+ "🎮 claude-3-5-haiku-20241022": "#7FB5E6",
46
+ "🎮 claude-3-5-sonnet-20241022": "#1A4C7C",
47
+ "🎮 claude-opus-4-20250514": "#3A80D2",
48
+ "🎮 claude-sonnet-4-20250514": "#5A9FE2",
49
+
50
+ "🎮 gemini-2.0-flash": "#FF4081",
51
+ "🎮 gemini-2.0-flash-thinking-exp-1219": "#C2185B",
52
+ "🎮 gemini-2.5-pro-exp-03-25": "#FF80AB",
53
+ "🎮 gemini-2.5-flash-preview-04-17": "#F06292",
54
+ "🎮 gemini-2.5-flash-preview-05-20": "#F8BBD9",
55
+ "🎮 gemini-2.5-pro-preview-05-06": "#AD1457",
56
+ "🎮 gemini-2.5-pro-preview-06-05": "#EC407A",
57
+
58
+ "🎮 gpt-4o-2024-11-20": "#00BFA5",
59
+ "🎮 gpt-4.5-preview-2025-02-27": "#00796B",
60
+ "🎮 gpt-4.1-2025-04-14": "#00897B",
61
+ "🎮 gpt-oss-20b": "#4DD0E1",
62
+ "🎮 gpt-oss-120b": "#00838F",
63
+
64
+ "🎮 o1-2024-12-17": "#4DB6AC",
65
+ "🎮 o1-mini-2024-09-12": "#26A69A",
66
+ "🎮 o3-mini-2025-01-31(medium)": "#80CBC4",
67
+ "🎮 o3-2025-04-16": "#26C6DA",
68
+ "🎮 o4-mini-2025-04-16": "#00ACC1",
69
+
70
+ "🎮 grok-3-beta": "#FF7043",
71
+ "🎮 grok-3-mini-beta": "#FF8A65",
72
+ "🎮 grok-4-0709": "#FF7043",
73
+
74
+ "🎮 deepseek-v3": "#FFC107",
75
+ "🎮 deepseek-r1-0120": "#FFA000",
76
+ "🎮 deepseek-r1-0528": "#FFB300",
77
+
78
+ "🎮 llama-4-maverick-17b-128e-instruct-fp8": "#8E24AA",
79
+ "🎮 qwen3-235B-A22B-fp8": "#6A1B9A",
80
+ "🎮 kimi-k2-0711-preview": "#FA1B9A",
81
+
82
+ "🎮 glm-4.5": "#4CAF50"
83
+ }
84
+
gallery_tab.py CHANGED
@@ -1,285 +1,285 @@
1
- import gradio as gr
2
- from datetime import datetime
3
- import json
4
-
5
- # Load video links and news data
6
- with open('assets/game_video_link.json', 'r') as f:
7
- VIDEO_LINKS = json.load(f)
8
-
9
- with open('assets/news.json', 'r') as f:
10
- NEWS_DATA = json.load(f)
11
-
12
- def create_video_gallery():
13
- """Create a custom HTML/JS component for video gallery"""
14
- # Extract video IDs
15
- mario_id = VIDEO_LINKS["super_mario_bros"].split("?v=")[1]
16
- sokoban_id = VIDEO_LINKS["sokoban"].split("?v=")[1]
17
- game_2048_id = VIDEO_LINKS["2048"].split("?v=")[1]
18
- candy_id = VIDEO_LINKS["candy"].split("?v=")[1]
19
- ace_attorney_id = VIDEO_LINKS["ace_attorney"].split("?v=")[1]
20
- tetris_id = VIDEO_LINKS["tetris"].split("?v=")[1]
21
-
22
- # Get the latest video from news data
23
- latest_news = NEWS_DATA["news"][0] # First item is the latest
24
- latest_video_id = latest_news["video_link"].split("?v=")[1]
25
- latest_date = datetime.strptime(latest_news["date"], "%Y-%m-%d")
26
- formatted_latest_date = latest_date.strftime("%B %d, %Y")
27
-
28
- # Generate news HTML
29
- news_items = []
30
- for item in NEWS_DATA["news"]:
31
- video_id = item["video_link"].split("?v=")[1]
32
- date_obj = datetime.strptime(item["date"], "%Y-%m-%d")
33
- formatted_date = date_obj.strftime("%B %d, %Y")
34
- news_items.append(f'''
35
- <div class="news-item">
36
- <div class="news-date">{formatted_date}</div>
37
- <div class="news-content">
38
- <div class="news-video">
39
- <div class="video-wrapper">
40
- <iframe src="https://www.youtube.com/embed/{video_id}"></iframe>
41
- </div>
42
- </div>
43
- <div class="news-text">
44
- <a href="{item["twitter_link"]}" target="_blank" class="twitter-link">
45
- <span class="twitter-icon">📢</span>
46
- {item["twitter_text"]}
47
- </a>
48
- </div>
49
- </div>
50
- </div>
51
- ''')
52
-
53
- news_html = '\n'.join(news_items)
54
-
55
- gallery_html = f'''
56
- <div class="video-gallery-container">
57
- <style>
58
- .video-gallery-container {{
59
- width: 100%;
60
- max-width: 1400px;
61
- margin: 0 auto;
62
- padding: 20px;
63
- }}
64
- .highlight-section {{
65
- margin-bottom: 40px;
66
- }}
67
- .highlight-card {{
68
- background: #ffffff;
69
- border-radius: 10px;
70
- box-shadow: 0 4px 20px rgba(0,0,0,0.15);
71
- overflow: hidden;
72
- transition: transform 0.3s;
73
- border: 2px solid #2196F3;
74
- }}
75
- .highlight-card:hover {{
76
- transform: translateY(-5px);
77
- }}
78
- .highlight-header {{
79
- background: #2196F3;
80
- color: white;
81
- padding: 15px 20px;
82
- font-size: 1.2em;
83
- font-weight: bold;
84
- display: flex;
85
- align-items: center;
86
- gap: 10px;
87
- }}
88
- .highlight-date {{
89
- font-size: 0.9em;
90
- opacity: 0.9;
91
- }}
92
- .highlight-content {{
93
- padding: 20px;
94
- }}
95
- .video-grid {{
96
- display: grid;
97
- grid-template-columns: repeat(2, 1fr);
98
- gap: 20px;
99
- margin-top: 20px;
100
- margin-bottom: 40px;
101
- }}
102
- .video-card {{
103
- background: var(--card-bg, #ffffff);
104
- border-radius: 10px;
105
- box-shadow: 0 2px 10px rgba(0,0,0,0.1);
106
- overflow: hidden;
107
- transition: transform 0.2s;
108
- }}
109
- .video-card:hover {{
110
- transform: translateY(-5px);
111
- }}
112
- .video-wrapper {{
113
- position: relative;
114
- padding-bottom: 56.25%;
115
- height: 0;
116
- overflow: hidden;
117
- }}
118
- .video-wrapper iframe {{
119
- position: absolute;
120
- top: 0;
121
- left: 0;
122
- width: 100%;
123
- height: 100%;
124
- border: none;
125
- }}
126
- .video-title {{
127
- padding: 15px;
128
- font-size: 1.2em;
129
- font-weight: bold;
130
- color: var(--title-text, #2c3e50);
131
- text-align: center;
132
- background: var(--title-bg, #f8f9fa);
133
- border-top: 1px solid var(--border-color, #eee);
134
- }}
135
- .news-section {{
136
- margin-top: 40px;
137
- border-top: 2px solid #e9ecef;
138
- padding-top: 20px;
139
- }}
140
- .news-section-title {{
141
- font-size: 1.8em;
142
- font-weight: bold;
143
- color: #2c3e50;
144
- margin-bottom: 20px;
145
- text-align: center;
146
- }}
147
- .news-item {{
148
- background: #ffffff;
149
- border-radius: 10px;
150
- box-shadow: 0 2px 10px rgba(0,0,0,0.1);
151
- margin-bottom: 20px;
152
- overflow: hidden;
153
- }}
154
- .news-date {{
155
- padding: 10px 20px;
156
- background: #f8f9fa;
157
- color: #666;
158
- font-size: 0.9em;
159
- border-bottom: 1px solid #eee;
160
- }}
161
- .news-content {{
162
- display: flex;
163
- padding: 20px;
164
- align-items: center;
165
- gap: 30px;
166
- }}
167
- .news-video {{
168
- flex: 0 0 300px;
169
- }}
170
- .news-text {{
171
- flex: 1;
172
- display: flex;
173
- align-items: center;
174
- min-height: 169px;
175
- }}
176
- .twitter-link {{
177
- color: #2c3e50;
178
- text-decoration: none;
179
- display: flex;
180
- align-items: center;
181
- gap: 15px;
182
- font-size: 1.4em;
183
- font-weight: 600;
184
- line-height: 1.4;
185
- }}
186
- .twitter-link:hover {{
187
- color: #1da1f2;
188
- }}
189
- .twitter-icon {{
190
- font-size: 1.5em;
191
- color: #1da1f2;
192
- }}
193
-
194
- /* Dark mode specific styles */
195
- .dark .video-card {{
196
- --card-bg: #2d3748;
197
- --title-bg: #1a202c;
198
- --title-text: #e2e8f0;
199
- --border-color: #4a5568;
200
- }}
201
-
202
- /* Light mode specific styles */
203
- .light .video-card {{
204
- --card-bg: #ffffff;
205
- --title-bg: #f8f9fa;
206
- --title-text: #2c3e50;
207
- --border-color: #eee;
208
- }}
209
- </style>
210
-
211
- <!-- Highlight Section -->
212
- <div class="highlight-section">
213
- <div class="highlight-card">
214
- <div class="highlight-header">
215
- <span>🌟 Latest Update</span>
216
- <span class="highlight-date">{formatted_latest_date}</span>
217
- </div>
218
- <div class="highlight-content">
219
- <div class="video-wrapper">
220
- <iframe src="https://www.youtube.com/embed/{latest_video_id}"></iframe>
221
- </div>
222
- <div class="video-title">
223
- <a href="{latest_news["twitter_link"]}" target="_blank" class="twitter-link">
224
- <span class="twitter-icon">📢</span>
225
- {latest_news["twitter_text"]}
226
- </a>
227
- </div>
228
- </div>
229
- </div>
230
- </div>
231
-
232
- <!-- Regular Video Grid -->
233
- <div class="video-grid">
234
- <div class="video-card">
235
- <div class="video-wrapper">
236
- <iframe src="https://www.youtube.com/embed/{mario_id}"></iframe>
237
- </div>
238
- <div class="video-title">🎮 Super Mario Bros</div>
239
- </div>
240
- <div class="video-card">
241
- <div class="video-wrapper">
242
- <iframe src="https://www.youtube.com/embed/{sokoban_id}"></iframe>
243
- </div>
244
- <div class="video-title">📦 Sokoban</div>
245
- </div>
246
- <div class="video-card">
247
- <div class="video-wrapper">
248
- <iframe src="https://www.youtube.com/embed/{game_2048_id}"></iframe>
249
- </div>
250
- <div class="video-title">🔢 2048</div>
251
- </div>
252
- <div class="video-card">
253
- <div class="video-wrapper">
254
- <iframe src="https://www.youtube.com/embed/{candy_id}"></iframe>
255
- </div>
256
- <div class="video-title">🍬 Candy Crush</div>
257
- </div>
258
- <div class="video-card">
259
- <div class="video-wrapper">
260
- <iframe src="https://www.youtube.com/embed/{ace_attorney_id}"></iframe>
261
- </div>
262
- <div class="video-title">⚖️ Ace Attorney</div>
263
- </div>
264
- <div class="video-card">
265
- <div class="video-wrapper">
266
- <iframe src="https://www.youtube.com/embed/{tetris_id}"></iframe>
267
- </div>
268
- <div class="video-title">🧩 Tetris</div>
269
- </div>
270
- </div>
271
-
272
- <!-- News Section -->
273
- <div class="news-section">
274
- <div class="news-section-title">📰 Latest News</div>
275
- {news_html}
276
- </div>
277
- </div>
278
- '''
279
- return gr.HTML(gallery_html)
280
-
281
- def create_gallery_tab():
282
- """Create and return the gallery tab component"""
283
- with gr.Tab("🎥 Gallery") as gallery_tab:
284
- video_gallery = create_video_gallery()
285
  return gallery_tab
 
1
+ import gradio as gr
2
+ from datetime import datetime
3
+ import json
4
+
5
+ # Load video links and news data
6
+ with open('assets/game_video_link.json', 'r') as f:
7
+ VIDEO_LINKS = json.load(f)
8
+
9
+ with open('assets/news.json', 'r') as f:
10
+ NEWS_DATA = json.load(f)
11
+
12
+ def create_video_gallery():
13
+ """Create a custom HTML/JS component for video gallery"""
14
+ # Extract video IDs
15
+ mario_id = VIDEO_LINKS["super_mario_bros"].split("?v=")[1]
16
+ sokoban_id = VIDEO_LINKS["sokoban"].split("?v=")[1]
17
+ game_2048_id = VIDEO_LINKS["2048"].split("?v=")[1]
18
+ candy_id = VIDEO_LINKS["candy"].split("?v=")[1]
19
+ ace_attorney_id = VIDEO_LINKS["ace_attorney"].split("?v=")[1]
20
+ tetris_id = VIDEO_LINKS["tetris"].split("?v=")[1]
21
+
22
+ # Get the latest video from news data
23
+ latest_news = NEWS_DATA["news"][0] # First item is the latest
24
+ latest_video_id = latest_news["video_link"].split("?v=")[1]
25
+ latest_date = datetime.strptime(latest_news["date"], "%Y-%m-%d")
26
+ formatted_latest_date = latest_date.strftime("%B %d, %Y")
27
+
28
+ # Generate news HTML
29
+ news_items = []
30
+ for item in NEWS_DATA["news"]:
31
+ video_id = item["video_link"].split("?v=")[1]
32
+ date_obj = datetime.strptime(item["date"], "%Y-%m-%d")
33
+ formatted_date = date_obj.strftime("%B %d, %Y")
34
+ news_items.append(f'''
35
+ <div class="news-item">
36
+ <div class="news-date">{formatted_date}</div>
37
+ <div class="news-content">
38
+ <div class="news-video">
39
+ <div class="video-wrapper">
40
+ <iframe src="https://www.youtube.com/embed/{video_id}"></iframe>
41
+ </div>
42
+ </div>
43
+ <div class="news-text">
44
+ <a href="{item["twitter_link"]}" target="_blank" class="twitter-link">
45
+ <span class="twitter-icon">📢</span>
46
+ {item["twitter_text"]}
47
+ </a>
48
+ </div>
49
+ </div>
50
+ </div>
51
+ ''')
52
+
53
+ news_html = '\n'.join(news_items)
54
+
55
+ gallery_html = f'''
56
+ <div class="video-gallery-container">
57
+ <style>
58
+ .video-gallery-container {{
59
+ width: 100%;
60
+ max-width: 1400px;
61
+ margin: 0 auto;
62
+ padding: 20px;
63
+ }}
64
+ .highlight-section {{
65
+ margin-bottom: 40px;
66
+ }}
67
+ .highlight-card {{
68
+ background: #ffffff;
69
+ border-radius: 10px;
70
+ box-shadow: 0 4px 20px rgba(0,0,0,0.15);
71
+ overflow: hidden;
72
+ transition: transform 0.3s;
73
+ border: 2px solid #2196F3;
74
+ }}
75
+ .highlight-card:hover {{
76
+ transform: translateY(-5px);
77
+ }}
78
+ .highlight-header {{
79
+ background: #2196F3;
80
+ color: white;
81
+ padding: 15px 20px;
82
+ font-size: 1.2em;
83
+ font-weight: bold;
84
+ display: flex;
85
+ align-items: center;
86
+ gap: 10px;
87
+ }}
88
+ .highlight-date {{
89
+ font-size: 0.9em;
90
+ opacity: 0.9;
91
+ }}
92
+ .highlight-content {{
93
+ padding: 20px;
94
+ }}
95
+ .video-grid {{
96
+ display: grid;
97
+ grid-template-columns: repeat(2, 1fr);
98
+ gap: 20px;
99
+ margin-top: 20px;
100
+ margin-bottom: 40px;
101
+ }}
102
+ .video-card {{
103
+ background: var(--card-bg, #ffffff);
104
+ border-radius: 10px;
105
+ box-shadow: 0 2px 10px rgba(0,0,0,0.1);
106
+ overflow: hidden;
107
+ transition: transform 0.2s;
108
+ }}
109
+ .video-card:hover {{
110
+ transform: translateY(-5px);
111
+ }}
112
+ .video-wrapper {{
113
+ position: relative;
114
+ padding-bottom: 56.25%;
115
+ height: 0;
116
+ overflow: hidden;
117
+ }}
118
+ .video-wrapper iframe {{
119
+ position: absolute;
120
+ top: 0;
121
+ left: 0;
122
+ width: 100%;
123
+ height: 100%;
124
+ border: none;
125
+ }}
126
+ .video-title {{
127
+ padding: 15px;
128
+ font-size: 1.2em;
129
+ font-weight: bold;
130
+ color: var(--title-text, #2c3e50);
131
+ text-align: center;
132
+ background: var(--title-bg, #f8f9fa);
133
+ border-top: 1px solid var(--border-color, #eee);
134
+ }}
135
+ .news-section {{
136
+ margin-top: 40px;
137
+ border-top: 2px solid #e9ecef;
138
+ padding-top: 20px;
139
+ }}
140
+ .news-section-title {{
141
+ font-size: 1.8em;
142
+ font-weight: bold;
143
+ color: #2c3e50;
144
+ margin-bottom: 20px;
145
+ text-align: center;
146
+ }}
147
+ .news-item {{
148
+ background: #ffffff;
149
+ border-radius: 10px;
150
+ box-shadow: 0 2px 10px rgba(0,0,0,0.1);
151
+ margin-bottom: 20px;
152
+ overflow: hidden;
153
+ }}
154
+ .news-date {{
155
+ padding: 10px 20px;
156
+ background: #f8f9fa;
157
+ color: #666;
158
+ font-size: 0.9em;
159
+ border-bottom: 1px solid #eee;
160
+ }}
161
+ .news-content {{
162
+ display: flex;
163
+ padding: 20px;
164
+ align-items: center;
165
+ gap: 30px;
166
+ }}
167
+ .news-video {{
168
+ flex: 0 0 300px;
169
+ }}
170
+ .news-text {{
171
+ flex: 1;
172
+ display: flex;
173
+ align-items: center;
174
+ min-height: 169px;
175
+ }}
176
+ .twitter-link {{
177
+ color: #2c3e50;
178
+ text-decoration: none;
179
+ display: flex;
180
+ align-items: center;
181
+ gap: 15px;
182
+ font-size: 1.4em;
183
+ font-weight: 600;
184
+ line-height: 1.4;
185
+ }}
186
+ .twitter-link:hover {{
187
+ color: #1da1f2;
188
+ }}
189
+ .twitter-icon {{
190
+ font-size: 1.5em;
191
+ color: #1da1f2;
192
+ }}
193
+
194
+ /* Dark mode specific styles */
195
+ .dark .video-card {{
196
+ --card-bg: #2d3748;
197
+ --title-bg: #1a202c;
198
+ --title-text: #e2e8f0;
199
+ --border-color: #4a5568;
200
+ }}
201
+
202
+ /* Light mode specific styles */
203
+ .light .video-card {{
204
+ --card-bg: #ffffff;
205
+ --title-bg: #f8f9fa;
206
+ --title-text: #2c3e50;
207
+ --border-color: #eee;
208
+ }}
209
+ </style>
210
+
211
+ <!-- Highlight Section -->
212
+ <div class="highlight-section">
213
+ <div class="highlight-card">
214
+ <div class="highlight-header">
215
+ <span>🌟 Latest Update</span>
216
+ <span class="highlight-date">{formatted_latest_date}</span>
217
+ </div>
218
+ <div class="highlight-content">
219
+ <div class="video-wrapper">
220
+ <iframe src="https://www.youtube.com/embed/{latest_video_id}"></iframe>
221
+ </div>
222
+ <div class="video-title">
223
+ <a href="{latest_news["twitter_link"]}" target="_blank" class="twitter-link">
224
+ <span class="twitter-icon">📢</span>
225
+ {latest_news["twitter_text"]}
226
+ </a>
227
+ </div>
228
+ </div>
229
+ </div>
230
+ </div>
231
+
232
+ <!-- Regular Video Grid -->
233
+ <div class="video-grid">
234
+ <div class="video-card">
235
+ <div class="video-wrapper">
236
+ <iframe src="https://www.youtube.com/embed/{mario_id}"></iframe>
237
+ </div>
238
+ <div class="video-title">🎮 Super Mario Bros</div>
239
+ </div>
240
+ <div class="video-card">
241
+ <div class="video-wrapper">
242
+ <iframe src="https://www.youtube.com/embed/{sokoban_id}"></iframe>
243
+ </div>
244
+ <div class="video-title">📦 Sokoban</div>
245
+ </div>
246
+ <div class="video-card">
247
+ <div class="video-wrapper">
248
+ <iframe src="https://www.youtube.com/embed/{game_2048_id}"></iframe>
249
+ </div>
250
+ <div class="video-title">🔢 2048</div>
251
+ </div>
252
+ <div class="video-card">
253
+ <div class="video-wrapper">
254
+ <iframe src="https://www.youtube.com/embed/{candy_id}"></iframe>
255
+ </div>
256
+ <div class="video-title">🍬 Candy Crush</div>
257
+ </div>
258
+ <div class="video-card">
259
+ <div class="video-wrapper">
260
+ <iframe src="https://www.youtube.com/embed/{ace_attorney_id}"></iframe>
261
+ </div>
262
+ <div class="video-title">⚖️ Ace Attorney</div>
263
+ </div>
264
+ <div class="video-card">
265
+ <div class="video-wrapper">
266
+ <iframe src="https://www.youtube.com/embed/{tetris_id}"></iframe>
267
+ </div>
268
+ <div class="video-title">🧩 Tetris</div>
269
+ </div>
270
+ </div>
271
+
272
+ <!-- News Section -->
273
+ <div class="news-section">
274
+ <div class="news-section-title">📰 Latest News</div>
275
+ {news_html}
276
+ </div>
277
+ </div>
278
+ '''
279
+ return gr.HTML(gallery_html)
280
+
281
+ def create_gallery_tab():
282
+ """Create and return the gallery tab component"""
283
+ with gr.Tab("🎥 Gallery") as gallery_tab:
284
+ video_gallery = create_video_gallery()
285
  return gallery_tab
leaderboard_utils.py CHANGED
@@ -28,6 +28,12 @@ def get_organization(model_name):
28
  return "meta"
29
  elif "grok" in m:
30
  return "xai"
 
 
 
 
 
 
31
  else:
32
  return "unknown"
33
 
 
28
  return "meta"
29
  elif "grok" in m:
30
  return "xai"
31
+ elif "qwen" in m:
32
+ return "alibaba"
33
+ elif "glm" in m:
34
+ return "zhipu"
35
+ elif "kimi" in m:
36
+ return "moonshot"
37
  else:
38
  return "unknown"
39
 
rank_data_03_25_2025.json CHANGED
@@ -3,69 +3,69 @@
3
  "runs": 3,
4
  "results": [
5
  {
6
- "model": "๐ŸŽฎ claude-3-5-sonnet-20241022 (GamingAgent)",
7
  "score": 1267.7,
8
- "detail_data": "709,1532,1562",
9
  "progress": "1-1"
10
  },
11
  {
12
- "model": "๐ŸŽฎ claude-3-7-sonnet-20250219 (GamingAgent)",
13
  "score": 1418.7,
14
- "detail_data": "2015,709,1532",
15
  "progress": "1-1"
16
  },
17
  {
18
- "model": "๐ŸŽฎ gemini-2.5-flash-preview-04-17 (GamingAgent)",
19
  "score": 1385.0,
20
- "detail_data": "1672,1266,1247",
21
  "progress": "1-1"
22
  },
23
  {
24
- "model": "๐ŸŽฎ gemini-2.5-pro-preview-05-06 (GamingAgent)",
25
  "score": 1498.3,
26
- "detail_data": "1561,1271,1663",
27
  "progress": "1-1"
28
  },
29
  {
30
- "model": "๐ŸŽฎ llama-4-maverick-17b-128e-instruct-fp8 (GamingAgent)",
31
  "score": 1468.7,
32
- "detail_data": "898,2008,1500",
33
  "progress": "1-1"
34
  },
35
  {
36
- "model": "๐ŸŽฎ gpt-4.1-2025-04-14 (GamingAgent)",
37
  "score": 2126.3,
38
- "detail_data": "1531,722,4126",
39
  "progress": "1-1"
40
  },
41
  {
42
- "model": "๐ŸŽฎ gpt-4o-2024-11-20 (GamingAgent)",
43
  "score": 2047.3,
44
- "detail_data": "2017,2590,1535",
45
  "progress": "1-1"
46
  },
47
  {
48
- "model": "๐ŸŽฎ o1-2024-12-17 (GamingAgent)",
49
  "score": 855,
50
- "detail_data": "855",
51
  "progress": "1-1"
52
  },
53
  {
54
- "model": "๐ŸŽฎ o3-2025-04-16 (GamingAgent)",
55
  "score": 3445,
56
- "detail_data": "3445",
57
  "progress": "1-1"
58
  },
59
  {
60
- "model": "๐ŸŽฎ o4-mini-2025-04-16 (GamingAgent)",
61
  "score": 1448.0,
62
- "detail_data": "1525,1263,1556",
63
  "progress": "1-1"
64
  },
65
  {
66
  "model": "random (x30)",
67
  "score": 986.97,
68
- "detail_data": "986.97",
69
  "progress": "1-1"
70
  }
71
  ]
@@ -74,79 +74,79 @@
74
  "runs": 3,
75
  "results": [
76
  {
77
- "model": "๐ŸŽฎ claude-3-5-sonnet-20241022 (GamingAgent)",
78
  "score": 1914.67,
79
  "details": "1352,2860,1532",
80
  "highest_tail": 256
81
  },
82
  {
83
- "model": "๐ŸŽฎ claude-3-7-sonnet-20250219 (GamingAgent)",
84
  "score": 2624,
85
  "details": "2560,3224,2088",
86
  "highest_tail": 256
87
  },
88
  {
89
- "model": "๐ŸŽฎ deepseek-r1-0120 (GamingAgent)",
90
  "score": 1873.33,
91
  "details": "700,1240,3680",
92
  "highest_tail": 256
93
  },
94
  {
95
- "model": "๐ŸŽฎ gemini-2.5-flash-preview-04-17 (GamingAgent)",
96
  "score": 1697.33,
97
  "details": "1304,1316,2472",
98
  "highest_tail": 256
99
  },
100
  {
101
- "model": "๐ŸŽฎ gemini-2.5-pro-preview-05-06 (GamingAgent)",
102
  "score": 3586.67,
103
  "details": "5300,2400,3060",
104
  "highest_tail": 512
105
  },
106
  {
107
- "model": "๐ŸŽฎ grok-3-mini-beta (GamingAgent)",
108
  "score": 4036,
109
  "details": "6412,2492,3204",
110
  "highest_tail": 512
111
  },
112
  {
113
- "model": "๐ŸŽฎ llama-4-maverick-17b-128e-instruct-fp8 (GamingAgent)",
114
  "score": 1586.67,
115
  "details": "1404,1272,2084",
116
  "highest_tail": 128
117
  },
118
  {
119
- "model": "๐ŸŽฎ gpt-4.1-2025-04-14 (GamingAgent)",
120
  "score": 1656,
121
  "details": "1156,2664,1148",
122
  "highest_tail": 256
123
  },
124
  {
125
- "model": "๐ŸŽฎ gpt-4o-2024-11-20 (GamingAgent)",
126
  "score": 1656,
127
  "details": "1604,1284,2080",
128
  "highest_tail": 256
129
  },
130
  {
131
- "model": "๐ŸŽฎ o1-2024-12-17 (GamingAgent)",
132
  "score": 7580,
133
  "details": "7580",
134
  "highest_tail": 512
135
  },
136
  {
137
- "model": "๐ŸŽฎ o1-mini-2024-09-12 (GamingAgent)",
138
  "score": 2757.33,
139
  "details": "3132,2004,3136",
140
  "highest_tail": 256
141
  },
142
  {
143
- "model": "๐ŸŽฎ o3-2025-04-16 (GamingAgent)",
144
  "score": 7120,
145
  "details": "7120",
146
  "highest_tail": 512
147
  },
148
  {
149
- "model": "๐ŸŽฎ o4-mini-2025-04-16 (GamingAgent)",
150
  "score": 4432.0,
151
  "details": "4928,5456,2912",
152
  "highest_tail": 512
@@ -158,27 +158,63 @@
158
  "highest_tail": 128
159
  },
160
  {
161
- "model": "๐ŸŽฎ claude-opus-4-20250514 (GamingAgent)",
162
  "score": 3036.0,
163
  "details": "3036.0",
164
  "highest_tail": 256
165
  },
166
  {
167
- "model": "๐ŸŽฎ claude-sonnet-4-20250514 (GamingAgent)",
168
  "score": 3136,
169
- "details": "2148,2360,4900",
170
  "highest_tail": 256
171
  },
172
  {
173
- "model": "๐ŸŽฎ deepseek-r1-0528 (GamingAgent)",
174
  "score": 3330.0,
175
- "details": "3260,3400",
176
  "highest_tail": 256
177
  },
178
  {
179
- "model": "๐ŸŽฎ qwen3-235B-A22B-fp8 (GamingAgent)",
180
  "score": 2144.0,
181
- "details": "1436,2556,2440",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  "highest_tail": 256
183
  }
184
  ]
@@ -187,67 +223,67 @@
187
  "runs": 3,
188
  "results": [
189
  {
190
- "model": "๐ŸŽฎ claude-3-5-sonnet-20241022 (GamingAgent)",
191
  "score": 14.7,
192
  "details": "16,14,14"
193
  },
194
  {
195
- "model": "๐ŸŽฎ claude-3-7-sonnet-20250219 (GamingAgent)",
196
  "score": 16.3,
197
  "details": "19,15,15"
198
  },
199
  {
200
- "model": "๐ŸŽฎ deepseek-r1-0120 (GamingAgent)",
201
  "score": 14.3,
202
  "details": "15,14,14"
203
  },
204
  {
205
- "model": "๐ŸŽฎ gemini-2.5-flash-preview-04-17 (GamingAgent)",
206
  "score": 16.3,
207
  "details": "20,14,15"
208
  },
209
  {
210
- "model": "๐ŸŽฎ gemini-2.5-pro-preview-05-06 (GamingAgent)",
211
  "score": 23.3,
212
  "details": "23,23,24"
213
  },
214
  {
215
- "model": "๐ŸŽฎ grok-3-mini-beta (GamingAgent)",
216
  "score": 21.3,
217
  "details": "20,15,29"
218
  },
219
  {
220
- "model": "๐ŸŽฎ llama-4-maverick-17b-128e-instruct-fp8 (GamingAgent)",
221
  "score": 10.3,
222
  "details": "9,10,12"
223
  },
224
  {
225
- "model": "๐ŸŽฎ gpt-4.1-2025-04-14 (GamingAgent)",
226
  "score": 13.7,
227
  "details": "13,14,14"
228
  },
229
  {
230
- "model": "๐ŸŽฎ gpt-4o-2024-11-20 (GamingAgent)",
231
  "score": 14,
232
  "details": "18,11,13"
233
  },
234
  {
235
- "model": "๐ŸŽฎ o1-2024-12-17 (GamingAgent)",
236
  "score": 35,
237
  "details": "35"
238
  },
239
  {
240
- "model": "๐ŸŽฎ o1-mini-2024-09-12 (GamingAgent)",
241
  "score": 11.7,
242
  "details": "11,11,13"
243
  },
244
  {
245
- "model": "๐ŸŽฎ o3-2025-04-16 (GamingAgent)",
246
  "score": 42,
247
  "details": "42"
248
  },
249
  {
250
- "model": "๐ŸŽฎ o4-mini-2025-04-16 (GamingAgent)",
251
  "score": 25.3,
252
  "details": "22,35,19"
253
  },
@@ -257,24 +293,54 @@
257
  "details": ""
258
  },
259
  {
260
- "model": "๐ŸŽฎ claude-opus-4-20250514 (GamingAgent)",
261
  "score": 20,
262
  "details": "17,18,25"
263
  },
264
  {
265
- "model": "๐ŸŽฎ claude-sonnet-4-20250514 (GamingAgent)",
266
  "score": 19.33,
267
  "details": "20,17,21"
268
  },
269
  {
270
- "model": "๐ŸŽฎ deepseek-r1-0528 (GamingAgent)",
271
  "score": 33.67,
272
  "details": "26,34,41"
273
  },
274
  {
275
- "model": "๐ŸŽฎ qwen3-235B-A22B-fp8 (GamingAgent)",
276
  "score": 11.67,
277
  "details": "13,14,8"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
  }
279
  ]
280
  },
@@ -282,67 +348,67 @@
282
  "runs": 3,
283
  "results": [
284
  {
285
- "model": "๐ŸŽฎ claude-3-5-sonnet-20241022 (GamingAgent)",
286
  "score": 106,
287
  "details": "92,165,61"
288
  },
289
  {
290
- "model": "๐ŸŽฎ claude-3-7-sonnet-20250219 (GamingAgent)",
291
  "score": 484,
292
  "details": "535,428,489"
293
  },
294
  {
295
- "model": "๐ŸŽฎ deepseek-r1-0120 (GamingAgent)",
296
  "score": 447.3,
297
  "details": "409,436,497"
298
  },
299
  {
300
- "model": "๐ŸŽฎ gemini-2.5-flash-preview-04-17 (GamingAgent)",
301
  "score": 334.7,
302
  "details": "259,372,373"
303
  },
304
  {
305
- "model": "๐ŸŽฎ gemini-2.5-pro-preview-05-06 (GamingAgent)",
306
  "score": 416.3,
307
  "details": "411,414,424"
308
  },
309
  {
310
- "model": "๐ŸŽฎ grok-3-mini-beta (GamingAgent)",
311
  "score": 254,
312
  "details": "299,332,131"
313
  },
314
  {
315
- "model": "๐ŸŽฎ llama-4-maverick-17b-128e-instruct-fp8 (GamingAgent)",
316
  "score": 128.7,
317
  "details": "67,139,180"
318
  },
319
  {
320
- "model": "๐ŸŽฎ gpt-4.1-2025-04-14 (GamingAgent)",
321
  "score": 182,
322
  "details": "163,215,168"
323
  },
324
  {
325
- "model": "๐ŸŽฎ gpt-4o-2024-11-20 (GamingAgent)",
326
  "score": 147.3,
327
  "details": "131,104,207"
328
  },
329
  {
330
- "model": "๐ŸŽฎ o1-2024-12-17 (GamingAgent)",
331
  "score": 159,
332
  "details": "159"
333
  },
334
  {
335
- "model": "๐ŸŽฎ o1-mini-2024-09-12 (GamingAgent)",
336
  "score": 48,
337
  "details": "21,86,37"
338
  },
339
  {
340
- "model": "๐ŸŽฎ o3-2025-04-16 (GamingAgent)",
341
  "score": 647,
342
  "details": "647"
343
  },
344
  {
345
- "model": "๐ŸŽฎ o4-mini-2025-04-16 (GamingAgent)",
346
  "score": 487.3,
347
  "details": "259,591,612"
348
  },
@@ -352,24 +418,54 @@
352
  "details": ""
353
  },
354
  {
355
- "model": "๐ŸŽฎ claude-opus-4-20250514 (GamingAgent)",
356
  "score": 464,
357
  "details": "593,406,393"
358
  },
359
  {
360
- "model": "๐ŸŽฎ claude-sonnet-4-20250514 (GamingAgent)",
361
  "score": 478.33,
362
  "details": "545,468,422"
363
  },
364
  {
365
- "model": "๐ŸŽฎ deepseek-r1-0528 (GamingAgent)",
366
  "score": 491.67,
367
  "details": "464,463,548"
368
  },
369
  {
370
- "model": "๐ŸŽฎ qwen3-235B-A22B-fp8 (GamingAgent)",
371
  "score": 363.33,
372
  "details": "365,372,353"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
373
  }
374
  ]
375
  },
@@ -377,108 +473,148 @@
377
  "runs": 3,
378
  "results": [
379
  {
380
- "model": "๐ŸŽฎ claude-3-5-sonnet-20241022 (GamingAgent)",
381
  "score": 0,
382
- "detail_box_on_target": "0,0,0",
383
  "cracked_levels": "0,0,0"
384
  },
385
  {
386
- "model": "๐ŸŽฎ claude-3-7-sonnet-20250219 (GamingAgent)",
387
  "score": 2.33,
388
- "detail_box_on_target": "2,4,1",
389
  "cracked_levels": "1,2,0"
390
  },
391
  {
392
- "model": "๐ŸŽฎ deepseek-r1-0120 (GamingAgent)",
393
  "score": 1.33,
394
- "detail_box_on_target": "2,0,2",
395
  "cracked_levels": "1,0,1"
396
  },
397
  {
398
- "model": "๐ŸŽฎ gemini-2.5-flash-preview-04-17 (GamingAgent)",
399
  "score": 1.67,
400
- "detail_box_on_target": "3,0,2",
401
  "cracked_levels": "2,0,1"
402
  },
403
  {
404
- "model": "๐ŸŽฎ gemini-2.5-pro-preview-05-06 (GamingAgent)",
405
  "score": 4.33,
406
- "detail_box_on_target": "4,4,5",
407
  "cracked_levels": "2,2,3"
408
  },
409
  {
410
- "model": "๐ŸŽฎ grok-3-mini-beta (GamingAgent)",
411
  "score": 5.67,
412
- "detail_box_on_target": "5,6,6",
413
  "cracked_levels": "3,3,3"
414
  },
415
  {
416
- "model": "๐ŸŽฎ llama-4-maverick-17b-128e-instruct-fp8 (GamingAgent)",
417
  "score": 0,
418
- "detail_box_on_target": "0,0,0",
419
  "cracked_levels": "0,0,0"
420
  },
421
  {
422
- "model": "๐ŸŽฎ gpt-4.1-2025-04-14 (GamingAgent)",
423
  "score": 0,
424
- "detail_box_on_target": "0,0,0",
425
  "cracked_levels": "0,0,0"
426
  },
427
  {
428
- "model": "๐ŸŽฎ gpt-4o-2024-11-20 (GamingAgent)",
429
  "score": 0,
430
- "detail_box_on_target": "0,0,0",
431
  "cracked_levels": "0,0,0"
432
  },
433
  {
434
- "model": "๐ŸŽฎ o1-2024-12-17 (GamingAgent)",
435
  "score": 2.33,
436
- "detail_box_on_target": "2,2,3",
437
  "cracked_levels": "1,1,2"
438
  },
439
  {
440
- "model": "๐ŸŽฎ o1-mini-2024-09-12 (GamingAgent)",
441
  "score": 1.33,
442
- "detail_box_on_target": "1,2,1",
443
  "cracked_levels": "0,1,0"
444
  },
445
  {
446
- "model": "๐ŸŽฎ o3-2025-04-16 (GamingAgent)",
447
  "score": 8,
448
- "detail_box_on_target": "10,6",
449
  "cracked_levels": "5,3"
450
  },
451
  {
452
- "model": "๐ŸŽฎ o4-mini-2025-04-16 (GamingAgent)",
453
  "score": 5.33,
454
- "detail_box_on_target": "4,6,6",
455
- "cracked_levels": "2,2,3"
456
  },
457
  {
458
  "model": "random (x30)",
459
  "score": 0,
460
- "detail_box_on_target": "0,0,0",
461
  "cracked_levels": "0,0,0"
462
  },
463
  {
464
- "model": "๐ŸŽฎ claude-opus-4-20250514 (GamingAgent)",
465
  "score": 4,
466
- "details": "4,4,4"
 
467
  },
468
  {
469
- "model": "๐ŸŽฎ claude-sonnet-4-20250514 (GamingAgent)",
470
  "score": 3,
471
- "details": "2,2,5"
 
472
  },
473
  {
474
- "model": "๐ŸŽฎ deepseek-r1-0528 (GamingAgent)",
475
  "score": 4.67,
476
- "details": "4,4,6"
 
477
  },
478
  {
479
- "model": "๐ŸŽฎ qwen3-235B-A22B-fp8 (GamingAgent)",
480
  "score": 2.33,
481
- "details": "1,2,4"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
482
  }
483
  ]
484
  },
@@ -486,79 +622,79 @@
486
  "runs": 1,
487
  "results": [
488
  {
489
- "model": "๐ŸŽฎ claude-3-5-sonnet-20241022 (GamingAgent)",
490
  "score": 2,
491
  "progress": "1:2/5",
492
  "evaluator result": "1/3"
493
  },
494
  {
495
- "model": "๐ŸŽฎ claude-3-7-sonnet-20250219 (GamingAgent)",
496
  "score": 7,
497
  "progress": "2:2/9",
498
  "evaluator result": "5/11"
499
  },
500
  {
501
- "model": "๐ŸŽฎ deepseek-r1-0120 (GamingAgent)",
502
  "score": 0,
503
  "progress": "0",
504
  "evaluator result": "1/5"
505
  },
506
  {
507
- "model": "๐ŸŽฎ gemini-2.5-flash-preview-04-17 (GamingAgent)",
508
  "score": 4,
509
  "progress": "1:4/5",
510
  "evaluator result": "1/7"
511
  },
512
  {
513
- "model": "๐ŸŽฎ gemini-2.5-pro-preview-05-06 (GamingAgent)",
514
  "score": 7,
515
  "progress": "2:2/9",
516
  "evaluator result": "2/3"
517
  },
518
  {
519
- "model": "๐ŸŽฎ grok-3-mini-beta (GamingAgent)",
520
  "score": 0,
521
  "progress": "0",
522
  "evaluator result": "0"
523
  },
524
  {
525
- "model": "๐ŸŽฎ llama-4-maverick-17b-128e-instruct-fp8 (GamingAgent)",
526
  "score": 0,
527
  "progress": "0",
528
  "evaluator result": "0"
529
  },
530
  {
531
- "model": "๐ŸŽฎ gpt-4.1-2025-04-14 (GamingAgent)",
532
  "score": 2,
533
  "progress": "1:2/5",
534
  "evaluator result": "2/3"
535
  },
536
  {
537
- "model": "๐ŸŽฎ gpt-4o-2024-11-20 (GamingAgent)",
538
  "score": 0,
539
  "progress": "0",
540
  "evaluator result": "0"
541
  },
542
  {
543
- "model": "๐ŸŽฎ o1-2024-12-17 (GamingAgent)",
544
  "score": 16,
545
  "progress": "3: 2/8",
546
  "evaluator result": "6/11"
547
  },
548
  {
549
- "model": "๐ŸŽฎ o1-mini-2024-09-12 (GamingAgent)",
550
  "score": 0,
551
  "progress": "0",
552
  "evaluator result": "1/5"
553
  },
554
  {
555
- "model": "๐ŸŽฎ o3-2025-04-16 (GamingAgent)",
556
  "score": 16,
557
  "progress": "3: 2/8",
558
  "evaluator result": "1/2"
559
  },
560
  {
561
- "model": "๐ŸŽฎ o4-mini-2025-04-16 (GamingAgent)",
562
  "score": 4,
563
  "progress": "1:4/5",
564
  "evaluator result": "2/5"
@@ -570,17 +706,17 @@
570
  "evaluator result": "0"
571
  },
572
  {
573
- "model": "๐ŸŽฎ claude-opus-4-20250514 (GamingAgent)",
574
  "score": 6,
575
  "details": "6"
576
  },
577
  {
578
- "model": "๐ŸŽฎ claude-sonnet-4-20250514 (GamingAgent)",
579
  "score": 3.67,
580
  "details": "3,4,4"
581
  },
582
  {
583
- "model": "๐ŸŽฎ gemini-2.5-flash-preview-05-20 (GamingAgent)",
584
  "score": 4.33,
585
  "details": "3,4,6"
586
  }
 
3
  "runs": 3,
4
  "results": [
5
  {
6
+ "model": "๐ŸŽฎ claude-3-5-sonnet-20241022",
7
  "score": 1267.7,
8
+ "detail_data":"709,1532,1562",
9
  "progress": "1-1"
10
  },
11
  {
12
+ "model": "๐ŸŽฎ claude-3-7-sonnet-20250219",
13
  "score": 1418.7,
14
+ "detail_data":"2015,709,1532",
15
  "progress": "1-1"
16
  },
17
  {
18
+ "model": "๐ŸŽฎ gemini-2.5-flash-preview-04-17",
19
  "score": 1385.0,
20
+ "detail_data":"1672,1266,1247",
21
  "progress": "1-1"
22
  },
23
  {
24
+ "model": "๐ŸŽฎ gemini-2.5-pro-preview-05-06",
25
  "score": 1498.3,
26
+ "detail_data":"1561,1271,1663",
27
  "progress": "1-1"
28
  },
29
  {
30
+ "model": "๐ŸŽฎ llama-4-maverick-17b-128e-instruct-fp8",
31
  "score": 1468.7,
32
+ "detail_data":"898,2008,1500",
33
  "progress": "1-1"
34
  },
35
  {
36
+ "model": "๐ŸŽฎ gpt-4.1-2025-04-14",
37
  "score": 2126.3,
38
+ "detail_data":"1531,722,4126",
39
  "progress": "1-1"
40
  },
41
  {
42
+ "model": "๐ŸŽฎ gpt-4o-2024-11-20",
43
  "score": 2047.3,
44
+ "detail_data":"2017,2590,1535",
45
  "progress": "1-1"
46
  },
47
  {
48
+ "model": "๐ŸŽฎ o1-2024-12-17",
49
  "score": 855,
50
+ "detail_data":"855",
51
  "progress": "1-1"
52
  },
53
  {
54
+ "model": "๐ŸŽฎ o3-2025-04-16",
55
  "score": 3445,
56
+ "detail_data":"3445",
57
  "progress": "1-1"
58
  },
59
  {
60
+ "model": "๐ŸŽฎ o4-mini-2025-04-16",
61
  "score": 1448.0,
62
+ "detail_data":"1525,1263,1556",
63
  "progress": "1-1"
64
  },
65
  {
66
  "model": "random (x30)",
67
  "score": 986.97,
68
+ "detail_data":"986.97",
69
  "progress": "1-1"
70
  }
71
  ]
 
74
  "runs": 3,
75
  "results": [
76
  {
77
+ "model": "๐ŸŽฎ claude-3-5-sonnet-20241022",
78
  "score": 1914.67,
79
  "details": "1352,2860,1532",
80
  "highest_tail": 256
81
  },
82
  {
83
+ "model": "๐ŸŽฎ claude-3-7-sonnet-20250219",
84
  "score": 2624,
85
  "details": "2560,3224,2088",
86
  "highest_tail": 256
87
  },
88
  {
89
+ "model": "๐ŸŽฎ deepseek-r1-0120",
90
  "score": 1873.33,
91
  "details": "700,1240,3680",
92
  "highest_tail": 256
93
  },
94
  {
95
+ "model": "๐ŸŽฎ gemini-2.5-flash-preview-04-17",
96
  "score": 1697.33,
97
  "details": "1304,1316,2472",
98
  "highest_tail": 256
99
  },
100
  {
101
+ "model": "๐ŸŽฎ gemini-2.5-pro-preview-05-06",
102
  "score": 3586.67,
103
  "details": "5300,2400,3060",
104
  "highest_tail": 512
105
  },
106
  {
107
+ "model": "๐ŸŽฎ grok-3-mini-beta",
108
  "score": 4036,
109
  "details": "6412,2492,3204",
110
  "highest_tail": 512
111
  },
112
  {
113
+ "model": "๐ŸŽฎ llama-4-maverick-17b-128e-instruct-fp8",
114
  "score": 1586.67,
115
  "details": "1404,1272,2084",
116
  "highest_tail": 128
117
  },
118
  {
119
+ "model": "๐ŸŽฎ gpt-4.1-2025-04-14",
120
  "score": 1656,
121
  "details": "1156,2664,1148",
122
  "highest_tail": 256
123
  },
124
  {
125
+ "model": "๐ŸŽฎ gpt-4o-2024-11-20",
126
  "score": 1656,
127
  "details": "1604,1284,2080",
128
  "highest_tail": 256
129
  },
130
  {
131
+ "model": "๐ŸŽฎ o1-2024-12-17",
132
  "score": 7580,
133
  "details": "7580",
134
  "highest_tail": 512
135
  },
136
  {
137
+ "model": "๐ŸŽฎ o1-mini-2024-09-12",
138
  "score": 2757.33,
139
  "details": "3132,2004,3136",
140
  "highest_tail": 256
141
  },
142
  {
143
+ "model": "๐ŸŽฎ o3-2025-04-16",
144
  "score": 7120,
145
  "details": "7120",
146
  "highest_tail": 512
147
  },
148
  {
149
+ "model": "๐ŸŽฎ o4-mini-2025-04-16",
150
  "score": 4432.0,
151
  "details": "4928,5456,2912",
152
  "highest_tail": 512
 
158
  "highest_tail": 128
159
  },
160
  {
161
+ "model": "๐ŸŽฎ claude-opus-4-20250514",
162
  "score": 3036.0,
163
  "details": "3036.0",
164
  "highest_tail": 256
165
  },
166
  {
167
+ "model": "๐ŸŽฎ claude-sonnet-4-20250514",
168
  "score": 3136,
169
+ "details": "2148, 2360, 4900",
170
  "highest_tail": 256
171
  },
172
  {
173
+ "model": "๐ŸŽฎ deepseek-r1-0528",
174
  "score": 3330.0,
175
+ "details": "3260, 3400",
176
  "highest_tail": 256
177
  },
178
  {
179
+ "model": "๐ŸŽฎ qwen3-235B-A22B-fp8",
180
  "score": 2144.0,
181
+ "details": "1436, 2556, 2440",
182
+ "highest_tail": 256
183
+ },
184
+ {
185
+ "model": "๐ŸŽฎ grok-4-0709",
186
+ "score": 4650.7,
187
+ "details": "4120, 4080, 5752",
188
+ "highest_tail": 512
189
+ },
190
+ {
191
+ "model": "๐ŸŽฎ kimi-k2-0711-preview",
192
+ "score": 1562.67,
193
+ "details": "2360, 1628, 700",
194
+ "highest_tail": 256
195
+ },
196
+ {
197
+ "model": "๐ŸŽฎ glm-4.5",
198
+ "score": 1201.33,
199
+ "details": "1440, 1064, 1100",
200
+ "highest_tail": 128
201
+ },
202
+ {
203
+ "model": "๐ŸŽฎ qwen3-235B-A22B-fp8-thinking",
204
+ "score": 2473.33,
205
+ "details": "1276, 5196, 948",
206
+ "highest_tail": 256
207
+ },
208
+ {
209
+ "model": "๐ŸŽฎ gpt-oss-120b",
210
+ "score": 1973.33,
211
+ "details": "696, 3240, 1984",
212
+ "highest_tail": 256
213
+ },
214
+ {
215
+ "model": "๐ŸŽฎ gpt-oss-20b",
216
+ "score": 2040.0,
217
+ "details": "1484, 1432, 3204",
218
  "highest_tail": 256
219
  }
220
  ]
 
223
  "runs": 3,
224
  "results": [
225
  {
226
+ "model": "๐ŸŽฎ claude-3-5-sonnet-20241022",
227
  "score": 14.7,
228
  "details": "16,14,14"
229
  },
230
  {
231
+ "model": "๐ŸŽฎ claude-3-7-sonnet-20250219",
232
  "score": 16.3,
233
  "details": "19,15,15"
234
  },
235
  {
236
+ "model": "๐ŸŽฎ deepseek-r1-0120",
237
  "score": 14.3,
238
  "details": "15,14,14"
239
  },
240
  {
241
+ "model": "๐ŸŽฎ gemini-2.5-flash-preview-04-17",
242
  "score": 16.3,
243
  "details": "20,14,15"
244
  },
245
  {
246
+ "model": "๐ŸŽฎ gemini-2.5-pro-preview-05-06",
247
  "score": 23.3,
248
  "details": "23,23,24"
249
  },
250
  {
251
+ "model": "๐ŸŽฎ grok-3-mini-beta",
252
  "score": 21.3,
253
  "details": "20,15,29"
254
  },
255
  {
256
+ "model": "๐ŸŽฎ llama-4-maverick-17b-128e-instruct-fp8",
257
  "score": 10.3,
258
  "details": "9,10,12"
259
  },
260
  {
261
+ "model": "๐ŸŽฎ gpt-4.1-2025-04-14",
262
  "score": 13.7,
263
  "details": "13,14,14"
264
  },
265
  {
266
+ "model": "๐ŸŽฎ gpt-4o-2024-11-20",
267
  "score": 14,
268
  "details": "18,11,13"
269
  },
270
  {
271
+ "model": "๐ŸŽฎ o1-2024-12-17",
272
  "score": 35,
273
  "details": "35"
274
  },
275
  {
276
+ "model": "๐ŸŽฎ o1-mini-2024-09-12",
277
  "score": 11.7,
278
  "details": "11,11,13"
279
  },
280
  {
281
+ "model": "๐ŸŽฎ o3-2025-04-16",
282
  "score": 42,
283
  "details": "42"
284
  },
285
  {
286
+ "model": "๐ŸŽฎ o4-mini-2025-04-16",
287
  "score": 25.3,
288
  "details": "22,35,19"
289
  },
 
293
  "details": ""
294
  },
295
  {
296
+ "model": "๐ŸŽฎ claude-opus-4-20250514",
297
  "score": 20,
298
  "details": "17,18,25"
299
  },
300
  {
301
+ "model": "๐ŸŽฎ claude-sonnet-4-20250514",
302
  "score": 19.33,
303
  "details": "20,17,21"
304
  },
305
  {
306
+ "model": "๐ŸŽฎ deepseek-r1-0528",
307
  "score": 33.67,
308
  "details": "26,34,41"
309
  },
310
  {
311
+ "model": "๐ŸŽฎ qwen3-235B-A22B-fp8",
312
  "score": 11.67,
313
  "details": "13,14,8"
314
+ },
315
+ {
316
+ "model": "๐ŸŽฎ grok-4-0709",
317
+ "score": 125.67,
318
+ "details": "229,105,43"
319
+ },
320
+ {
321
+ "model": "๐ŸŽฎ kimi-k2-0711-preview",
322
+ "score": 17,
323
+ "details": "20,15,16"
324
+ },
325
+ {
326
+ "model": "๐ŸŽฎ glm-4.5",
327
+ "score": 19.67,
328
+ "details": "21, 21, 17"
329
+ },
330
+ {
331
+ "model": "๐ŸŽฎ qwen3-235B-A22B-fp8-thinking",
332
+ "score": 16,
333
+ "details": "23,9,16"
334
+ },
335
+ {
336
+ "model": "๐ŸŽฎ gpt-oss-120b",
337
+ "score": 12.67,
338
+ "details": "12, 13, 13"
339
+ },
340
+ {
341
+ "model": "๐ŸŽฎ gpt-oss-20b",
342
+ "score": 14.33,
343
+ "details": "14, 13, 16"
344
  }
345
  ]
346
  },
 
348
  "runs": 3,
349
  "results": [
350
  {
351
+ "model": "๐ŸŽฎ claude-3-5-sonnet-20241022",
352
  "score": 106,
353
  "details": "92,165,61"
354
  },
355
  {
356
+ "model": "๐ŸŽฎ claude-3-7-sonnet-20250219",
357
  "score": 484,
358
  "details": "535,428,489"
359
  },
360
  {
361
+ "model": "๐ŸŽฎ deepseek-r1-0120",
362
  "score": 447.3,
363
  "details": "409,436,497"
364
  },
365
  {
366
+ "model": "๐ŸŽฎ gemini-2.5-flash-preview-04-17",
367
  "score": 334.7,
368
  "details": "259,372,373"
369
  },
370
  {
371
+ "model": "๐ŸŽฎ gemini-2.5-pro-preview-05-06",
372
  "score": 416.3,
373
  "details": "411,414,424"
374
  },
375
  {
376
+ "model": "๐ŸŽฎ grok-3-mini-beta",
377
  "score": 254,
378
  "details": "299,332,131"
379
  },
380
  {
381
+ "model": "๐ŸŽฎ llama-4-maverick-17b-128e-instruct-fp8",
382
  "score": 128.7,
383
  "details": "67,139,180"
384
  },
385
  {
386
+ "model": "๐ŸŽฎ gpt-4.1-2025-04-14",
387
  "score": 182,
388
  "details": "163,215,168"
389
  },
390
  {
391
+ "model": "๐ŸŽฎ gpt-4o-2024-11-20",
392
  "score": 147.3,
393
  "details": "131,104,207"
394
  },
395
  {
396
+ "model": "๐ŸŽฎ o1-2024-12-17",
397
  "score": 159,
398
  "details": "159"
399
  },
400
  {
401
+ "model": "๐ŸŽฎ o1-mini-2024-09-12",
402
  "score": 48,
403
  "details": "21,86,37"
404
  },
405
  {
406
+ "model": "๐ŸŽฎ o3-2025-04-16",
407
  "score": 647,
408
  "details": "647"
409
  },
410
  {
411
+ "model": "๐ŸŽฎ o4-mini-2025-04-16",
412
  "score": 487.3,
413
  "details": "259,591,612"
414
  },
 
418
  "details": ""
419
  },
420
  {
421
+ "model": "๐ŸŽฎ claude-opus-4-20250514",
422
  "score": 464,
423
  "details": "593,406,393"
424
  },
425
  {
426
+ "model": "๐ŸŽฎ claude-sonnet-4-20250514",
427
  "score": 478.33,
428
  "details": "545,468,422"
429
  },
430
  {
431
+ "model": "๐ŸŽฎ deepseek-r1-0528",
432
  "score": 491.67,
433
  "details": "464,463,548"
434
  },
435
  {
436
+ "model": "๐ŸŽฎ qwen3-235B-A22B-fp8",
437
  "score": 363.33,
438
  "details": "365,372,353"
439
+ },
440
+ {
441
+ "model": "๐ŸŽฎ grok-4-0709",
442
+ "score": 616.7,
443
+ "details": "586,730,534"
444
+ },
445
+ {
446
+ "model": "๐ŸŽฎ kimi-k2-0711-preview",
447
+ "score": 101,
448
+ "details": "144,31,128"
449
+ },
450
+ {
451
+ "model": "๐ŸŽฎ glm-4.5",
452
+ "score": 324.33,
453
+ "details": "377, 320, 276"
454
+ },
455
+ {
456
+ "model": "๐ŸŽฎ qwen3-235B-A22B-fp8-thinking",
457
+ "score": 437,
458
+ "details": "511, 437, 363"
459
+ },
460
+ {
461
+ "model": "๐ŸŽฎ gpt-oss-120b",
462
+ "score": 441.33,
463
+ "details": "377, 536, 411"
464
+ },
465
+ {
466
+ "model": "๐ŸŽฎ gpt-oss-20b",
467
+ "score": 249.67,
468
+ "details": "243, 262, 244"
469
  }
470
  ]
471
  },
 
473
  "runs": 3,
474
  "results": [
475
  {
476
+ "model": "๐ŸŽฎ claude-3-5-sonnet-20241022",
477
  "score": 0,
478
+ "detail_box_on_target":"0,0,0",
479
  "cracked_levels": "0,0,0"
480
  },
481
  {
482
+ "model": "๐ŸŽฎ claude-3-7-sonnet-20250219",
483
  "score": 2.33,
484
+ "detail_box_on_target":"2,4,1",
485
  "cracked_levels": "1,2,0"
486
  },
487
  {
488
+ "model": "๐ŸŽฎ deepseek-r1-0120",
489
  "score": 1.33,
490
+ "detail_box_on_target":"2,0,2",
491
  "cracked_levels": "1,0,1"
492
  },
493
  {
494
+ "model": "๐ŸŽฎ gemini-2.5-flash-preview-04-17",
495
  "score": 1.67,
496
+ "detail_box_on_target":"3,0,2",
497
  "cracked_levels": "2,0,1"
498
  },
499
  {
500
+ "model": "๐ŸŽฎ gemini-2.5-pro-preview-05-06",
501
  "score": 4.33,
502
+ "detail_box_on_target":"4,4,5",
503
  "cracked_levels": "2,2,3"
504
  },
505
  {
506
+ "model": "๐ŸŽฎ grok-3-mini-beta",
507
  "score": 5.67,
508
+ "detail_box_on_target":"5,6,6",
509
  "cracked_levels": "3,3,3"
510
  },
511
  {
512
+ "model": "๐ŸŽฎ llama-4-maverick-17b-128e-instruct-fp8",
513
  "score": 0,
514
+ "detail_box_on_target":"0,0,0",
515
  "cracked_levels": "0,0,0"
516
  },
517
  {
518
+ "model": "๐ŸŽฎ gpt-4.1-2025-04-14",
519
  "score": 0,
520
+ "detail_box_on_target":"0,0,0",
521
  "cracked_levels": "0,0,0"
522
  },
523
  {
524
+ "model": "๐ŸŽฎ gpt-4o-2024-11-20",
525
  "score": 0,
526
+ "detail_box_on_target":"0,0,0",
527
  "cracked_levels": "0,0,0"
528
  },
529
  {
530
+ "model": "๐ŸŽฎ o1-2024-12-17",
531
  "score": 2.33,
532
+ "detail_box_on_target":"2,2,3",
533
  "cracked_levels": "1,1,2"
534
  },
535
  {
536
+ "model": "๐ŸŽฎ o1-mini-2024-09-12",
537
  "score": 1.33,
538
+ "detail_box_on_target":"1,2,1",
539
  "cracked_levels": "0,1,0"
540
  },
541
  {
542
+ "model": "๐ŸŽฎ o3-2025-04-16",
543
  "score": 8,
544
+ "detail_box_on_target":"10,6",
545
  "cracked_levels": "5,3"
546
  },
547
  {
548
+ "model": "๐ŸŽฎ o4-mini-2025-04-16",
549
  "score": 5.33,
550
+ "detail_box_on_target":"4,6,6",
551
+ "cracked_levels": "2,3,3"
552
  },
553
  {
554
  "model": "random (x30)",
555
  "score": 0,
556
+ "detail_box_on_target":"0,0,0",
557
  "cracked_levels": "0,0,0"
558
  },
559
  {
560
+ "model": "๐ŸŽฎ claude-opus-4-20250514",
561
  "score": 4,
562
+ "detail_box_on_target": "4,4,4",
563
+ "cracked_levels": "2,2,2"
564
  },
565
  {
566
+ "model": "๐ŸŽฎ claude-sonnet-4-20250514",
567
  "score": 3,
568
+ "detail_box_on_target": "2,2,5",
569
+ "cracked_levels": "1,1,3"
570
  },
571
  {
572
+ "model": "๐ŸŽฎ deepseek-r1-0528",
573
  "score": 4.67,
574
+ "detail_box_on_target": "4,4,6",
575
+ "cracked_levels": "2,2,3"
576
  },
577
  {
578
+ "model": "๐ŸŽฎ qwen3-235B-A22B-fp8",
579
  "score": 2.33,
580
+ "detail_box_on_target": "1,2,4",
581
+ "cracked_levels": "0,1,2"
582
+ },
583
+ {
584
+ "model": "๐ŸŽฎ grok-4-0709",
585
+ "score": 6.33,
586
+ "detail_box_on_target": "6,6,7",
587
+ "cracked_levels": "3,3,4"
588
+ },
589
+ {
590
+ "model": "๐ŸŽฎ kimi-k2-0711-preview",
591
+ "score": 0,
592
+ "detail_box_on_target": "0,0,0",
593
+ "cracked_levels": "0,0,0"
594
+ },
595
+ {
596
+ "model": "๐ŸŽฎ glm-4.5",
597
+ "score": 0.33,
598
+ "detail_box_on_target": "0,1,0",
599
+ "cracked_levels": "0,0,0"
600
+ },
601
+ {
602
+ "model": "๐ŸŽฎ qwen3-235B-A22B-fp8-thinking",
603
+ "score": 5.33,
604
+ "detail_box_on_target": "6,4,6",
605
+ "cracked_levels": "3,2,3"
606
+ },
607
+ {
608
+ "model": "๐ŸŽฎ gpt-oss-120b",
609
+ "score": 4,
610
+ "detail_box_on_target": "4",
611
+ "cracked_levels": "2"
612
+ },
613
+ {
614
+ "model": "๐ŸŽฎ gpt-oss-20b",
615
+ "score": 4,
616
+ "detail_box_on_target": "4",
617
+ "cracked_levels": "2"
618
  }
619
  ]
620
  },
 
622
  "runs": 1,
623
  "results": [
624
  {
625
+ "model": "๐ŸŽฎ claude-3-5-sonnet-20241022",
626
  "score": 2,
627
  "progress": "1:2/5",
628
  "evaluator result": "1/3"
629
  },
630
  {
631
+ "model": "๐ŸŽฎ claude-3-7-sonnet-20250219",
632
  "score": 7,
633
  "progress": "2:2/9",
634
  "evaluator result": "5/11"
635
  },
636
  {
637
+ "model": "๐ŸŽฎ deepseek-r1-0120",
638
  "score": 0,
639
  "progress": "0",
640
  "evaluator result": "1/5"
641
  },
642
  {
643
+ "model": "๐ŸŽฎ gemini-2.5-flash-preview-04-17",
644
  "score": 4,
645
  "progress": "1:4/5",
646
  "evaluator result": "1/7"
647
  },
648
  {
649
+ "model": "๐ŸŽฎ gemini-2.5-pro-preview-05-06",
650
  "score": 7,
651
  "progress": "2:2/9",
652
  "evaluator result": "2/3"
653
  },
654
  {
655
+ "model": "๐ŸŽฎ grok-3-mini-beta",
656
  "score": 0,
657
  "progress": "0",
658
  "evaluator result": "0"
659
  },
660
  {
661
+ "model": "๐ŸŽฎ llama-4-maverick-17b-128e-instruct-fp8",
662
  "score": 0,
663
  "progress": "0",
664
  "evaluator result": "0"
665
  },
666
  {
667
+ "model": "๐ŸŽฎ gpt-4.1-2025-04-14",
668
  "score": 2,
669
  "progress": "1:2/5",
670
  "evaluator result": "2/3"
671
  },
672
  {
673
+ "model": "๐ŸŽฎ gpt-4o-2024-11-20",
674
  "score": 0,
675
  "progress": "0",
676
  "evaluator result": "0"
677
  },
678
  {
679
+ "model": "๐ŸŽฎ o1-2024-12-17",
680
  "score": 16,
681
  "progress": "3: 2/8",
682
  "evaluator result": "6/11"
683
  },
684
  {
685
+ "model": "๐ŸŽฎ o1-mini-2024-09-12",
686
  "score": 0,
687
  "progress": "0",
688
  "evaluator result": "1/5"
689
  },
690
  {
691
+ "model": "๐ŸŽฎ o3-2025-04-16",
692
  "score": 16,
693
  "progress": "3: 2/8",
694
  "evaluator result": "1/2"
695
  },
696
  {
697
+ "model": "๐ŸŽฎ o4-mini-2025-04-16",
698
  "score": 4,
699
  "progress": "1:4/5",
700
  "evaluator result": "2/5"
 
706
  "evaluator result": "0"
707
  },
708
  {
709
+ "model": "๐ŸŽฎ claude-opus-4-20250514",
710
  "score": 6,
711
  "details": "6"
712
  },
713
  {
714
+ "model": "๐ŸŽฎ claude-sonnet-4-20250514",
715
  "score": 3.67,
716
  "details": "3,4,4"
717
  },
718
  {
719
+ "model": "๐ŸŽฎ gemini-2.5-flash-preview-05-20",
720
  "score": 4.33,
721
  "details": "3,4,6"
722
  }