Sealical committed
Commit 0c53877 · 1 Parent(s): 3f70be6

update space

Files changed (1)
  1. app.py +101 -42
app.py CHANGED
@@ -3,6 +3,9 @@ from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 import pandas as pd
 import os
 import json
+import tempfile
+import shutil
+import zipfile
 from huggingface_hub import snapshot_download

 # Constants for PhysicalCodeBench
@@ -72,7 +75,7 @@ SUBMISSION_TEXT = """
 ├── evaluation_results/ # Directory containing all result files
 └── PhysCodeEval_results.json # Main evaluation results file
 ```
-5. Submit a pull request with your results
+5. Submit your results by uploading a ZIP file below or via the form

 Your submission will be verified and added to the leaderboard once approved.
 """
@@ -111,7 +114,7 @@ COLUMNS = [
     PhysCodeColumn("rank", "number", True, True, False),
     PhysCodeColumn("model", "str", True, True, False),
     PhysCodeColumn("model_type", "str", True, False, False),
-    #PhysCodeColumn("params", "number", True, False, False),
+    PhysCodeColumn("organization", "str", True, False, False),
     PhysCodeColumn("text_score", "number", True, False, False),
     PhysCodeColumn("visual_score", "number", True, False, False),
     PhysCodeColumn("total_score", "number", True, False, False),
@@ -120,8 +123,7 @@ COLUMNS = [
     PhysCodeColumn("execution_success", "number", False, False, False),
     PhysCodeColumn("file_generation", "number", False, False, False),
     PhysCodeColumn("file_quality", "number", False, False, False),
-    PhysCodeColumn("submission_date", "date", False, False, False),
-    PhysCodeColumn("license", "str", False, False, False)
+    PhysCodeColumn("submission_date", "date", False, False, False)
 ]

 # Enums for model metadata
@@ -144,7 +146,7 @@ def get_leaderboard_df():
             "rank": 1,
             "model": "GPT4o",
             "model_type": ModelType.CloseSource,
-            "params": 1000,
+            "organization": "OpenAI",
             "text_score": 16.0,
             "visual_score": 18.262,
             "total_score": 34.262,
@@ -153,14 +155,13 @@ def get_leaderboard_df():
             "execution_success": 10.0,
             "file_generation": 3.0,
             "file_quality": 3.0,
-            "submission_date": "2025-01-15",
-            "license": "Proprietary"
+            "submission_date": "2025-01-15"
         },
         {
             "rank": 2,
             "model": "Gemini-2.0-flash",
             "model_type": ModelType.CloseSource,
-            "params": 450,
+            "organization": "Google",
             "text_score": 15.0,
             "visual_score": 16.963,
             "total_score": 31.963,
@@ -169,14 +170,13 @@ def get_leaderboard_df():
             "execution_success": 9.0,
             "file_generation": 3.0,
             "file_quality": 3.0,
-            "submission_date": "2025-01-20",
-            "license": "Proprietary"
+            "submission_date": "2025-01-20"
         },
         {
             "rank": 3,
             "model": "DS-R1",
             "model_type": ModelType.OpenSource,
-            "params": 32,
+            "organization": "DeepSeek",
             "text_score": 14.0,
             "visual_score": 15.815,
             "total_score": 29.815,
@@ -185,14 +185,13 @@ def get_leaderboard_df():
             "execution_success": 8.5,
             "file_generation": 3.0,
             "file_quality": 2.5,
-            "submission_date": "2025-01-25",
-            "license": "Apache 2.0"
+            "submission_date": "2025-01-25"
         },
         {
             "rank": 4,
             "model": "DeepSeek-R1-Distill-Qwen-32B",
             "model_type": ModelType.OpenSource,
-            "params": 32,
+            "organization": "DeepSeek",
             "text_score": 12.2,
             "visual_score": 15.82,
             "total_score": 28.02,
@@ -201,14 +200,13 @@ def get_leaderboard_df():
             "execution_success": 7.2,
             "file_generation": 2.5,
             "file_quality": 2.5,
-            "submission_date": "2025-01-28",
-            "license": "Apache 2.0"
+            "submission_date": "2025-01-28"
         },
         {
             "rank": 5,
             "model": "QwQ-32B",
             "model_type": ModelType.OpenSource,
-            "params": 32,
+            "organization": "QwQ Team",
             "text_score": 7.1,
             "visual_score": 8.964,
             "total_score": 16.064,
@@ -217,14 +215,13 @@ def get_leaderboard_df():
             "execution_success": 4.1,
             "file_generation": 1.5,
             "file_quality": 1.5,
-            "submission_date": "2025-02-05",
-            "license": "Apache 2.0"
+            "submission_date": "2025-02-05"
         },
         {
             "rank": 6,
             "model": "Qwen-2.5-32B",
             "model_type": ModelType.OpenSource,
-            "params": 32,
+            "organization": "Alibaba",
             "text_score": 0.7,
             "visual_score": 1.126,
             "total_score": 1.826,
@@ -233,8 +230,7 @@ def get_leaderboard_df():
             "execution_success": 0.5,
             "file_generation": 0.1,
             "file_quality": 0.1,
-            "submission_date": "2025-02-10",
-            "license": "Apache 2.0"
+            "submission_date": "2025-02-10"
         }
     ]

@@ -261,25 +257,73 @@ def init_leaderboard(dataframe):
             cant_deselect=[c.name for c in COLUMNS if c.never_hidden],
             label="Select Columns to Display:",
         ),
-        search_columns=["model", "license"],
+        search_columns=["model", "organization"],
         hide_columns=[c.name for c in COLUMNS if c.hidden],
         filter_columns=[
             ColumnFilter("model_type", type="checkboxgroup", label="Model types"),
-            # ColumnFilter(
-            #     "params",
-            #     type="slider",
-            #     min=0.01,
-            #     max=1500,
-            #     label="Select the number of parameters (B)",
-            # ),
+            ColumnFilter("organization", type="checkboxgroup", label="Organizations"),
         ],
         interactive=False,
     )

+# Function to handle ZIP file upload and extraction
+def process_zip_submission(zip_file):
+    if zip_file is None:
+        return "No file uploaded. Please upload a ZIP file containing your submission."
+
+    # Create temp directory for extraction
+    temp_dir = tempfile.mkdtemp()
+
+    try:
+        # Extract the zip file
+        with zipfile.ZipFile(zip_file.name, 'r') as zip_ref:
+            zip_ref.extractall(temp_dir)
+
+        # Check for required files
+        model_info_path = os.path.join(temp_dir, "model_info.json")
+        results_json_path = os.path.join(temp_dir, "PhysCodeEval_results.json")
+
+        if not os.path.exists(model_info_path):
+            return "Error: model_info.json not found in the ZIP file."
+
+        if not os.path.exists(results_json_path):
+            return "Error: PhysCodeEval_results.json not found in the ZIP file."
+
+        # Load model info
+        with open(model_info_path, 'r') as f:
+            model_info = json.load(f)
+
+        # Check for required model info fields
+        required_fields = ["model_name", "model_type", "organization"]
+        missing_fields = [field for field in required_fields if field not in model_info]
+
+        if missing_fields:
+            return f"Error: Missing required fields in model_info.json: {', '.join(missing_fields)}"
+
+        # TODO: Process the submission files (this would involve your validation logic)
+
+        return f"Successfully processed submission for {model_info['model_name']} by {model_info['organization']}. Your submission will be reviewed and added to the leaderboard once approved."
+
+    except zipfile.BadZipFile:
+        return "Error: Invalid ZIP file."
+    except Exception as e:
+        return f"Error processing submission: {str(e)}"
+    finally:
+        # Clean up
+        shutil.rmtree(temp_dir)
+
 # Submission form handling
-def process_submission(model_name, model_type, license_type, submission_link):
+def process_submission(model_name, model_type, organization, team_name, email, submission_link):
+    # Check for required fields
+    if not model_name:
+        return "Error: Model name is required."
+    if not model_type:
+        return "Error: Model type is required."
+    if not email:
+        return "Error: Contact email is required."
+
     # This would be implemented to handle actual submission processing
-    return f"Thank you for submitting {model_name}! Your submission will be reviewed and added to the leaderboard once verified."
+    return f"Thank you for submitting {model_name} from {organization or team_name}! Your submission will be reviewed and added to the leaderboard once verified. We will contact you at {email} if we need additional information."

 # Main application
 def create_demo():
@@ -322,27 +366,42 @@ def create_demo():
             with gr.TabItem("🚀 Submit", id=3):
                 gr.Markdown(SUBMISSION_TEXT, elem_classes="markdown-text")

+                gr.Markdown("### Option 1: Upload Submission ZIP File")
+                with gr.Row():
+                    zip_file_input = gr.File(label="Upload submission ZIP file")
+
+                zip_submit_button = gr.Button("Submit ZIP File")
+                zip_submission_result = gr.Markdown()
+
+                zip_submit_button.click(
+                    process_zip_submission,
+                    [zip_file_input],
+                    zip_submission_result,
+                )
+
+                gr.Markdown("### Option 2: Submit Form")
                 with gr.Row():
                     with gr.Column():
-                        model_name_input = gr.Textbox(label="Model Name")
+                        model_name_input = gr.Textbox(label="Model Name*")
                         model_type_input = gr.Dropdown(
-                            choices=["CloseSource", "Open Source", "API"],
-                            label="Model Type",
+                            choices=["Open Source", "Close Source", "API", "Proprietary"],
+                            label="Model Type*",
                             multiselect=False,
                         )
-                        #params_input = gr.Number(label="Parameters (billions)")
+                        organization_input = gr.Textbox(label="Organization (if applicable)")

                     with gr.Column():
-                        license_input = gr.Textbox(label="License")
+                        team_name_input = gr.Textbox(label="Team Name (if applicable)")
+                        email_input = gr.Textbox(label="Contact Email*")
                         submission_link_input = gr.Textbox(label="GitHub Pull Request URL")

-                submit_button = gr.Button("Submit")
-                submission_result = gr.Markdown()
+                form_submit_button = gr.Button("Submit Form")
+                form_submission_result = gr.Markdown()

-                submit_button.click(
+                form_submit_button.click(
                     process_submission,
-                    [model_name_input, model_type_input, license_input, submission_link_input],
-                    submission_result,
+                    [model_name_input, model_type_input, organization_input, team_name_input, email_input, submission_link_input],
+                    form_submission_result,
                 )

         with gr.Row():
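For reference, a minimal sketch of how a submitter could package the ZIP archive that the new `process_zip_submission` handler validates. The helper name `build_submission_zip`, the output path, and all field values are illustrative assumptions, not part of this Space; the grounded requirements are the three keys checked in `model_info.json` (`model_name`, `model_type`, `organization`) and a `PhysCodeEval_results.json`, which the handler as written looks for at the archive root.

```python
# Illustrative sketch only: build a submission ZIP in the layout that
# process_zip_submission() checks for. Helper name and example values
# are assumptions, not part of the Space.
import json
import zipfile


def build_submission_zip(results_path: str, out_path: str = "submission.zip") -> str:
    # The handler requires these three keys in model_info.json.
    model_info = {
        "model_name": "ExampleModel-7B",  # example value
        "model_type": "Open Source",      # example value
        "organization": "Example Lab",    # example value
    }
    with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED) as zf:
        zf.writestr("model_info.json", json.dumps(model_info, indent=2))
        # The handler checks for this file at the root of the archive.
        zf.write(results_path, arcname="PhysCodeEval_results.json")
    return out_path


if __name__ == "__main__":
    print(build_submission_zip("evaluation_results/PhysCodeEval_results.json"))
```

The resulting archive is what the "Submit ZIP File" button on the 🚀 Submit tab hands to `process_zip_submission` for validation.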