Shiyu Zhao committed on
Commit
86d5a63
·
1 Parent(s): 5ac3c3f

Update space

Browse files
Files changed (1) hide show
  1. app.py +108 -35
app.py CHANGED
@@ -262,39 +262,81 @@ def scan_submissions_directory():
262
  repo_id=REPO_ID,
263
  repo_type="space"
264
  )
 
265
  repo_files = [f for f in all_files if f.startswith('submissions/')]
266
  except Exception as e:
267
  print(f"Error listing repository contents: {str(e)}")
268
  return submissions_by_split
269
-
270
- # Process submissions and update model types
 
271
  for filepath in repo_files:
272
- if filepath.endswith('metadata.json'):
273
- try:
274
- submission_data = read_json_from_hub(api, REPO_ID, filepath)
275
- if submission_data:
276
- method_name = submission_data.get('Method Name')
277
- model_type = submission_data.get('Model Type')
278
-
279
- # If model type is specified in metadata, use it
280
- if method_name and model_type:
281
- # Check if method exists in any other category
282
- existing_type = get_model_type_for_method(method_name)
283
-
284
- # If method doesn't exist in any category, add it to the specified category
285
- if existing_type == 'Others' and model_type in model_types:
286
- if method_name not in model_types[model_type]:
287
- model_types[model_type].append(method_name)
288
-
289
- # Add submission to appropriate split
290
- split = submission_data.get('Split')
291
- if split in submissions_by_split:
292
- submissions_by_split[split].append(submission_data)
293
 
294
- except Exception as e:
295
- print(f"Error processing metadata file {filepath}: {str(e)}")
 
 
 
 
 
 
 
 
 
 
296
  continue
297
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298
  return submissions_by_split
299
 
300
  except Exception as e:
@@ -302,25 +344,49 @@ def scan_submissions_directory():
302
  return None
303
 
304
  def initialize_leaderboard():
 
 
 
305
  global df_synthesized_full, df_synthesized_10, df_human_generated
306
 
307
  try:
308
- # Initialize with baseline results
309
  df_synthesized_full = pd.DataFrame(data_synthesized_full)
310
  df_synthesized_10 = pd.DataFrame(data_synthesized_10)
311
  df_human_generated = pd.DataFrame(data_human_generated)
312
 
313
  print("Initialized with baseline results")
314
 
315
- # Scan submissions directory and update 'Others' category
316
  submissions = scan_submissions_directory()
317
  if submissions:
318
- for split_submissions in submissions.values():
319
  for submission in split_submissions:
320
- method_name = submission.get('Method Name')
321
- method_exists = any(method_name in models for models in model_types.values())
322
- if not method_exists:
323
- model_types['Others'].append(method_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
 
325
  print("Leaderboard initialization complete")
326
 
@@ -705,11 +771,18 @@ def process_submission(
705
 
706
 
707
  def filter_by_model_type(df, selected_types):
 
 
 
708
  if not selected_types:
709
  return df.head(0)
 
 
710
  selected_models = []
711
- for type in selected_types:
712
- selected_models.extend(model_types[type])
 
 
713
  return df[df['Method'].isin(selected_models)]
714
 
715
  def format_dataframe(df, dataset):
 
262
  repo_id=REPO_ID,
263
  repo_type="space"
264
  )
265
+ # Filter for files in submissions directory
266
  repo_files = [f for f in all_files if f.startswith('submissions/')]
267
  except Exception as e:
268
  print(f"Error listing repository contents: {str(e)}")
269
  return submissions_by_split
270
+
271
+ # Group files by team folders
272
+ folder_files = {}
273
  for filepath in repo_files:
274
+ parts = filepath.split('/')
275
+ if len(parts) < 3: # Need at least submissions/team_folder/file
276
+ continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
 
278
+ folder_name = parts[1] # team_folder name
279
+ if folder_name not in folder_files:
280
+ folder_files[folder_name] = []
281
+ folder_files[folder_name].append(filepath)
282
+
283
+ # Process each team folder
284
+ for folder_name, files in folder_files.items():
285
+ try:
286
+ # Find latest.json in this folder
287
+ latest_file = next((f for f in files if f.endswith('latest.json')), None)
288
+ if not latest_file:
289
+ print(f"No latest.json found in {folder_name}")
290
  continue
291
+
292
+ # Read latest.json
293
+ latest_info = read_json_from_hub(api, REPO_ID, latest_file)
294
+ if not latest_info:
295
+ print(f"Failed to read latest.json for {folder_name}")
296
+ continue
297
+
298
+ timestamp = latest_info.get('latest_submission')
299
+ if not timestamp:
300
+ print(f"No timestamp found in latest.json for {folder_name}")
301
+ continue
302
+
303
+ # Find metadata file for latest submission
304
+ metadata_file = next(
305
+ (f for f in files if f.endswith(f'metadata_{timestamp}.json')),
306
+ None
307
+ )
308
+ if not metadata_file:
309
+ print(f"No matching metadata file found for {folder_name} timestamp {timestamp}")
310
+ continue
311
+
312
+ # Read metadata file
313
+ submission_data = read_json_from_hub(api, REPO_ID, metadata_file)
314
+ if not submission_data:
315
+ print(f"Failed to read metadata for {folder_name}")
316
+ continue
317
+
318
+ if latest_info.get('status') != 'approved':
319
+ print(f"Skipping unapproved submission in {folder_name}")
320
+ continue
321
+
322
+ # Add to submissions by split
323
+ split = submission_data.get('Split')
324
+ if split in submissions_by_split:
325
+ submissions_by_split[split].append(submission_data)
326
+
327
+ # Update model types if necessary
328
+ method_name = submission_data.get('Method Name')
329
+ model_type = submission_data.get('Model Type', 'Others')
330
+
331
+ # Add to model type if it's a new method
332
+ method_exists = any(method_name in methods for methods in model_types.values())
333
+ if not method_exists and model_type in model_types:
334
+ model_types[model_type].append(method_name)
335
+
336
+ except Exception as e:
337
+ print(f"Error processing folder {folder_name}: {str(e)}")
338
+ continue
339
+
340
  return submissions_by_split
341
 
342
  except Exception as e:
 
344
  return None
345
 
346
  def initialize_leaderboard():
347
+ """
348
+ Initialize the leaderboard with baseline results and submitted results.
349
+ """
350
  global df_synthesized_full, df_synthesized_10, df_human_generated
351
 
352
  try:
353
+ # First, initialize with baseline results
354
  df_synthesized_full = pd.DataFrame(data_synthesized_full)
355
  df_synthesized_10 = pd.DataFrame(data_synthesized_10)
356
  df_human_generated = pd.DataFrame(data_human_generated)
357
 
358
  print("Initialized with baseline results")
359
 
360
+ # Then scan and add submitted results
361
  submissions = scan_submissions_directory()
362
  if submissions:
363
+ for split, split_submissions in submissions.items():
364
  for submission in split_submissions:
365
+ if submission.get('results'): # Make sure we have results
366
+ # Update appropriate DataFrame based on split
367
+ if split == 'test':
368
+ df_to_update = df_synthesized_full
369
+ elif split == 'test-0.1':
370
+ df_to_update = df_synthesized_10
371
+ else: # human_generated_eval
372
+ df_to_update = df_human_generated
373
+
374
+ # Prepare new row data
375
+ new_row = {
376
+ 'Method': submission['Method Name'],
377
+ f'STARK-{submission["Dataset"].upper()}_Hit@1': submission['results']['hit@1'],
378
+ f'STARK-{submission["Dataset"].upper()}_Hit@5': submission['results']['hit@5'],
379
+ f'STARK-{submission["Dataset"].upper()}_R@20': submission['results']['recall@20'],
380
+ f'STARK-{submission["Dataset"].upper()}_MRR': submission['results']['mrr']
381
+ }
382
+
383
+ # Update existing row or add new one
384
+ method_mask = df_to_update['Method'] == submission['Method Name']
385
+ if method_mask.any():
386
+ for col in new_row:
387
+ df_to_update.loc[method_mask, col] = new_row[col]
388
+ else:
389
+ df_to_update.loc[len(df_to_update)] = new_row
390
 
391
  print("Leaderboard initialization complete")
392
 
 
771
 
772
 
773
  def filter_by_model_type(df, selected_types):
774
+ """
775
+ Filter DataFrame by selected model types, including submitted models.
776
+ """
777
  if not selected_types:
778
  return df.head(0)
779
+
780
+ # Get all models from selected types
781
  selected_models = []
782
+ for type_name in selected_types:
783
+ selected_models.extend(model_types[type_name])
784
+
785
+ # Filter DataFrame to include only selected models
786
  return df[df['Method'].isin(selected_models)]
787
 
788
  def format_dataframe(df, dataset):