rntc committed on
Commit
ad128a8
·
1 Parent(s): a18de40

Handle empty DataFrame case in leaderboard

Browse files
Files changed (1) hide show
  1. src/populate.py +18 -9
src/populate.py CHANGED
@@ -15,17 +15,26 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
15
  all_data_json = [v.to_dict() for v in raw_data]
16
 
17
  df = pd.DataFrame.from_records(all_data_json)
 
 
 
 
 
 
 
18
  # Sort by the first task (EMEA NER) since we don't have an average for NER tasks
19
  # If no results exist yet, just sort by model name
20
- if not df.empty:
21
- first_task = list(Tasks)[0] # emea_ner
22
- task_col_name = getattr(AutoEvalColumn, first_task.name).name
23
- if task_col_name in df.columns:
24
- df = df.sort_values(by=[task_col_name], ascending=False)
25
- else:
26
- # Fallback to sorting by model name if no task results yet
27
- df = df.sort_values(by=[AutoEvalColumn.model.name], ascending=True)
28
- df = df[cols].round(decimals=2)
 
 
29
 
30
  # filter out if any of the benchmarks have not been produced
31
  df = df[has_no_nan_values(df, benchmark_cols)]
 
15
  all_data_json = [v.to_dict() for v in raw_data]
16
 
17
  df = pd.DataFrame.from_records(all_data_json)
18
+
19
+ # Handle empty DataFrame case
20
+ if df.empty:
21
+ # Create empty DataFrame with correct columns
22
+ df = pd.DataFrame(columns=cols)
23
+ return df
24
+
25
  # Sort by the first task (EMEA NER) since we don't have an average for NER tasks
26
  # If no results exist yet, just sort by model name
27
+ first_task = list(Tasks)[0] # emea_ner
28
+ task_col_name = getattr(AutoEvalColumn, first_task.name).name
29
+ if task_col_name in df.columns:
30
+ df = df.sort_values(by=[task_col_name], ascending=False)
31
+ else:
32
+ # Fallback to sorting by model name if no task results yet
33
+ df = df.sort_values(by=[AutoEvalColumn.model.name], ascending=True)
34
+
35
+ # Only select columns that exist in the DataFrame
36
+ available_cols = [col for col in cols if col in df.columns]
37
+ df = df[available_cols].round(decimals=2)
38
 
39
  # filter out if any of the benchmarks have not been produced
40
  df = df[has_no_nan_values(df, benchmark_cols)]