rzanoli committed
Commit 95ac6d4 · 1 Parent(s): a0f2788

Add the function to highlight best per task

Files changed (2)
  1. app.py +25 -9
  2. src/leaderboard/read_evals.py +1 -0
app.py CHANGED
@@ -52,6 +52,19 @@ TASK_METADATA_GENERATIVE = {
     "REL": {"icon": "🔗", "name": "Relation Extraction", "tooltip": ""},
 }

+def highlight_best_per_task(df):
+    """Add 🔺 symbol next to the maximum value in each task column"""
+
+    task_columns = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]
+
+    df = df.copy()
+    for col in task_columns:
+        if col in df.columns:
+            max_val = df[col].max()
+            df[col] = df[col].apply(
+                lambda x: f"{x:.1f}🔺" if x == max_val else f"{x:.1f}"
+            )
+    return df
+

 def theoretical_performance(df_hash):
     """
@@ -141,8 +154,8 @@ def create_line_chart(dataframe):

     # Layout
     fig.update_layout(
-        title="Avg. Combined Performance vs #Params",
-        xaxis_title="#Params (B)", yaxis_title="Avg. Combined Performance",
+        title="Average Performance vs #Params",
+        xaxis_title="#Params (B)", yaxis_title="Average Performance",
         template="plotly_white", hovermode="closest",
         font=dict(family="Arial", size=10), dragmode=False,
         xaxis=dict(tickvals=[0, 25, 50, 75, 100, 125], ticktext=["0", "25", "50", "75", "100"]),
@@ -330,6 +343,9 @@ def init_leaderboard(dataframe, default_selection=None, hidden_columns=None):
     # Apply medal assignments
     sorted_dataframe["Model"] = create_medal_assignments(sorted_dataframe)

+    # Show the best values for tasks
+    sorted_dataframe = highlight_best_per_task(sorted_dataframe)
+
     field_list = fields(AutoEvalColumn)

     return create_leaderboard_base(sorted_dataframe, field_list, hidden_columns)
@@ -342,7 +358,7 @@ def update_task_leaderboard(dataframe, default_selection=None, hidden_columns=No
         raise ValueError("Leaderboard DataFrame is empty or None.")

     # Sort and reset index
-    sorted_dataframe = dataframe.sort_values(by="Combined Performance", ascending=False).reset_index(drop=True)
+    sorted_dataframe = dataframe.sort_values(by="Comb. Perf. ⬆️", ascending=False).reset_index(drop=True)
     sorted_dataframe["Rank"] = sorted_dataframe.index + 1

     # Apply medal assignments
@@ -542,12 +558,12 @@ def create_gradio_interface():
                 f"{task} Prompt Std": "Prompt Std",
                 f"{task} Best Prompt": "Best Prompt",
                 f"{task} Best Prompt Id": "Best Prompt Id",
-                task: "Combined Performance"
+                task: "Comb. Perf. ⬆️"
             }),
-            default_selection=['Rank', 'Size', 'FS', 'Model', 'Combined Performance', 'Prompt Average',
+            default_selection=['Rank', 'Size', 'FS', 'Model', 'Comb. Perf. ⬆️', 'Prompt Average',
                                'Prompt Std', 'Best Prompt', 'Best Prompt Id'],
             hidden_columns=[col for col in LEADERBOARD_DF.columns if
-                            col not in ['Rank', 'Size', 'FS', 'Model', 'Combined Performance',
+                            col not in ['Rank', 'Size', 'FS', 'Model', 'Comb. Perf. ⬆️',
                                         'Prompt Average', 'Prompt Std', 'Best Prompt',
                                         'Best Prompt Id']]
         )
@@ -567,12 +583,12 @@ def create_gradio_interface():
                 f"{task} Prompt Std": "Prompt Std",
                 f"{task} Best Prompt": "Best Prompt",
                 f"{task} Best Prompt Id": "Best Prompt Id",
-                task: "Combined Performance"
+                task: "Comb. Perf. ⬆️"
             }),
-            default_selection=['Rank', 'Size', 'FS', 'Model', 'Combined Performance', 'Prompt Average',
+            default_selection=['Rank', 'Size', 'FS', 'Model', 'Comb. Perf. ⬆️', 'Prompt Average',
                                'Prompt Std', 'Best Prompt', 'Best Prompt Id'],
             hidden_columns=[col for col in LEADERBOARD_DF.columns if
-                            col not in ['Rank', 'Size', 'FS', 'Model', 'Combined Performance',
+                            col not in ['Rank', 'Size', 'FS', 'Model', 'Comb. Perf. ⬆️',
                                         'Prompt Average', 'Prompt Std', 'Best Prompt',
                                         'Best Prompt Id']]
         )
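
For context, a minimal, self-contained sketch of how the new highlight_best_per_task function behaves. The function body is copied from the hunk above; the toy DataFrame (model names and scores) is purely illustrative and not taken from the real leaderboard data.

import pandas as pd

def highlight_best_per_task(df):
    """Add 🔺 symbol next to the maximum value in each task column"""
    task_columns = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]
    df = df.copy()
    for col in task_columns:
        if col in df.columns:
            max_val = df[col].max()
            df[col] = df[col].apply(
                lambda x: f"{x:.1f}🔺" if x == max_val else f"{x:.1f}"
            )
    return df

# Toy data: two made-up models and two of the ten task columns.
toy = pd.DataFrame({
    "Model": ["model-a", "model-b"],
    "TE": [71.3, 68.9],
    "SA": [80.1, 82.4],
})
print(highlight_best_per_task(toy))
# TE becomes ["71.3🔺", "68.9"] and SA becomes ["80.1", "82.4🔺"].
# Note that task columns are converted to strings here, which is presumably
# why the diff applies the function only after sorting and medal assignment
# in init_leaderboard.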
src/leaderboard/read_evals.py CHANGED
@@ -109,6 +109,7 @@ class EvalResult:
             results[task.benchmark] = int(v[task.metric_type][-1:])
         else:
             #results[task.benchmark] = f"{v[task.metric_type]:.2f}" # Ensure two decimals for display
+            #results[task.benchmark] = float(v[task.metric_type])
             results[task.benchmark] = float(v[task.metric_type])
             #value = float(v[task.metric_type])
             #results[task.benchmark] = round(value, 2) # Round to 2 decimals
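
A small aside on this hunk: the commented-out lines record display-formatting and rounding alternatives, while the active line keeps the raw float. A hedged sketch of the difference between the three options (the 0.8734 score is invented for illustration, and the value is assumed here to already be numeric; the real type of v[task.metric_type] may differ):

# Illustrative only: a made-up score standing in for v[task.metric_type].
score = 0.8734

kept = float(score)          # 0.8734 -> numeric, full precision (the line the diff keeps active)
formatted = f"{score:.2f}"   # "0.87" -> nice for display, but a str, so numeric sorting/averaging breaks
rounded = round(score, 2)    # 0.87   -> numeric, but precision is dropped before any later aggregation

print(type(kept), type(formatted), type(rounded))
# <class 'float'> <class 'str'> <class 'float'>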