rntc Claude committed on
Commit
9f4fde3
·
1 Parent(s): 1e26b4a

Fix critical stability issues in leaderboard

Browse files

- Fix KeyError when tasks missing from evaluation results
- Prevent division by zero in average calculations
- Add safe DataFrame column access in filtering
- Fix file path bugs in subdirectory processing
- Add JSON error handling for malformed queue files
- Improve license access with proper fallbacks
- Make filter columns dynamic based on available data

These fixes prevent major crash scenarios when handling edge cases,
empty data, or malformed evaluation files.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

app.py CHANGED
@@ -69,23 +69,34 @@ def init_leaderboard(dataframe):
69
  ),
70
  search_columns=["model", "license"],
71
  hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
72
- filter_columns=[
73
- ColumnFilter("precision", type="checkboxgroup", label="Precision"),
74
- ColumnFilter(
75
- "params",
76
- type="slider",
77
- min=0.01,
78
- max=150,
79
- label="Select the number of parameters (B)",
80
- ),
81
- ColumnFilter(
82
- "still_on_hub", type="boolean", label="Deleted/incomplete", default=True
83
- ),
84
- ],
85
  bool_checkboxgroup_label="Hide models",
86
  interactive=False,
87
  )
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  return Leaderboard(
90
  value=dataframe,
91
  datatype=[c.type for c in fields(AutoEvalColumn)],
@@ -96,19 +107,7 @@ def init_leaderboard(dataframe):
96
  ),
97
  search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
98
  hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
99
- filter_columns=[
100
- ColumnFilter("precision", type="checkboxgroup", label="Precision"),
101
- ColumnFilter(
102
- "params",
103
- type="slider",
104
- min=0.01,
105
- max=150,
106
- label="Select the number of parameters (B)",
107
- ),
108
- ColumnFilter(
109
- "still_on_hub", type="boolean", label="Deleted/incomplete", default=True
110
- ),
111
- ],
112
  bool_checkboxgroup_label="Hide models",
113
  interactive=False,
114
  )
 
69
  ),
70
  search_columns=["model", "license"],
71
  hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
72
+ filter_columns=[],
 
 
 
 
 
 
 
 
 
 
 
 
73
  bool_checkboxgroup_label="Hide models",
74
  interactive=False,
75
  )
76
 
77
+ # Build filter columns based on available data
78
+ filter_columns = []
79
+
80
+ # Add precision filter only if precision column has data
81
+ if "precision" in dataframe.columns and not dataframe["precision"].isna().all():
82
+ filter_columns.append(ColumnFilter("precision", type="checkboxgroup", label="Precision"))
83
+
84
+ # Add params filter only if params column has data
85
+ if "params" in dataframe.columns and not dataframe["params"].isna().all():
86
+ filter_columns.append(ColumnFilter(
87
+ "params",
88
+ type="slider",
89
+ min=0.01,
90
+ max=150,
91
+ label="Select the number of parameters (B)",
92
+ ))
93
+
94
+ # Add still_on_hub filter only if column has data
95
+ if "still_on_hub" in dataframe.columns and not dataframe["still_on_hub"].isna().all():
96
+ filter_columns.append(ColumnFilter(
97
+ "still_on_hub", type="boolean", label="Deleted/incomplete", default=True
98
+ ))
99
+
100
  return Leaderboard(
101
  value=dataframe,
102
  datatype=[c.type for c in fields(AutoEvalColumn)],
 
107
  ),
108
  search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
109
  hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
110
+ filter_columns=filter_columns,
 
 
 
 
 
 
 
 
 
 
 
 
111
  bool_checkboxgroup_label="Hide models",
112
  interactive=False,
113
  )
src/display/formatting.py CHANGED
@@ -20,7 +20,12 @@ def styled_message(message):
20
 
21
 
22
  def has_no_nan_values(df, columns):
23
- return df[columns].notna().all(axis=1)
 
 
 
 
 
24
 
25
 
26
  def has_nan_values(df, columns):
 
20
 
21
 
22
  def has_no_nan_values(df, columns):
23
+ if df.empty:
24
+ return pd.Series([], dtype=bool)
25
+ existing_cols = [col for col in columns if col in df.columns]
26
+ if not existing_cols:
27
+ return pd.Series([True] * len(df), index=df.index)
28
+ return df[existing_cols].notna().all(axis=1)
29
 
30
 
31
  def has_nan_values(df, columns):
src/leaderboard/read_evals.py CHANGED
@@ -125,7 +125,8 @@ class EvalResult:
125
 
126
  def to_dict(self):
127
  """Converts the Eval Result to a dict compatible with our dataframe display"""
128
- average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
 
129
  data_dict = {
130
  "eval_name": self.eval_name, # not a column, just a save name,
131
  "precision": self.precision.value.name,
@@ -143,7 +144,7 @@ class EvalResult:
143
  }
144
 
145
  for task in Tasks:
146
- data_dict[task.value.col_name] = self.results[task.value.benchmark]
147
 
148
  return data_dict
149
 
 
125
 
126
  def to_dict(self):
127
  """Converts the Eval Result to a dict compatible with our dataframe display"""
128
+ valid_results = [v for v in self.results.values() if v is not None]
129
+ average = sum(valid_results) / len(valid_results) if valid_results else 0.0
130
  data_dict = {
131
  "eval_name": self.eval_name, # not a column, just a save name,
132
  "precision": self.precision.value.name,
 
144
  }
145
 
146
  for task in Tasks:
147
+ data_dict[task.value.col_name] = self.results.get(task.value.benchmark, None)
148
 
149
  return data_dict
150
 
src/populate.py CHANGED
@@ -49,24 +49,33 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
49
  for entry in entries:
50
  if ".json" in entry:
51
  file_path = os.path.join(save_path, entry)
52
- with open(file_path) as fp:
53
- data = json.load(fp)
 
54
 
55
- data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
56
- data[EvalQueueColumn.revision.name] = data.get("revision", "main")
57
 
58
- all_evals.append(data)
 
 
 
59
  elif ".md" not in entry:
60
  # this is a folder
61
- sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if os.path.isfile(e) and not e.startswith(".")]
 
62
  for sub_entry in sub_entries:
63
  file_path = os.path.join(save_path, entry, sub_entry)
64
- with open(file_path) as fp:
65
- data = json.load(fp)
 
66
 
67
- data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
68
- data[EvalQueueColumn.revision.name] = data.get("revision", "main")
69
- all_evals.append(data)
 
 
 
70
 
71
  pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
72
  running_list = [e for e in all_evals if e["status"] == "RUNNING"]
 
49
  for entry in entries:
50
  if ".json" in entry:
51
  file_path = os.path.join(save_path, entry)
52
+ try:
53
+ with open(file_path) as fp:
54
+ data = json.load(fp)
55
 
56
+ data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
57
+ data[EvalQueueColumn.revision.name] = data.get("revision", "main")
58
 
59
+ all_evals.append(data)
60
+ except (json.JSONDecodeError, KeyError, IOError) as e:
61
+ print(f"Error processing {file_path}: {e}")
62
+ continue
63
  elif ".md" not in entry:
64
  # this is a folder
65
+ sub_entries = [e for e in os.listdir(os.path.join(save_path, entry))
66
+ if os.path.isfile(os.path.join(save_path, entry, e)) and not e.startswith(".")]
67
  for sub_entry in sub_entries:
68
  file_path = os.path.join(save_path, entry, sub_entry)
69
+ try:
70
+ with open(file_path) as fp:
71
+ data = json.load(fp)
72
 
73
+ data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
74
+ data[EvalQueueColumn.revision.name] = data.get("revision", "main")
75
+ all_evals.append(data)
76
+ except (json.JSONDecodeError, KeyError, IOError) as e:
77
+ print(f"Error processing {file_path}: {e}")
78
+ continue
79
 
80
  pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
81
  running_list = [e for e in all_evals if e["status"] == "RUNNING"]
src/submission/submit.py CHANGED
@@ -52,7 +52,11 @@ def add_new_eval(
52
 
53
  # Were the model card and license filled?
54
  try:
55
- license = model_info.cardData["license"]
 
 
 
 
56
  except Exception:
57
  return styled_error("Please select a license for your model")
58
 
 
52
 
53
  # Were the model card and license filled?
54
  try:
55
+ license = "Unknown"
56
+ if hasattr(model_info, 'cardData') and model_info.cardData:
57
+ license = model_info.cardData.get("license", "Unknown")
58
+ if license == "Unknown":
59
+ return styled_error("Please select a license for your model")
60
  except Exception:
61
  return styled_error("Please select a license for your model")
62