apsys committed on
Commit
29a8d4f
·
1 Parent(s): 9a237d2

looks good !

Browse files
Files changed (2) hide show
  1. app.py +338 -33
  2. src/display/utils.py +29 -0
app.py CHANGED
@@ -33,7 +33,9 @@ from src.display.utils import (
33
  ModelType,
34
  Precision,
35
  WeightType,
36
- GuardModelType
 
 
37
  )
38
  from src.display.formatting import styled_message, styled_error, styled_warning
39
  from src.envs import (
@@ -69,9 +71,55 @@ except Exception as e:
69
 
70
  print(DISPLAY_COLS)
71
 
72
- def init_leaderboard(dataframe):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  """
74
- Initialize the leaderboard component.
75
  """
76
  if dataframe is None or dataframe.empty:
77
  # Create an empty dataframe with the right columns
@@ -79,26 +127,174 @@ def init_leaderboard(dataframe):
79
  dataframe = pd.DataFrame(columns=columns)
80
  logger.warning("Initializing empty leaderboard")
81
 
82
- print("\n\n", "dataframe", dataframe, "--------------------------------\n\n")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
- return Leaderboard(
85
- value=dataframe,
86
- datatype=[getattr(GUARDBENCH_COLUMN, col).type for col in DISPLAY_COLS],
87
- select_columns=SelectColumns(
88
- default_selection=[getattr(GUARDBENCH_COLUMN, col).name for col in DISPLAY_COLS],
89
- cant_deselect=[getattr(GUARDBENCH_COLUMN, col).name for col in NEVER_HIDDEN_COLS],
90
- label="Select Columns to Display:",
91
- ),
92
- search_columns=[GUARDBENCH_COLUMN.model_name.name],
93
- hide_columns=[getattr(GUARDBENCH_COLUMN, col).name for col in HIDDEN_COLS],
94
- filter_columns=[
95
- ColumnFilter(GUARDBENCH_COLUMN.model_type.name, type="checkboxgroup", label="Model types"),
96
- ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  interactive=False,
98
- render=True,
 
 
99
  )
100
 
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  def submit_results(
103
  model_name: str,
104
  base_model: str,
@@ -162,25 +358,25 @@ def refresh_data(version=CURRENT_VERSION):
162
  main_df = get_leaderboard_df(version=version)
163
  category_dfs = [get_category_leaderboard_df(category, version=version) for category in CATEGORIES]
164
 
165
- # For Leaderboard components, we need to return just the dataframes
166
- # The component will handle the update internally
167
- return dict(
168
- value=main_df
169
- ), *[dict(value=df) for df in category_dfs]
170
 
171
  except Exception as e:
172
  logger.error(f"Error in scheduled refresh: {e}")
173
- return dict(value=leaderboard.value), *[dict(value=tab.children[0].value)
174
- for tab in category_tabs.children[1:]]
175
 
176
 
177
  def update_leaderboards(version):
178
  """
179
  Update all leaderboard components with data for the selected version.
180
  """
181
- new_df = get_leaderboard_df(version=version)
182
- category_dfs = [get_category_leaderboard_df(category, version=version) for category in CATEGORIES]
183
- return [init_leaderboard(new_df)] + [init_leaderboard(df) for df in category_dfs]
 
 
 
 
184
 
185
 
186
  def create_performance_plot(selected_models, category, metric="f1_binary", version=CURRENT_VERSION):
@@ -309,25 +505,132 @@ with demo:
309
  scale=1
310
  )
311
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
  # Create tabs for each category
313
  with gr.Tabs(elem_classes="category-tabs") as category_tabs:
314
  # First tab for average metrics across all categories
315
  with gr.TabItem("📊 Overall Performance", elem_id="overall-tab"):
316
- print("LEADERBOARD_DF", LEADERBOARD_DF)
317
  leaderboard = init_leaderboard(LEADERBOARD_DF)
318
 
319
  # Create a tab for each category
320
  for category in CATEGORIES:
321
  with gr.TabItem(f"{category}", elem_id=f"category-{category.lower().replace(' ', '-')}-tab"):
322
- print("category DF", category)
323
  category_df = get_category_leaderboard_df(category, version=CURRENT_VERSION)
324
- print("category DF", category_df)
325
  category_leaderboard = init_leaderboard(category_df)
326
 
 
 
 
 
 
 
 
327
  # Refresh button functionality
328
  refresh_button.click(
329
- fn=refresh_data,
330
- inputs=[],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
331
  outputs=[leaderboard] + [category_tabs.children[i].children[0] for i in range(1, len(CATEGORIES) + 1)]
332
  )
333
 
@@ -494,3 +797,5 @@ scheduler.start()
494
  if __name__ == "__main__":
495
 
496
  demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
 
 
 
33
  ModelType,
34
  Precision,
35
  WeightType,
36
+ GuardModelType,
37
+ get_all_column_choices,
38
+ get_default_visible_columns,
39
  )
40
  from src.display.formatting import styled_message, styled_error, styled_warning
41
  from src.envs import (
 
71
 
72
  print(DISPLAY_COLS)
73
 
74
+ # Define the update_column_choices function before initializing the leaderboard components
75
def update_column_choices(df):
    """Return the column choices restricted to the columns present in ``df``.

    The result of ``get_all_column_choices()`` is returned unfiltered when ``df``
    is ``None`` or empty, or when none of its entries names a column of ``df``.
    """
    if df is None or df.empty:
        return get_all_column_choices()

    present = list(df.columns)

    # Keep the (col_name, display_name) pairs whose column exists in the dataframe
    matching = []
    for col_name, display_name in get_all_column_choices():
        if col_name in present:
            matching.append((col_name, display_name))

    if matching:
        return matching

    # Nothing matched: fall back to the unfiltered choices
    return get_all_column_choices()
95
+
96
+ # Update the column_selector initialization
97
def get_initial_columns():
    """Pick the columns initially selected in the column dropdown.

    Returns the default visible columns that exist in ``LEADERBOARD_DF``. If
    ``LEADERBOARD_DF`` has no columns, or reading it fails, the default visible
    columns are returned as-is; if none of the defaults exist in the dataframe,
    every available column is returned instead.
    """
    try:
        available_cols = list(LEADERBOARD_DF.columns)
        logger.info(f"Available columns in LEADERBOARD_DF: {available_cols}")

        # No columns at all: nothing to intersect with, use the defaults
        if len(available_cols) == 0:
            return get_default_visible_columns()

        valid_defaults = []
        for col in get_default_visible_columns():
            if col in available_cols:
                valid_defaults.append(col)

        # Fall back to everything available when no default column exists
        return valid_defaults if valid_defaults else available_cols
    except Exception as e:
        logger.error(f"Error getting initial columns: {e}")
        return get_default_visible_columns()
119
+
120
+ def init_leaderboard(dataframe, visible_columns=None):
121
  """
122
+ Initialize a standard Gradio Dataframe component for the leaderboard.
123
  """
124
  if dataframe is None or dataframe.empty:
125
  # Create an empty dataframe with the right columns
 
127
  dataframe = pd.DataFrame(columns=columns)
128
  logger.warning("Initializing empty leaderboard")
129
 
130
+ # print("\n\n", "dataframe", dataframe, "--------------------------------\n\n")
131
+
132
+ # Determine which columns to display
133
+ display_column_names = [getattr(GUARDBENCH_COLUMN, col).name for col in DISPLAY_COLS]
134
+ hidden_column_names = [getattr(GUARDBENCH_COLUMN, col).name for col in HIDDEN_COLS]
135
+
136
+ # Columns that should always be shown
137
+ always_visible = [getattr(GUARDBENCH_COLUMN, col).name for col in NEVER_HIDDEN_COLS]
138
+
139
+ # Use provided visible columns if specified, otherwise use default
140
+ if visible_columns is None:
141
+ # Determine which columns to show initially
142
+ visible_columns = [col for col in display_column_names if col not in hidden_column_names]
143
+
144
+ # Always include the never-hidden columns
145
+ for col in always_visible:
146
+ if col not in visible_columns and col in dataframe.columns:
147
+ visible_columns.append(col)
148
+
149
+ # Make sure we only include columns that actually exist in the dataframe
150
+ visible_columns = [col for col in visible_columns if col in dataframe.columns]
151
+
152
+ # Map GuardBench column types to Gradio's expected datatype strings
153
+ # Valid Gradio datatypes are: 'str', 'number', 'bool', 'date', 'markdown', 'html', 'image'
154
+ type_mapping = {
155
+ 'text': 'str',
156
+ 'number': 'number',
157
+ 'bool': 'bool',
158
+ 'date': 'date',
159
+ 'markdown': 'markdown',
160
+ 'html': 'html',
161
+ 'image': 'image'
162
+ }
163
 
164
+ # Create a list of datatypes in the format Gradio expects
165
+ datatypes = []
166
+ for col in visible_columns:
167
+ # Find the corresponding GUARDBENCH_COLUMN entry
168
+ col_type = None
169
+ for display_col in DISPLAY_COLS:
170
+ if getattr(GUARDBENCH_COLUMN, display_col).name == col:
171
+ orig_type = getattr(GUARDBENCH_COLUMN, display_col).type
172
+ # Map to Gradio's expected types
173
+ col_type = type_mapping.get(orig_type, 'str')
174
+ break
175
+
176
+ # Default to 'str' if type not found or not mappable
177
+ if col_type is None:
178
+ col_type = 'str'
179
+
180
+ datatypes.append(col_type)
181
+
182
+ # Create a dummy column for search functionality if it doesn't exist
183
+ if 'search_dummy' not in dataframe.columns:
184
+ dataframe['search_dummy'] = dataframe.apply(
185
+ lambda row: ' '.join(str(val) for val in row.values if pd.notna(val)),
186
+ axis=1
187
+ )
188
+
189
+ # Select only the visible columns for display
190
+ visible_columns.remove('model_name')
191
+ visible_columns = ['model_name'] + visible_columns
192
+ display_df = dataframe[visible_columns].copy()
193
+
194
+ return gr.Dataframe(
195
+ value=display_df,
196
+ headers=visible_columns,
197
+ datatype=datatypes, # Now using the correct format
198
  interactive=False,
199
+ wrap=True,
200
+ elem_id="leaderboard-table",
201
+ row_count=len(display_df)
202
  )
203
 
204
 
205
def search_filter_leaderboard(df, search_query="", model_types=None, version=CURRENT_VERSION):
    """
    Filter the leaderboard based on search query and model types.

    Args:
        df: Leaderboard dataframe. ``None`` or an empty dataframe is returned unchanged.
        search_query: Free text; several terms may be separated with ``;`` and a row
            is kept when it contains ANY of the terms (case-insensitive).
        model_types: Values to keep in the model-type column; falsy means no filter.
        version: Accepted for call compatibility; not used by this function.

    Returns:
        A filtered copy of ``df`` without the helper ``search_dummy`` column.
    """
    if df is None or df.empty:
        return df

    filtered_df = df.copy()

    # Helper column: all non-null cell values of a row joined into one searchable string
    if 'search_dummy' not in filtered_df.columns:
        filtered_df['search_dummy'] = filtered_df.apply(
            lambda row: ' '.join(str(val) for val in row.values if pd.notna(val)),
            axis=1
        )

    # Apply model type filter
    if model_types:
        filtered_df = filtered_df[filtered_df[GUARDBENCH_COLUMN.model_type.name].isin(model_types)]

    # Apply search query
    if search_query:
        search_terms = [term.strip() for term in search_query.split(";") if term.strip()]
        combined_mask = None
        for term in search_terms:
            # regex=False: the term is user input and must be matched literally;
            # with the default regex matching, input such as "(" or "c++" raises
            # or matches the wrong rows.
            mask = filtered_df['search_dummy'].str.contains(term, case=False, na=False, regex=False)
            combined_mask = mask if combined_mask is None else (combined_mask | mask)

        if combined_mask is not None:
            filtered_df = filtered_df[combined_mask]

    # Drop the search dummy column before returning
    visible_columns = [col for col in filtered_df.columns if col != 'search_dummy']
    return filtered_df[visible_columns]
243
+
244
+
245
def refresh_data_with_filters(version=CURRENT_VERSION, search_query="", model_types=None, selected_columns=None):
    """
    Refresh the leaderboard data and update all components with filtering.

    Args:
        version: Benchmark version passed to the dataframe loaders.
        search_query: Search text forwarded to ``search_filter_leaderboard``.
        model_types: Model types forwarded to ``search_filter_leaderboard``.
        selected_columns: Column labels chosen in the column dropdown. ``None`` or
            an empty list means "show every available column".

    Returns:
        A tuple with the main leaderboard component followed by one component per
        category dataframe. If anything fails, the error is logged and the
        existing components are returned instead.
    """
    try:
        logger.info("Performing refresh of leaderboard data with filters...")
        # Get new data
        main_df = get_leaderboard_df(version=version)
        category_dfs = [get_category_leaderboard_df(category, version=version) for category in CATEGORIES]

        # selected_columns defaults to None. Iterating it unguarded raised a TypeError
        # that the handler below swallowed, so a refresh without a column selection
        # never updated anything.
        # NOTE(review): this mapping assumes the dropdown yields display labels that
        # map onto dataframe column names this way - confirm against the column set.
        selected_columns = [
            x.lower().replace(" ", "_").replace("(", "").replace(")", "").replace("_recall", "_recall_binary")
            for x in (selected_columns or [])
        ]

        # Log the actual columns we have
        logger.info(f"Main dataframe columns: {list(main_df.columns)}")

        # Apply filters to each dataframe
        filtered_main_df = search_filter_leaderboard(main_df, search_query, model_types, version)
        filtered_category_dfs = [
            search_filter_leaderboard(df, search_query, model_types, version)
            for df in category_dfs
        ]

        # Get available columns from the dataframe
        available_columns = list(filtered_main_df.columns)

        # Fallback used when none of the selected columns exist. 'model_name' is
        # prepended explicitly, so it is removed from the defaults to avoid listing
        # the same column twice.
        fallback_columns = ['model_name'] + [
            col for col in get_default_visible_columns() if col != 'model_name'
        ]

        # Filter selected columns to only those available in the data
        if selected_columns:
            valid_selected_columns = [col for col in selected_columns if col in available_columns]
            if not valid_selected_columns and 'model_name' in available_columns:
                valid_selected_columns = list(fallback_columns)
        else:
            valid_selected_columns = available_columns

        # init_leaderboard may append to the list it receives, so hand it a copy
        main_dataframe = init_leaderboard(filtered_main_df, list(valid_selected_columns))

        # For category dataframes, get columns that actually exist in each one
        category_dataframes = []
        for df in filtered_category_dfs:
            df_columns = list(df.columns)
            df_valid_columns = [col for col in valid_selected_columns if col in df_columns]
            if not df_valid_columns and 'model_name' in df_columns:
                df_valid_columns = list(fallback_columns)
            category_dataframes.append(init_leaderboard(df, df_valid_columns))

        return main_dataframe, *category_dataframes

    except Exception as e:
        logger.error(f"Error in refresh with filters: {e}")
        # Return the current leaderboards on error
        return leaderboard, *[tab.children[0] for tab in category_tabs.children[1:len(CATEGORIES)+1]]
296
+
297
+
298
  def submit_results(
299
  model_name: str,
300
  base_model: str,
 
358
  main_df = get_leaderboard_df(version=version)
359
  category_dfs = [get_category_leaderboard_df(category, version=version) for category in CATEGORIES]
360
 
361
+ # For gr.Dataframe, we return the actual dataframes
362
+ return main_df, *category_dfs
 
 
 
363
 
364
  except Exception as e:
365
  logger.error(f"Error in scheduled refresh: {e}")
366
+ return None, *[None for _ in CATEGORIES]
 
367
 
368
 
369
def update_leaderboards(version):
    """
    Load the main and per-category leaderboard dataframes for ``version``.

    Returns a tuple holding the main dataframe followed by one dataframe per entry
    of ``CATEGORIES``. If loading fails, the error is logged and a tuple of ``None``
    values with the same length is returned.
    """
    try:
        overall_df = get_leaderboard_df(version=version)
        per_category = []
        for category in CATEGORIES:
            per_category.append(get_category_leaderboard_df(category, version=version))
        return (overall_df, *per_category)
    except Exception as e:
        logger.error(f"Error updating leaderboards for version {version}: {e}")
        return (None, *[None for _ in CATEGORIES])
380
 
381
 
382
  def create_performance_plot(selected_models, category, metric="f1_binary", version=CURRENT_VERSION):
 
505
  scale=1
506
  )
507
 
508
+ with gr.Row():
509
+ search_input = gr.Textbox(
510
+ placeholder="Search models (separate queries with ;)...",
511
+ label="Search",
512
+ elem_id="search-bar"
513
+ )
514
+ model_type_filter = gr.Dropdown(
515
+ choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
516
+ label="Filter by Model Type",
517
+ multiselect=True,
518
+ value=[],
519
+ interactive=True
520
+ )
521
+ column_selector = gr.Dropdown(
522
+ choices=get_all_column_choices(),
523
+ label="Customize Columns",
524
+ multiselect=True,
525
+ value=get_initial_columns(),
526
+ interactive=True
527
+ )
528
+
529
  # Create tabs for each category
530
  with gr.Tabs(elem_classes="category-tabs") as category_tabs:
531
  # First tab for average metrics across all categories
532
  with gr.TabItem("📊 Overall Performance", elem_id="overall-tab"):
 
533
  leaderboard = init_leaderboard(LEADERBOARD_DF)
534
 
535
  # Create a tab for each category
536
  for category in CATEGORIES:
537
  with gr.TabItem(f"{category}", elem_id=f"category-{category.lower().replace(' ', '-')}-tab"):
 
538
  category_df = get_category_leaderboard_df(category, version=CURRENT_VERSION)
 
539
  category_leaderboard = init_leaderboard(category_df)
540
 
541
+ # Connect search and filter inputs to update function
542
def update_with_search_filters(version=CURRENT_VERSION, search_query="", model_types=None, selected_columns=None):
    """
    Forward the version, search, model-type and column settings to
    ``refresh_data_with_filters`` and return its result unchanged.
    """
    refreshed = refresh_data_with_filters(version, search_query, model_types, selected_columns)
    return refreshed
547
+
548
  # Refresh button functionality
549
  refresh_button.click(
550
+ fn=refresh_data_with_filters,
551
+ inputs=[version_selector, search_input, model_type_filter, column_selector],
552
+ outputs=[leaderboard] + [category_tabs.children[i].children[0] for i in range(1, len(CATEGORIES) + 1)]
553
+ )
554
+
555
+ # Search input functionality
556
+ search_input.change(
557
+ fn=refresh_data_with_filters,
558
+ inputs=[version_selector, search_input, model_type_filter, column_selector],
559
+ outputs=[leaderboard] + [category_tabs.children[i].children[0] for i in range(1, len(CATEGORIES) + 1)]
560
+ )
561
+
562
+ # Model type filter functionality
563
+ model_type_filter.change(
564
+ fn=refresh_data_with_filters,
565
+ inputs=[version_selector, search_input, model_type_filter, column_selector],
566
+ outputs=[leaderboard] + [category_tabs.children[i].children[0] for i in range(1, len(CATEGORIES) + 1)]
567
+ )
568
+
569
+ # Version selector functionality
570
+ version_selector.change(
571
+ fn=refresh_data_with_filters,
572
+ inputs=[version_selector, search_input, model_type_filter, column_selector],
573
+ outputs=[leaderboard] + [category_tabs.children[i].children[0] for i in range(1, len(CATEGORIES) + 1)]
574
+ )
575
+
576
+ # Update the update_columns function to handle updating all tabs at once
577
def update_columns(selected_columns, version=None):
    """
    Update all leaderboards to show the selected columns.

    Args:
        selected_columns: Column labels chosen in the column dropdown. When empty,
            the default visible columns are used.
        version: Benchmark version to load. Event handlers should pass the live
            value of the version selector; ``None`` falls back to
            ``version_selector.value`` for backward compatibility.

    Returns:
        A tuple with the main leaderboard component followed by one component per
        category. If anything fails, the error is logged with its traceback and the
        existing components are returned instead.
    """
    try:
        logger.info(f"Updating columns to show: {selected_columns}")

        if not selected_columns:
            # The defaults are used as returned, without the label mapping below,
            # whose "_recall" -> "_recall_binary" replace would otherwise be applied
            # to them as well.
            selected_columns = get_default_visible_columns()
            logger.info(f"No columns selected, using defaults: {selected_columns}")
        else:
            # NOTE(review): assumes the dropdown yields display labels that map onto
            # dataframe column names this way - confirm against the column set.
            selected_columns = [
                x.lower().replace(" ", "_").replace("(", "").replace(")", "").replace("_recall", "_recall_binary")
                for x in selected_columns
            ]

        # A component's .value attribute is the value it was constructed with, not
        # the current selection in the browser, so the live version must arrive as
        # an event input.
        if version is None:
            version = version_selector.value

        # Get the current data with ALL columns preserved
        main_df = get_leaderboard_df(version=version)
        category_dfs = [get_category_leaderboard_df(category, version=version)
                        for category in CATEGORIES]

        # Log columns for debugging
        logger.info(f"Main dataframe columns: {list(main_df.columns)}")
        logger.info(f"Selected columns: {selected_columns}")

        # IMPORTANT: Make sure model_name is always included
        if 'model_name' in main_df.columns and 'model_name' not in selected_columns:
            selected_columns = ['model_name'] + selected_columns

        # init_leaderboard may append to the list it receives; give every call its
        # own copy so one table's additions do not leak into the next.
        main_leaderboard = init_leaderboard(main_df, list(selected_columns))
        category_leaderboards = [init_leaderboard(df, list(selected_columns)) for df in category_dfs]

        return main_leaderboard, *category_leaderboards

    except Exception as e:
        # logger.exception records the message at ERROR level together with the traceback
        logger.exception(f"Error updating columns: {e}")
        return leaderboard, *[tab.children[0] for tab in category_tabs.children[1:len(CATEGORIES)+1]]

# Connect column selector to update function; the version selector is passed as an
# input so the handler receives the currently selected version.
column_selector.change(
    fn=update_columns,
    inputs=[column_selector, version_selector],
    outputs=[leaderboard] + [category_tabs.children[i].children[0] for i in range(1, len(CATEGORIES) + 1)]
)
636
 
 
797
  if __name__ == "__main__":
798
 
799
  demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
800
+
801
+
src/display/utils.py CHANGED
@@ -324,3 +324,32 @@ METRICS = [
324
  "error_ratio",
325
  "avg_runtime_ms"
326
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  "error_ratio",
325
  "avg_runtime_ms"
326
  ]
327
+
328
def get_all_column_choices():
    """
    Build the column choices for the multiselect dropdown.

    Columns returned by ``get_default_visible_columns()`` are left out; every other
    ``GUARDBENCH_COLUMN`` entry contributes one tuple.

    Returns:
        List of tuples with (column_name, display_name).
    """
    shown_by_default = get_default_visible_columns()

    column_infos = (getattr(GUARDBENCH_COLUMN, f.name) for f in fields(GUARDBENCH_COLUMN))

    # Pair the internal name with the display name for each remaining column
    return [
        (info.name, info.display_name)
        for info in column_infos
        if info.name not in shown_by_default
    ]
346
+
347
def get_default_visible_columns():
    """
    Collect the names of the columns flagged as displayed by default.

    Returns:
        List of ``name`` values of the ``GUARDBENCH_COLUMN`` entries whose
        ``displayed_by_default`` attribute is truthy, in field order.
    """
    visible = []
    for f in fields(GUARDBENCH_COLUMN):
        column_info = getattr(GUARDBENCH_COLUMN, f.name)
        if column_info.displayed_by_default:
            visible.append(column_info.name)
    return visible