Mark Duppenthaler committed on
Commit
b087e88
·
1 Parent(s): 54be5f9

Combined leaderboard, simplified filters

Browse files
backend/app.py CHANGED
@@ -1,7 +1,7 @@
1
  from backend.chart import mk_variations
2
- from backend.config import get_dataset_config
3
  from backend.examples import audio_examples_tab, image_examples_tab, video_examples_tab
4
- from flask import Flask, Response, send_from_directory
5
  from flask_cors import CORS
6
  import os
7
  import logging
@@ -39,41 +39,47 @@ def index():
39
  @app.route("/data/<path:dataset_name>")
40
  def data_files(dataset_name):
41
  """
42
- Serves csv files from the data directory.
43
  """
44
- data_dir = os.path.join(os.path.dirname(__file__), "data")
45
- file_path = os.path.join(data_dir, dataset_name) + ".csv"
 
 
 
 
 
 
46
  logger.info(f"Looking for dataset file: {file_path}")
47
- if os.path.isfile(file_path):
48
  df = pd.read_csv(file_path)
49
  logger.info(f"Processing dataset: {dataset_name}")
50
- if dataset_name.endswith("benchmark"):
51
- return get_leaderboard(dataset_name, df)
52
- elif dataset_name.endswith("attacks_variations"):
53
- return get_chart(df)
54
-
55
- return "File not found", 404
 
 
56
 
57
 
58
  @app.route("/examples/<path:type>")
59
  def example_files(type):
60
  """
61
- Serve example files from the examples directory.
62
  """
63
 
64
- abs_path = "https://dl.fbaipublicfiles.com/omnisealbench/"
65
-
66
  # Switch based on the type parameter to call the appropriate tab function
67
  if type == "image":
68
- result = image_examples_tab(abs_path)
69
  return Response(json.dumps(result), mimetype="application/json")
70
  elif type == "audio":
71
  # Assuming you'll create these functions
72
- result = audio_examples_tab(abs_path)
73
  return Response(json.dumps(result), mimetype="application/json")
74
  elif type == "video":
75
  # Assuming you'll create these functions
76
- result = video_examples_tab(abs_path)
77
  return Response(json.dumps(result), mimetype="application/json")
78
  else:
79
  return "Invalid example type", 400
@@ -91,7 +97,7 @@ def proxy(url):
91
  url = unquote(url)
92
 
93
  # Make sure we're only proxying from trusted domains for security
94
- if not url.startswith("https://dl.fbaipublicfiles.com/"):
95
  return {"error": "Only proxying from allowed domains is permitted"}, 403
96
 
97
  response = requests.get(url, stream=True)
@@ -120,9 +126,9 @@ def proxy(url):
120
  return {"error": str(e)}, 500
121
 
122
 
123
- def get_leaderboard(dataset_name, df):
124
  # Determine file type and handle accordingly
125
- config = get_dataset_config(dataset_name)
126
 
127
  # This part adds on all the columns
128
  df = get_old_format_dataframe(df, config["first_cols"], config["attack_scores"])
@@ -146,11 +152,12 @@ def get_leaderboard(dataset_name, df):
146
  return Response(json.dumps(result), mimetype="application/json")
147
 
148
 
149
- def get_chart(df):
150
  # This function should return the chart data based on the DataFrame
151
  # For now, we will just return a placeholder response
152
  chart_data = mk_variations(
153
  df,
 
154
  # attacks_plot_metrics,
155
  # audio_attacks_with_variations,
156
  )
 
1
  from backend.chart import mk_variations
2
+ from backend.config import ABS_DATASET_DOMAIN, ABS_DATASET_PATH, get_dataset_config
3
  from backend.examples import audio_examples_tab, image_examples_tab, video_examples_tab
4
+ from flask import Flask, Response, send_from_directory, request
5
  from flask_cors import CORS
6
  import os
7
  import logging
 
39
  @app.route("/data/<path:dataset_name>")
40
  def data_files(dataset_name):
41
  """
42
+ Serves csv files from S3.
43
  """
44
+ # Get dataset_type from query params
45
+ dataset_type = request.args.get("dataset_type")
46
+ if not dataset_type:
47
+ logger.error("No dataset_type provided in query parameters.")
48
+ return "Dataset type not specified", 400
49
+
50
+ # data_dir = os.path.join(os.path.dirname(__file__), "data")
51
+ file_path = os.path.join(ABS_DATASET_PATH, dataset_name) + f"_{dataset_type}.csv"
52
  logger.info(f"Looking for dataset file: {file_path}")
53
+ try:
54
  df = pd.read_csv(file_path)
55
  logger.info(f"Processing dataset: {dataset_name}")
56
+ config = get_dataset_config(dataset_name)
57
+ if dataset_type == "benchmark":
58
+ return get_leaderboard(config, df)
59
+ elif dataset_type == "attacks_variations":
60
+ return get_chart(config, df)
61
+ except:
62
+ logger.error(f"Failed to fetch file: {file_path}")
63
+ return "File not found", 404
64
 
65
 
66
  @app.route("/examples/<path:type>")
67
  def example_files(type):
68
  """
69
+ Serve example files from S3.
70
  """
71
 
 
 
72
  # Switch based on the type parameter to call the appropriate tab function
73
  if type == "image":
74
+ result = image_examples_tab(ABS_DATASET_PATH)
75
  return Response(json.dumps(result), mimetype="application/json")
76
  elif type == "audio":
77
  # Assuming you'll create these functions
78
+ result = audio_examples_tab(ABS_DATASET_PATH)
79
  return Response(json.dumps(result), mimetype="application/json")
80
  elif type == "video":
81
  # Assuming you'll create these functions
82
+ result = video_examples_tab(ABS_DATASET_PATH)
83
  return Response(json.dumps(result), mimetype="application/json")
84
  else:
85
  return "Invalid example type", 400
 
97
  url = unquote(url)
98
 
99
  # Make sure we're only proxying from trusted domains for security
100
+ if not url.startswith(ABS_DATASET_DOMAIN):
101
  return {"error": "Only proxying from allowed domains is permitted"}, 403
102
 
103
  response = requests.get(url, stream=True)
 
126
  return {"error": str(e)}, 500
127
 
128
 
129
+ def get_leaderboard(config, df):
130
  # Determine file type and handle accordingly
131
+ logger.warning(f"Processing dataset with config: {config}")
132
 
133
  # This part adds on all the columns
134
  df = get_old_format_dataframe(df, config["first_cols"], config["attack_scores"])
 
152
  return Response(json.dumps(result), mimetype="application/json")
153
 
154
 
155
+ def get_chart(config, df):
156
  # This function should return the chart data based on the DataFrame
157
  # For now, we will just return a placeholder response
158
  chart_data = mk_variations(
159
  df,
160
+ config["attacks_with_variations"],
161
  # attacks_plot_metrics,
162
  # audio_attacks_with_variations,
163
  )
backend/chart.py CHANGED
@@ -2,44 +2,6 @@ import pandas as pd
2
 
3
  from pathlib import Path
4
 
5
- audio_attacks_with_variations = [
6
- "random_noise",
7
- "lowpass_filter",
8
- "highpass_filter",
9
- "boost_audio",
10
- "duck_audio",
11
- "shush",
12
- ]
13
-
14
- attacks_plot_metrics = ["bit_acc", "log10_p_value", "TPR", "FPR", "watermark_det_score"]
15
-
16
- image_attacks_with_variations = [
17
- "center_crop",
18
- "jpeg",
19
- "brightness",
20
- "contrast",
21
- "saturation",
22
- "sharpness",
23
- "resize",
24
- "perspective",
25
- "median_filter",
26
- "hue",
27
- "gaussian_blur",
28
- ]
29
-
30
-
31
- video_attacks_with_variations = [
32
- "Rotate",
33
- "Resize",
34
- "Crop",
35
- "Brightness",
36
- "Contrast",
37
- "Saturation",
38
- "H264",
39
- "H264rgb",
40
- "H265",
41
- ]
42
-
43
 
44
  def plot_data(metric, selected_attack, all_attacks_df):
45
  attack_df = all_attacks_df[all_attacks_df.attack == selected_attack]
@@ -57,8 +19,7 @@ def plot_data(metric, selected_attack, all_attacks_df):
57
 
58
  def mk_variations(
59
  all_attacks_df,
60
- metrics: list[str] = attacks_plot_metrics,
61
- attacks_with_variations: list[str] = audio_attacks_with_variations,
62
  ):
63
  # all_attacks_df = pd.read_csv(csv_file)
64
  # print(all_attacks_df)
@@ -92,8 +53,17 @@ def mk_variations(
92
  # all_graphs,
93
  # )
94
 
 
 
 
 
 
 
 
 
 
95
  return {
96
- "metrics": metrics,
97
  "attacks_with_variations": attacks_with_variations,
98
  "all_attacks_df": all_attacks_df.to_dict(orient="records"),
99
  }
 
2
 
3
  from pathlib import Path
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  def plot_data(metric, selected_attack, all_attacks_df):
7
  attack_df = all_attacks_df[all_attacks_df.attack == selected_attack]
 
19
 
20
  def mk_variations(
21
  all_attacks_df,
22
+ attacks_with_variations: list[str],
 
23
  ):
24
  # all_attacks_df = pd.read_csv(csv_file)
25
  # print(all_attacks_df)
 
53
  # all_graphs,
54
  # )
55
 
56
+ # Replace NaN values with the string "NaN" for JSON serialization
57
+ all_attacks_df = all_attacks_df.fillna(value="NaN")
58
+ attacks_plot_metrics = [
59
+ "bit_acc",
60
+ "log10_p_value",
61
+ "TPR",
62
+ "FPR",
63
+ "watermark_det_score",
64
+ ]
65
  return {
66
+ "metrics": attacks_plot_metrics,
67
  "attacks_with_variations": attacks_with_variations,
68
  "all_attacks_df": all_attacks_df.to_dict(orient="records"),
69
  }
backend/config.py CHANGED
@@ -1,5 +1,9 @@
 
 
 
 
1
  def get_dataset_config(dataset_name):
2
- if dataset_name == "voxpopuli_1k_audio_benchmark":
3
  return {
4
  "first_cols": [
5
  "snr",
@@ -29,8 +33,16 @@ def get_dataset_config(dataset_name):
29
  "aac_compression": "Compression",
30
  "mp3_compression": "Compression",
31
  },
 
 
 
 
 
 
 
 
32
  }
33
- elif dataset_name == "ravdess_1k_audio_benchmark":
34
  return {
35
  "first_cols": ["snr", "sisnr", "stoi", "pesq"],
36
  "attack_scores": ["bit_acc", "log10_p_value", "TPR", "FPR"],
@@ -50,8 +62,16 @@ def get_dataset_config(dataset_name):
50
  "aac_compression": "Compression",
51
  "mp3_compression": "Compression",
52
  },
 
 
 
 
 
 
 
 
53
  }
54
- elif dataset_name == "val2014_1k_image_benchmark":
55
  return {
56
  "first_cols": ["psnr", "ssim", "lpips", "decoder_time"],
57
  "attack_scores": ["bit_acc", "log10_p_value", "TPR", "FPR"],
@@ -76,8 +96,21 @@ def get_dataset_config(dataset_name):
76
  "avg": "Averages",
77
  "none": "Baseline",
78
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  }
80
- elif dataset_name == "sav_val_full_video_benchmark":
81
  return {
82
  "first_cols": ["psnr", "ssim", "msssim", "lpips", "vmaf", "decoder_time"],
83
  "attack_scores": ["bit_acc", "log10_p_value", "TPR", "FPR"],
@@ -104,6 +137,17 @@ def get_dataset_config(dataset_name):
104
  "H264_Crop_Brightness2": "Mixed",
105
  "H264_Crop_Brightness3": "Mixed",
106
  },
 
 
 
 
 
 
 
 
 
 
 
107
  }
108
  else:
109
  raise ValueError(f"Unknown dataset: {dataset_name}")
 
1
+ ABS_DATASET_DOMAIN = "https://dl.fbaipublicfiles.com"
2
+ ABS_DATASET_PATH = f"{ABS_DATASET_DOMAIN}/omnisealbench/"
3
+
4
+
5
  def get_dataset_config(dataset_name):
6
+ if dataset_name == "voxpopuli_1k/audio":
7
  return {
8
  "first_cols": [
9
  "snr",
 
33
  "aac_compression": "Compression",
34
  "mp3_compression": "Compression",
35
  },
36
+ "attacks_with_variations": [
37
+ "random_noise",
38
+ "lowpass_filter",
39
+ "highpass_filter",
40
+ "boost_audio",
41
+ "duck_audio",
42
+ "shush",
43
+ ],
44
  }
45
+ elif dataset_name == "ravdess_1k/audio":
46
  return {
47
  "first_cols": ["snr", "sisnr", "stoi", "pesq"],
48
  "attack_scores": ["bit_acc", "log10_p_value", "TPR", "FPR"],
 
62
  "aac_compression": "Compression",
63
  "mp3_compression": "Compression",
64
  },
65
+ "attacks_with_variations": [
66
+ "random_noise",
67
+ "lowpass_filter",
68
+ "highpass_filter",
69
+ "boost_audio",
70
+ "duck_audio",
71
+ "shush",
72
+ ],
73
  }
74
+ elif dataset_name == "val2014_1k/image":
75
  return {
76
  "first_cols": ["psnr", "ssim", "lpips", "decoder_time"],
77
  "attack_scores": ["bit_acc", "log10_p_value", "TPR", "FPR"],
 
96
  "avg": "Averages",
97
  "none": "Baseline",
98
  },
99
+ "attacks_with_variations": [
100
+ "center_crop",
101
+ "jpeg",
102
+ "brightness",
103
+ "contrast",
104
+ "saturation",
105
+ "sharpness",
106
+ "resize",
107
+ "perspective",
108
+ "median_filter",
109
+ "hue",
110
+ "gaussian_blur",
111
+ ],
112
  }
113
+ elif dataset_name == "sav_val_full/video":
114
  return {
115
  "first_cols": ["psnr", "ssim", "msssim", "lpips", "vmaf", "decoder_time"],
116
  "attack_scores": ["bit_acc", "log10_p_value", "TPR", "FPR"],
 
137
  "H264_Crop_Brightness2": "Mixed",
138
  "H264_Crop_Brightness3": "Mixed",
139
  },
140
+ "attacks_with_variations": [
141
+ "Rotate",
142
+ "Resize",
143
+ "Crop",
144
+ "Brightness",
145
+ "Contrast",
146
+ "Saturation",
147
+ "H264",
148
+ "H264rgb",
149
+ "H265",
150
+ ],
151
  }
152
  else:
153
  raise ValueError(f"Unknown dataset: {dataset_name}")
frontend/src/App.tsx CHANGED
@@ -1,17 +1,32 @@
1
- import { useState } from 'react'
2
  import Examples from './components/Examples'
3
- import LeaderBoardPage from './components/LeaderBoardPage'
4
 
5
  function App() {
6
  const [activeTab, setActiveTab] = useState<
7
  'leaderboard' | 'imageExamples' | 'audioExamples' | 'videoExamples'
8
  >('leaderboard')
 
 
 
 
 
9
 
10
  return (
11
  <div className="min-h-screen w-11/12 mx-auto">
12
- <div className="card max-w-4xl bg-base-100">
13
- <div className="card-body">
14
  <h2 className="card-title">🥇 Omni Seal Bench Watermarking Leaderboard</h2>
 
 
 
 
 
 
 
 
 
 
15
  </div>
16
  </div>
17
 
@@ -24,8 +39,8 @@ function App() {
24
  checked={activeTab === 'leaderboard'}
25
  onChange={() => setActiveTab('leaderboard')}
26
  />
27
- <div className="tab-content bg-base-100 border-base-300 p-6">
28
- <LeaderBoardPage />
29
  </div>
30
 
31
  <input
@@ -36,8 +51,8 @@ function App() {
36
  checked={activeTab === 'imageExamples'}
37
  onChange={() => setActiveTab('imageExamples')}
38
  />
39
- <div className="tab-content bg-base-100 border-base-300 p-6">
40
- <Examples fileType="image" />
41
  </div>
42
 
43
  <input
@@ -48,8 +63,8 @@ function App() {
48
  checked={activeTab === 'audioExamples'}
49
  onChange={() => setActiveTab('audioExamples')}
50
  />
51
- <div className="tab-content bg-base-100 border-base-300 p-6">
52
- <Examples fileType="audio" />
53
  </div>
54
 
55
  <input
@@ -60,8 +75,8 @@ function App() {
60
  checked={activeTab === 'videoExamples'}
61
  onChange={() => setActiveTab('videoExamples')}
62
  />
63
- <div className="tab-content bg-base-100 border-base-300 p-6">
64
- <Examples fileType="video" />
65
  </div>
66
  </div>
67
  </div>
 
1
+ import { useState, useEffect } from 'react'
2
  import Examples from './components/Examples'
3
+ import LeaderboardPage from './components/LeaderboardPage'
4
 
5
  function App() {
6
  const [activeTab, setActiveTab] = useState<
7
  'leaderboard' | 'imageExamples' | 'audioExamples' | 'videoExamples'
8
  >('leaderboard')
9
+ const [theme, setTheme] = useState<'dark' | 'light'>('dark')
10
+
11
+ useEffect(() => {
12
+ document.documentElement.setAttribute('data-theme', theme)
13
+ }, [theme])
14
 
15
  return (
16
  <div className="min-h-screen w-11/12 mx-auto">
17
+ <div className="bg-base-100 my-4">
18
+ <div className="flex flex-row justify-between items-center">
19
  <h2 className="card-title">🥇 Omni Seal Bench Watermarking Leaderboard</h2>
20
+ <div className="flex justify-end items-center gap-2">
21
+ <span className="text-sm">{theme === 'dark' ? '🌙 Dark Mode' : '☀️ Light Mode'}</span>
22
+ <input
23
+ type="checkbox"
24
+ className="toggle"
25
+ checked={theme === 'dark'}
26
+ onChange={() => setTheme(theme === 'dark' ? 'light' : 'dark')}
27
+ aria-label="Toggle dark mode"
28
+ />
29
+ </div>
30
  </div>
31
  </div>
32
 
 
39
  checked={activeTab === 'leaderboard'}
40
  onChange={() => setActiveTab('leaderboard')}
41
  />
42
+ <div className="tab-content bg-base-100 ">
43
+ <LeaderboardPage />
44
  </div>
45
 
46
  <input
 
51
  checked={activeTab === 'imageExamples'}
52
  onChange={() => setActiveTab('imageExamples')}
53
  />
54
+ <div className="tab-content bg-base-100 ">
55
+ {activeTab === 'imageExamples' ? <Examples fileType="image" /> : null}
56
  </div>
57
 
58
  <input
 
63
  checked={activeTab === 'audioExamples'}
64
  onChange={() => setActiveTab('audioExamples')}
65
  />
66
+ <div className="tab-content bg-base-100 ">
67
+ {activeTab === 'audioExamples' ? <Examples fileType="audio" /> : null}
68
  </div>
69
 
70
  <input
 
75
  checked={activeTab === 'videoExamples'}
76
  onChange={() => setActiveTab('videoExamples')}
77
  />
78
+ <div className="tab-content bg-base-100 ">
79
+ {activeTab === 'videoExamples' ? <Examples fileType="video" /> : null}
80
  </div>
81
  </div>
82
  </div>
frontend/src/components/DatasetSelector.tsx CHANGED
@@ -1,31 +1,31 @@
1
  import React from 'react'
2
 
3
  interface DatasetSelectorProps {
4
- datasets: string[]
5
- selectedDataset: string
6
- onDatasetChange: (dataset: string) => void
7
  }
8
 
9
  const DatasetSelector: React.FC<DatasetSelectorProps> = ({
10
- datasets,
11
- selectedDataset,
12
- onDatasetChange,
13
  }) => {
14
  return (
15
  <div className="mb-4">
16
- <fieldset className="fieldset w-full p-4 rounded border">
17
  <legend className="fieldset-legend font-semibold">Dataset</legend>
18
  <div className="flex flex-wrap gap-2">
19
- {datasets.map((dataset) => (
20
- <label key={dataset} className="flex items-center gap-2 cursor-pointer">
21
  <input
22
  type="radio"
23
  name="dataset"
24
  className="radio radio-sm"
25
- checked={selectedDataset === dataset}
26
- onChange={() => onDatasetChange(dataset)}
27
  />
28
- <span className="text-sm">{dataset}</span>
29
  </label>
30
  ))}
31
  </div>
 
1
  import React from 'react'
2
 
3
  interface DatasetSelectorProps {
4
+ datasetNames: string[]
5
+ selectedDatasetName: string
6
+ onDatasetNameChange: (datasetName: string) => void
7
  }
8
 
9
  const DatasetSelector: React.FC<DatasetSelectorProps> = ({
10
+ datasetNames,
11
+ selectedDatasetName,
12
+ onDatasetNameChange,
13
  }) => {
14
  return (
15
  <div className="mb-4">
16
+ <fieldset className="fieldset w-full p-4 rounded border border-gray-700">
17
  <legend className="fieldset-legend font-semibold">Dataset</legend>
18
  <div className="flex flex-wrap gap-2">
19
+ {datasetNames.map((datasetName) => (
20
+ <label key={datasetName} className="flex items-center gap-2 cursor-pointer">
21
  <input
22
  type="radio"
23
  name="dataset"
24
  className="radio radio-sm"
25
+ checked={selectedDatasetName === datasetName}
26
+ onChange={() => onDatasetNameChange(datasetName)}
27
  />
28
+ <span className="text-sm">{datasetName}</span>
29
  </label>
30
  ))}
31
  </div>
frontend/src/components/Examples.tsx CHANGED
@@ -1,6 +1,7 @@
1
  import React, { useState, useEffect } from 'react'
2
  import API from '../API'
3
  import AudioPlayer from './AudioPlayer'
 
4
 
5
  interface ExamplesProps {
6
  fileType: 'image' | 'audio' | 'video'
@@ -131,7 +132,7 @@ const Examples = ({ fileType }: ExamplesProps) => {
131
  )}
132
  </div>
133
 
134
- {loading && <p>Loading files...</p>}
135
  {error && <p className="error">Error: {error}</p>}
136
 
137
  {selectedModel && selectedAttack && (
 
1
  import React, { useState, useEffect } from 'react'
2
  import API from '../API'
3
  import AudioPlayer from './AudioPlayer'
4
+ import LoadingSpinner from './LoadingSpinner'
5
 
6
  interface ExamplesProps {
7
  fileType: 'image' | 'audio' | 'video'
 
132
  )}
133
  </div>
134
 
135
+ {loading && <LoadingSpinner />}
136
  {error && <p className="error">Error: {error}</p>}
137
 
138
  {selectedModel && selectedAttack && (
frontend/src/components/LeaderBoardPage.tsx DELETED
@@ -1,34 +0,0 @@
1
- import React, { useState } from 'react'
2
- import DatasetSelector from './DatasetSelector'
3
- import LeaderboardTable from './LeaderboardTable'
4
- import DataChart from './DataChart'
5
-
6
- const LeaderBoardPage: React.FC = () => {
7
- const datasets = [
8
- 'voxpopuli_1k_audio',
9
- 'ravdess_1k_audio',
10
- 'val2014_1k_image',
11
- 'sav_val_full_video',
12
- ]
13
- const [selectedDataset, setSelectedDataset] = useState('voxpopuli_1k_audio')
14
-
15
- return (
16
- <div className="space-y-6">
17
- <DatasetSelector
18
- datasets={datasets}
19
- selectedDataset={selectedDataset}
20
- onDatasetChange={setSelectedDataset}
21
- />
22
-
23
- <div className="space-y-8">
24
- <LeaderboardTable dataset={selectedDataset} />
25
- <div className="mt-8 pt-4 border-t border-gray-200">
26
- <h3 className="text-lg font-semibold mb-4">Performance Chart</h3>
27
- <DataChart dataset={selectedDataset} />
28
- </div>
29
- </div>
30
- </div>
31
- )
32
- }
33
-
34
- export default LeaderBoardPage
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
frontend/src/components/LeaderboardChart.tsx ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useEffect, useState } from 'react'
2
+ import {
3
+ LineChart,
4
+ Line,
5
+ XAxis,
6
+ YAxis,
7
+ CartesianGrid,
8
+ Tooltip,
9
+ Legend,
10
+ ResponsiveContainer,
11
+ } from 'recharts'
12
+ import API from '../API'
13
+ import LoadingSpinner from './LoadingSpinner'
14
+
15
+ interface LeaderboardChartProps {
16
+ dataset: string
17
+ selectedModels: Set<string>
18
+ }
19
+
20
+ interface Row {
21
+ metric: string
22
+ [key: string]: string | number
23
+ }
24
+
25
+ const MetricSelector = ({
26
+ metrics,
27
+ selectedMetric,
28
+ onMetricChange,
29
+ }: {
30
+ metrics: Set<string>
31
+ selectedMetric: string | null
32
+ onMetricChange: (event: React.ChangeEvent<HTMLSelectElement>) => void
33
+ }) => {
34
+ return (
35
+ <fieldset className="fieldset">
36
+ <legend className="fieldset-legend">Metric</legend>
37
+ <select
38
+ id="metric-selector"
39
+ value={selectedMetric || ''}
40
+ onChange={onMetricChange}
41
+ className="select select-bordered w-full"
42
+ >
43
+ {[...metrics].map((metric) => (
44
+ <option key={metric} value={metric}>
45
+ {metric}
46
+ </option>
47
+ ))}
48
+ </select>
49
+ </fieldset>
50
+ )
51
+ }
52
+
53
+ const AttackSelector = ({
54
+ attacks,
55
+ selectedAttack,
56
+ onAttackChange,
57
+ }: {
58
+ attacks: Set<string>
59
+ selectedAttack: string | null
60
+ onAttackChange: (event: React.ChangeEvent<HTMLSelectElement>) => void
61
+ }) => {
62
+ return (
63
+ <fieldset className="fieldset mb-4">
64
+ <legend className="fieldset-legend">Attack</legend>
65
+ <select
66
+ id="attack-selector"
67
+ value={selectedAttack || ''}
68
+ onChange={onAttackChange}
69
+ className="select select-bordered w-full"
70
+ >
71
+ {[...attacks].map((attack) => (
72
+ <option key={attack} value={attack}>
73
+ {attack}
74
+ </option>
75
+ ))}
76
+ </select>
77
+ </fieldset>
78
+ )
79
+ }
80
+
81
+ const LeaderboardChart = ({ dataset, selectedModels }: LeaderboardChartProps) => {
82
+ const [chartData, setChartData] = useState<Row[]>([])
83
+ const [loading, setLoading] = useState(true)
84
+ const [error, setError] = useState<string | null>(null)
85
+ const [metrics, setMetrics] = useState<Set<string>>(new Set())
86
+ const [attacks, setAttacks] = useState<Set<string>>(new Set())
87
+ const [selectedMetric, setSelectedMetric] = useState<string | null>(null)
88
+ const [selectedAttack, setSelectedAttack] = useState<string | null>(null)
89
+
90
+ useEffect(() => {
91
+ setLoading(true)
92
+ API.fetchStaticFile(`data/${dataset}?dataset_type=attacks_variations`)
93
+ .then((response) => {
94
+ const data = JSON.parse(response)
95
+ const rows: Row[] = data['all_attacks_df'].map((row: any) => {
96
+ const newRow: Row = { ...row }
97
+ // Convert strength value to number if it exists and is a string
98
+ if (typeof newRow.strength === 'string') {
99
+ newRow.strength = parseFloat(newRow.strength)
100
+ }
101
+ return newRow
102
+ })
103
+
104
+ setSelectedMetric(data['metrics'][0])
105
+ setMetrics(new Set(data['metrics']))
106
+ setSelectedAttack(data['attacks_with_variations'][0])
107
+ setAttacks(new Set(data['attacks_with_variations']))
108
+ setChartData(rows)
109
+ setLoading(false)
110
+ })
111
+ .catch((err) => {
112
+ setError('Failed to fetch JSON: ' + err.message)
113
+ setLoading(false)
114
+ })
115
+ }, [dataset])
116
+
117
+ const handleMetricChange = (event: React.ChangeEvent<HTMLSelectElement>) => {
118
+ setSelectedMetric(event.target.value)
119
+ }
120
+
121
+ const handleAttackChange = (event: React.ChangeEvent<HTMLSelectElement>) => {
122
+ setSelectedAttack(event.target.value)
123
+ }
124
+
125
+ // Sort the chart data by the 'strength' field before rendering
126
+ const sortedChartData = chartData
127
+ .filter((row) => !selectedAttack || row.attack === selectedAttack)
128
+ .sort((a, b) => (a.strength as number) - (b.strength as number))
129
+
130
+ return (
131
+ <div className="rounded shadow p-4 overflow-auto mb-8">
132
+ {loading && <LoadingSpinner />}
133
+ {error && <div className="text-red-500">{error}</div>}
134
+ {!loading && !error && (
135
+ <>
136
+ <div className="flex flex-col md:flex-row md:gap-x-4 mb-4">
137
+ <div className="w-full md:w-1/2">
138
+ <MetricSelector
139
+ metrics={metrics}
140
+ selectedMetric={selectedMetric}
141
+ onMetricChange={handleMetricChange}
142
+ />
143
+ </div>
144
+ <div className="w-full md:w-1/2">
145
+ <AttackSelector
146
+ attacks={attacks}
147
+ selectedAttack={selectedAttack}
148
+ onAttackChange={handleAttackChange}
149
+ />
150
+ </div>
151
+ </div>
152
+
153
+ {chartData.length > 0 && (
154
+ <div className="h-64 mb-4">
155
+ <ResponsiveContainer width="100%" height="100%">
156
+ <LineChart
157
+ data={sortedChartData}
158
+ margin={{
159
+ top: 5,
160
+ right: 30,
161
+ left: 20,
162
+ bottom: 5,
163
+ }}
164
+ >
165
+ <CartesianGrid strokeDasharray="3 3" />
166
+ <XAxis
167
+ dataKey="strength"
168
+ domain={[
169
+ Math.min(...sortedChartData.map((item) => Number(item.strength))),
170
+ Math.max(...sortedChartData.map((item) => Number(item.strength))),
171
+ ]}
172
+ type="number"
173
+ tickFormatter={(value) => value.toFixed(3)}
174
+ label={{ value: 'Strength', position: 'insideBottomRight', offset: -5 }}
175
+ />
176
+ <YAxis
177
+ label={{
178
+ value: selectedMetric || '',
179
+ angle: -90,
180
+ position: 'insideLeft',
181
+ style: { textAnchor: 'middle' },
182
+ }}
183
+ tickFormatter={(value) => value.toFixed(3)}
184
+ />
185
+ <Tooltip
186
+ contentStyle={{
187
+ backgroundColor: '#2a303c',
188
+ borderColor: '#374151',
189
+ color: 'white',
190
+ }}
191
+ formatter={(value: number) => value.toFixed(3)}
192
+ />
193
+ <Legend />
194
+
195
+ {(() => {
196
+ // Ensure selectedMetric is not null before rendering the Line components
197
+ if (!selectedMetric) return null // Do not render lines if no metric is selected
198
+
199
+ // Get unique models from the filtered and sorted data
200
+ const models = new Set(
201
+ sortedChartData
202
+ .filter((row) => selectedModels.has(row.model as string))
203
+ .map((row) => row.model)
204
+ )
205
+
206
+ // Generate different colors for each model
207
+ const colors = [
208
+ '#8884d8',
209
+ '#82ca9d',
210
+ '#ffc658',
211
+ '#ff8042',
212
+ '#0088fe',
213
+ '#00C49F',
214
+ ]
215
+
216
+ // Return a Line component for each model
217
+ return [...models].map((model, index) => {
218
+ return (
219
+ <Line
220
+ key={model as string}
221
+ type="monotone"
222
+ dataKey={selectedMetric as string} // Ensure selectedMetric is a string
223
+ data={sortedChartData.filter((row) => row.model === model)}
224
+ name={model as string}
225
+ stroke={colors[index % colors.length]}
226
+ dot={false}
227
+ />
228
+ )
229
+ })
230
+ })()}
231
+ </LineChart>
232
+ </ResponsiveContainer>
233
+ </div>
234
+ )}
235
+ </>
236
+ )}
237
+ </div>
238
+ )
239
+ }
240
+
241
+ export default LeaderboardChart
frontend/src/components/LeaderboardFilter.tsx CHANGED
@@ -173,7 +173,7 @@ const LeaderboardFilter: React.FC<FilterProps> = ({
173
  <input
174
  type="text"
175
  placeholder="Search metrics..."
176
- className="input input-bordered border-white input-sm w-48 pr-8"
177
  value={searchTerm}
178
  onChange={(e) => {
179
  const value = e.target.value
 
173
  <input
174
  type="text"
175
  placeholder="Search metrics..."
176
+ className="input input-bordered border-gray-300 input-sm w-48 pr-8"
177
  value={searchTerm}
178
  onChange={(e) => {
179
  const value = e.target.value
frontend/src/components/LeaderboardPage.tsx ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useState, useEffect } from 'react'
2
+ import DatasetSelector from './DatasetSelector'
3
+ import LeaderboardTable from './LeaderboardTable'
4
+ import LeaderboardChart from './LeaderboardChart'
5
+ import ModelFilter from './ModelFilter'
6
+ import API from '../API'
7
+ import LoadingSpinner from './LoadingSpinner'
8
+
9
+ const LeaderboardPage: React.FC = () => {
10
+ const datasetNames = [
11
+ 'voxpopuli_1k/audio',
12
+ 'ravdess_1k/audio',
13
+ 'val2014_1k/image',
14
+ 'sav_val_full/video',
15
+ ]
16
+ const [selectedDatasetName, setSelectedDatasetName] = useState(datasetNames[0])
17
+ const [models, setModels] = useState<string[]>([])
18
+ const [selectedModels, setSelectedModels] = useState<Set<string>>(new Set())
19
+ const [loading, setLoading] = useState(true)
20
+ const [benchmarkData, setBenchmarkData] = useState<any>(null)
21
+
22
+ // Fetch available models when dataset changes
23
+ useEffect(() => {
24
+ setLoading(true)
25
+ API.fetchStaticFile(`data/${selectedDatasetName}?dataset_type=benchmark`)
26
+ .then((response) => {
27
+ const data = JSON.parse(response)
28
+ setBenchmarkData(data)
29
+ const rows = data['rows']
30
+ const allKeys: string[] = Array.from(new Set(rows.flatMap((row: any) => Object.keys(row))))
31
+ // Remove 'metric' from headers if it exists
32
+ const headers = allKeys.filter((key) => key !== 'metric')
33
+
34
+ setModels(headers)
35
+ // Initialize all models as selected
36
+ setSelectedModels(new Set(headers))
37
+ setLoading(false)
38
+ })
39
+ .catch((err) => {
40
+ console.error('Failed to fetch models:', err)
41
+ setLoading(false)
42
+ })
43
+ }, [selectedDatasetName])
44
+
45
+ return (
46
+ <div className="">
47
+ <div className="flex flex-col gap-4">
48
+ <DatasetSelector
49
+ datasetNames={datasetNames}
50
+ selectedDatasetName={selectedDatasetName}
51
+ onDatasetNameChange={setSelectedDatasetName}
52
+ />
53
+ </div>
54
+ {loading ? (
55
+ <LoadingSpinner />
56
+ ) : (
57
+ <>
58
+ {models.length > 0 && (
59
+ <ModelFilter
60
+ models={models}
61
+ selectedModels={selectedModels}
62
+ setSelectedModels={setSelectedModels}
63
+ />
64
+ )}
65
+ <div className="space-y-8">
66
+ <LeaderboardTable benchmarkData={benchmarkData} selectedModels={selectedModels} />
67
+ <div className="mt-8 pt-4 border-t border-gray-200">
68
+ <LeaderboardChart dataset={selectedDatasetName} selectedModels={selectedModels} />
69
+ </div>
70
+ </div>
71
+ </>
72
+ )}
73
+ </div>
74
+ )
75
+ }
76
+
77
+ export default LeaderboardPage
frontend/src/components/LeaderboardTable.tsx CHANGED
@@ -1,10 +1,10 @@
1
  import React, { useEffect, useState } from 'react'
2
- import API from '../API'
3
  import LeaderboardFilter from './LeaderboardFilter'
4
- import ModelFilter from './ModelFilter'
5
 
6
  interface LeaderboardTableProps {
7
- dataset: string
 
8
  }
9
 
10
  interface Row {
@@ -16,119 +16,126 @@ interface Groups {
16
  [group: string]: { [subgroup: string]: string[] }
17
  }
18
 
19
- interface GroupStats {
20
- average: { [key: string]: number }
21
- stdDev: { [key: string]: number }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  }
23
 
24
- const LeaderboardTable: React.FC<LeaderboardTableProps> = ({ dataset }) => {
25
  const [tableRows, setTableRows] = useState<Row[]>([])
26
  const [tableHeader, setTableHeader] = useState<string[]>([])
27
- const [loading, setLoading] = useState(true)
28
  const [error, setError] = useState<string | null>(null)
29
  const [groups, setGroups] = useState<Groups>({})
30
  const [openGroups, setOpenGroups] = useState<{ [key: string]: boolean }>({})
31
  const [openSubGroups, setOpenSubGroups] = useState<{ [key: string]: { [key: string]: boolean } }>(
32
  {}
33
  )
34
-
35
  const [selectedMetrics, setSelectedMetrics] = useState<Set<string>>(new Set())
36
- const [selectedModels, setSelectedModels] = useState<Set<string>>(new Set())
37
-
38
- // To store the unique metrics from the Overall group
39
  const [overallMetrics, setOverallMetrics] = useState<string[]>([])
 
40
 
41
  useEffect(() => {
42
- API.fetchStaticFile(`data/${dataset}_benchmark`)
43
- .then((response) => {
44
- const data = JSON.parse(response)
45
- const rows: Row[] = data['rows']
46
- // Split out the Overall group from groups
47
- const allGroups = data['groups'] as { [key: string]: string[] }
48
- // const overallGroup = allGroups['Overall'] || []
49
- // Remove 'Overall' from groups
50
- const { Overall: overallGroup, ...groups } = allGroups
51
- const uniqueMetrics = new Set<string>()
52
-
53
- overallGroup.forEach((metric) => {
54
- if (metric.includes('_')) {
55
- // Extract the part after the first underscore
56
- const metricName = metric.split('_').slice(1).join('_')
57
- uniqueMetrics.add(metricName)
58
- }
 
 
 
 
 
59
  })
60
-
61
- setOverallMetrics(Array.from(uniqueMetrics).sort())
62
-
63
- // Each value of groups is a list of metrics, group them by the first part of the metric before the first _
64
- const groupsData = Object.entries(groups)
65
- .sort(([groupA], [groupB]) => {
66
- // Make sure "overall" comes first
67
- if (groupA === 'Overall') return -1
68
- if (groupB === 'Overall') return 1
69
- // Otherwise sort alphabetically
70
- return groupA.localeCompare(groupB)
71
- })
72
- .reduce(
73
- (acc, [group, metrics]) => {
74
- // Sort metrics to ensure consistent subgroup order
75
- const sortedMetrics = [...metrics].sort()
76
-
77
- // Create and sort subgroups
78
- acc[group] = sortedMetrics.reduce<{ [key: string]: string[] }>((subAcc, metric) => {
79
- const [mainGroup, subGroup] = metric.split('_')
80
- if (!subAcc[mainGroup]) {
81
- subAcc[mainGroup] = []
82
- }
83
- subAcc[mainGroup].push(metric)
84
- return subAcc
85
- }, {})
86
-
87
- // Convert to sorted entries and back to object
88
- acc[group] = Object.fromEntries(
89
- Object.entries(acc[group]).sort(([subGroupA], [subGroupB]) =>
90
- subGroupA.localeCompare(subGroupB)
91
- )
92
  )
93
-
94
- return acc
95
- },
96
- {} as { [key: string]: { [key: string]: string[] } }
97
- )
98
-
99
- const allKeys: string[] = Array.from(new Set(rows.flatMap((row) => Object.keys(row))))
100
- // Remove 'metric' from headers if it exists
101
- const headers = allKeys.filter((key) => key !== 'metric')
102
-
103
- // Initialize open states for groups and subgroups
104
- const initialOpenGroups: { [key: string]: boolean } = {}
105
- const initialOpenSubGroups: { [key: string]: { [key: string]: boolean } } = {}
106
-
107
- Object.keys(groupsData).forEach((group) => {
108
- initialOpenGroups[group] = false
109
- initialOpenSubGroups[group] = {}
110
- Object.keys(groupsData[group]).forEach((subGroup) => {
111
- initialOpenSubGroups[group][subGroup] = false
112
- })
113
  })
114
-
115
- // Get all metrics from all groups
116
- const allMetrics = Object.values(groups).flat()
117
- setSelectedMetrics(new Set(allMetrics))
118
- // Initialize all models as selected
119
- setSelectedModels(new Set(headers))
120
- setTableHeader(headers)
121
- setTableRows(rows)
122
- setGroups(groupsData)
123
- setOpenGroups(initialOpenGroups)
124
- setOpenSubGroups(initialOpenSubGroups)
125
- setLoading(false)
126
- })
127
- .catch((err) => {
128
- setError('Failed to fetch JSON: ' + err.message)
129
- setLoading(false)
130
  })
131
- }, [dataset])
 
 
 
 
 
 
 
 
 
 
 
132
 
133
  const toggleGroup = (group: string) => {
134
  setOpenGroups((prev) => ({ ...prev, [group]: !prev[group] }))
@@ -227,23 +234,21 @@ const LeaderboardTable: React.FC<LeaderboardTableProps> = ({ dataset }) => {
227
  }
228
 
229
  return (
230
- <div className="rounded shadow overflow-auto">
231
- {loading && <div>Loading...</div>}
232
  {error && <div className="text-red-500">{error}</div>}
233
-
234
- {!loading && !error && (
235
- <div className="overflow-x-auto">
236
  <div className="flex flex-col gap-4">
237
- <ModelFilter
238
- models={tableHeader}
239
- selectedModels={selectedModels}
240
- setSelectedModels={setSelectedModels}
241
  />
242
- <LeaderboardFilter
243
  groups={groups}
244
  selectedMetrics={selectedMetrics}
245
  setSelectedMetrics={setSelectedMetrics}
246
- />
247
  </div>
248
 
249
  {selectedModels.size === 0 || selectedMetrics.size === 0 ? (
@@ -252,332 +257,25 @@ const LeaderboardTable: React.FC<LeaderboardTableProps> = ({ dataset }) => {
252
  </div>
253
  ) : (
254
  <>
255
- <table className="table w-full">
256
- <thead>
257
- <tr>
258
- <th>Group / Subgroup</th>
259
- {overallMetrics.map((metric) => (
260
- <th
261
- key={metric}
262
- colSpan={tableHeader.filter((model) => selectedModels.has(model)).length}
263
- className="text-center border-x"
264
- >
265
- {metric}
266
- </th>
267
- ))}
268
- </tr>
269
- <tr>
270
- <th></th>
271
- {overallMetrics.map((metric) => (
272
- <React.Fragment key={`header-models-${metric}`}>
273
- {tableHeader
274
- .filter((model) => selectedModels.has(model))
275
- .map((model) => (
276
- <th key={`${metric}-${model}`} className="text-center text-xs">
277
- {model}
278
- </th>
279
- ))}
280
- </React.Fragment>
281
- ))}
282
- </tr>
283
- </thead>
284
- <tbody>
285
- {/* First render each group */}
286
- {Object.entries(groups).map(([group, subGroups]) => {
287
- // Skip the "Overall" group completely
288
- if (group === 'Overall') return null
289
-
290
- // Get all metrics for this group
291
- const allGroupMetrics = Object.values(subGroups).flat()
292
- // Filter to only include selected metrics
293
- const visibleGroupMetrics = filterMetricsByGroupAndSubgroup(
294
- allGroupMetrics,
295
- group
296
- )
297
-
298
- // Skip this group if no metrics are selected
299
- if (visibleGroupMetrics.length === 0) return null
300
-
301
- return (
302
- <React.Fragment key={group}>
303
- {/* Group row with average stats for the entire group */}
304
- <tr
305
- className="bg-base-200 cursor-pointer hover:bg-base-300"
306
- onClick={() => toggleGroup(group)}
307
- >
308
- <td className="font-medium">
309
- {openGroups[group] ? '▼ ' : '▶ '}
310
- {group}
311
- </td>
312
- {/* For each metric column */}
313
- {overallMetrics.map((metric) => (
314
- // Render sub-columns for each model
315
- <React.Fragment key={`${group}-${metric}`}>
316
- {tableHeader
317
- .filter((model) => selectedModels.has(model))
318
- .map((col) => {
319
- // Find all metrics in this group that match the current metric name
320
- const allMetricsWithName = findAllMetricsForName(metric)
321
- const metricsInGroupForThisMetric = visibleGroupMetrics.filter(
322
- (m) => allMetricsWithName.includes(m)
323
- )
324
- const stats = calculateStats(metricsInGroupForThisMetric, col)
325
-
326
- return (
327
- <td
328
- key={`${group}-${metric}-${col}`}
329
- className="font-medium text-center"
330
- >
331
- {!isNaN(stats.avg)
332
- ? `${stats.avg.toFixed(3)} ± ${stats.stdDev.toFixed(3)}`
333
- : 'N/A'}
334
- </td>
335
- )
336
- })}
337
- </React.Fragment>
338
- ))}
339
- </tr>
340
-
341
- {/* Only render subgroups if group is open */}
342
- {openGroups[group] &&
343
- Object.entries(subGroups).map(([subGroup, metrics]) => {
344
- // Filter to only include selected metrics in this subgroup
345
- const visibleSubgroupMetrics = filterMetricsByGroupAndSubgroup(
346
- metrics,
347
- group,
348
- subGroup
349
- )
350
-
351
- // Skip this subgroup if no metrics are selected
352
- if (visibleSubgroupMetrics.length === 0) return null
353
-
354
- return (
355
- <React.Fragment key={`${group}-${subGroup}`}>
356
- {/* Subgroup row with average stats for the subgroup */}
357
- <tr
358
- className="bg-base-100 cursor-pointer hover:bg-base-200"
359
- onClick={() => toggleSubGroup(group, subGroup)}
360
- >
361
- <td className="pl-6 font-medium">
362
- {openSubGroups[group]?.[subGroup] ? '▼ ' : '▶ '}
363
- {subGroup}
364
- </td>
365
- {/* For each metric column */}
366
- {overallMetrics.map((metric) => (
367
- // Render sub-columns for each model
368
- <React.Fragment key={`${group}-${subGroup}-${metric}`}>
369
- {tableHeader
370
- .filter((model) => selectedModels.has(model))
371
- .map((col) => {
372
- // Find all metrics in this subgroup that match the current metric name
373
- const allMetricsWithName = findAllMetricsForName(metric)
374
- const metricsInSubgroupForThisMetric =
375
- visibleSubgroupMetrics.filter((m) =>
376
- allMetricsWithName.includes(m)
377
- )
378
- const stats = calculateStats(
379
- metricsInSubgroupForThisMetric,
380
- col
381
- )
382
-
383
- return (
384
- <td
385
- key={`${group}-${subGroup}-${metric}-${col}`}
386
- className="font-medium text-center"
387
- >
388
- {!isNaN(stats.avg)
389
- ? `${stats.avg.toFixed(3)} ± ${stats.stdDev.toFixed(3)}`
390
- : 'N/A'}
391
- </td>
392
- )
393
- })}
394
- </React.Fragment>
395
- ))}
396
- </tr>
397
-
398
- {/* Individual metric rows */}
399
- {openSubGroups[group]?.[subGroup] &&
400
- // Sort visibleSubgroupMetrics alphabetically by the clean metric name
401
- [...visibleSubgroupMetrics]
402
- .sort((a, b) => {
403
- // For metrics with format {category}_{strength}_{overall_metric_name},
404
- // First sort by category, then by overall_metric_name, then by strength
405
-
406
- // First extract the overall metric group
407
- const getOverallMetricGroup = (metric: string) => {
408
- for (const overall of overallMetrics) {
409
- if (
410
- metric.endsWith(`_${overall}`) ||
411
- metric === overall
412
- ) {
413
- return overall
414
- }
415
- }
416
- return ''
417
- }
418
-
419
- const overallA = getOverallMetricGroup(a)
420
- const overallB = getOverallMetricGroup(b)
421
-
422
- // Extract the strength (last part before the overall metric)
423
- const stripOverall = (metric: string, overall: string) => {
424
- if (metric.endsWith(`_${overall}`)) {
425
- // Remove the overall metric group and any preceding underscore
426
- const stripped = metric.slice(
427
- 0,
428
- metric.length - overall.length - 1
429
- )
430
- const parts = stripped.split('_')
431
- return parts.length > 0 ? parts[parts.length - 1] : ''
432
- }
433
- return metric
434
- }
435
-
436
- // Extract the category (what remains after removing strength and overall_metric_name)
437
- const getCategory = (metric: string, overall: string) => {
438
- if (metric.endsWith(`_${overall}`)) {
439
- const stripped = metric.slice(
440
- 0,
441
- metric.length - overall.length - 1
442
- )
443
- const parts = stripped.split('_')
444
- // Remove the last part (strength) and join the rest (category)
445
- return parts.length > 1
446
- ? parts.slice(0, parts.length - 1).join('_')
447
- : ''
448
- }
449
- return metric
450
- }
451
-
452
- const categoryA = getCategory(a, overallA)
453
- const categoryB = getCategory(b, overallB)
454
-
455
- // First sort by category
456
- if (categoryA !== categoryB) {
457
- return categoryA.localeCompare(categoryB)
458
- }
459
-
460
- // Then sort by overall metric name
461
- if (overallA !== overallB) {
462
- return overallA.localeCompare(overallB)
463
- }
464
-
465
- // Finally sort by strength
466
- const subA = stripOverall(a, overallA)
467
- const subB = stripOverall(b, overallB)
468
-
469
- // Try to parse subA and subB as numbers, handling k/m/b suffixes
470
- const parseNumber = (str: string) => {
471
- const match = str.match(/^(\d+(?:\.\d+)?)([kKmMbB]?)$/)
472
- if (!match) return NaN
473
- let [_, num, suffix] = match
474
- let value = parseFloat(num)
475
- switch (suffix.toLowerCase()) {
476
- case 'k':
477
- value *= 1e3
478
- break
479
- case 'm':
480
- value *= 1e6
481
- break
482
- case 'b':
483
- value *= 1e9
484
- break
485
- }
486
- return value
487
- }
488
-
489
- const numA = parseNumber(subA)
490
- const numB = parseNumber(subB)
491
-
492
- if (!isNaN(numA) && !isNaN(numB)) {
493
- return numA - numB
494
- }
495
- // Fallback to string comparison if not both numbers
496
- return subA.localeCompare(subB)
497
- })
498
- .map((metric) => {
499
- const row = tableRows.find((r) => r.metric === metric)
500
- if (!row) return null
501
-
502
- // Extract the metric name (after the underscore)
503
- const metricName = metric.includes('_')
504
- ? metric.split('_').slice(1).join('_')
505
- : metric
506
-
507
- return (
508
- <tr key={metric} className="hover:bg-base-100">
509
- <td className="pl-10">{metric}</td>
510
- {/* For each metric column */}
511
- {overallMetrics.map((oMetric) => {
512
- // Only show values for the matching metric
513
- const isMatchingMetric =
514
- findAllMetricsForName(oMetric).includes(metric)
515
-
516
- if (!isMatchingMetric) {
517
- // Fill empty cells for non-matching metrics
518
- return (
519
- <React.Fragment key={`${metric}-${oMetric}`}>
520
- {tableHeader
521
- .filter((model) => selectedModels.has(model))
522
- .map((col) => (
523
- <td
524
- key={`${metric}-${oMetric}-${col}`}
525
- className="text-center"
526
- ></td>
527
- ))}
528
- </React.Fragment>
529
- )
530
- }
531
-
532
- // Show values for the matching metric
533
- return (
534
- <React.Fragment key={`${metric}-${oMetric}`}>
535
- {tableHeader
536
- .filter((model) => selectedModels.has(model))
537
- .map((col) => {
538
- const cell = row[col]
539
- return (
540
- <td
541
- key={`${metric}-${oMetric}-${col}`}
542
- className="text-center"
543
- >
544
- {!isNaN(Number(cell))
545
- ? Number(Number(cell).toFixed(3))
546
- : cell}
547
- </td>
548
- )
549
- })}
550
- </React.Fragment>
551
- )
552
- })}
553
- </tr>
554
- )
555
- })}
556
- </React.Fragment>
557
- )
558
- })}
559
- </React.Fragment>
560
- )
561
- })}
562
- </tbody>
563
- </table>
564
-
565
- {/* Separate table for metrics that don't belong to any overall group */}
566
  {(() => {
567
  const standaloneMetrics = findStandaloneMetrics()
568
  if (standaloneMetrics.length === 0) return null
569
-
570
  return (
571
- <div className="mt-8">
572
- <h4 className="font-bold mb-2">Other Metrics</h4>
573
- <table className="table w-full">
574
  <thead>
575
  <tr>
576
- <th>Metric</th>
 
 
577
  {tableHeader
578
  .filter((model) => selectedModels.has(model))
579
  .map((model) => (
580
- <th key={`standalone-${model}`} className="text-center text-xs">
 
 
 
581
  {model}
582
  </th>
583
  ))}
@@ -587,16 +285,20 @@ const LeaderboardTable: React.FC<LeaderboardTableProps> = ({ dataset }) => {
587
  {standaloneMetrics.sort().map((metric) => {
588
  const row = tableRows.find((r) => r.metric === metric)
589
  if (!row) return null
590
-
591
  return (
592
  <tr key={`standalone-${metric}`} className="hover:bg-base-100">
593
- <td>{metric}</td>
 
 
594
  {tableHeader
595
  .filter((model) => selectedModels.has(model))
596
  .map((col) => {
597
  const cell = row[col]
598
  return (
599
- <td key={`standalone-${metric}-${col}`} className="text-center">
 
 
 
600
  {!isNaN(Number(cell))
601
  ? Number(Number(cell).toFixed(3))
602
  : cell}
@@ -611,6 +313,342 @@ const LeaderboardTable: React.FC<LeaderboardTableProps> = ({ dataset }) => {
611
  </div>
612
  )
613
  })()}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
614
  </>
615
  )}
616
  </div>
 
1
  import React, { useEffect, useState } from 'react'
 
2
  import LeaderboardFilter from './LeaderboardFilter'
3
+ import LoadingSpinner from './LoadingSpinner'
4
 
5
  interface LeaderboardTableProps {
6
+ benchmarkData: any
7
+ selectedModels: Set<string>
8
  }
9
 
10
  interface Row {
 
16
  [group: string]: { [subgroup: string]: string[] }
17
  }
18
 
19
/**
 * Checkbox grid that lets the user choose which overall metrics are selected.
 *
 * Props:
 *  - overallMetrics: every metric name to offer, rendered in the given order.
 *  - selectedOverallMetrics: the metric names whose checkbox is currently ticked.
 *  - setSelectedOverallMetrics: called with a brand-new Set after each toggle.
 */
const OverallMetricFilter: React.FC<{
  overallMetrics: string[]
  selectedOverallMetrics: Set<string>
  setSelectedOverallMetrics: (metrics: Set<string>) => void
}> = (props) => {
  const { overallMetrics, selectedOverallMetrics, setSelectedOverallMetrics } = props

  // Flip one metric in a copy of the selection; the Set received via props is never mutated.
  const flipMetric = (name: string): void => {
    const next = new Set(selectedOverallMetrics)
    // Set.delete returns true when the entry existed, so add only when nothing was removed.
    if (!next.delete(name)) {
      next.add(name)
    }
    setSelectedOverallMetrics(next)
  }

  // One labelled checkbox per available metric.
  const checkboxes = overallMetrics.map((name) => (
    <label key={name} className="flex items-center gap-2 text-sm">
      <input
        type="checkbox"
        className="form-checkbox h-4 w-4"
        checked={selectedOverallMetrics.has(name)}
        onChange={() => flipMetric(name)}
      />
      <span className="truncate" title={name}>
        {name}
      </span>
    </label>
  ))

  return (
    <div className="w-full mb-4">
      <fieldset className="fieldset w-full p-4 rounded border border-gray-700">
        <legend className="fieldset-legend font-semibold">
          Metrics ({selectedOverallMetrics.size}/{overallMetrics.length})
        </legend>
        <div className="grid grid-cols-2 md:grid-cols-4 lg:grid-cols-6 gap-1 max-h-48 overflow-y-auto pr-2">
          {checkboxes}
        </div>
      </fieldset>
    </div>
  )
}
58
 
59
+ const LeaderboardTable: React.FC<LeaderboardTableProps> = ({ benchmarkData, selectedModels }) => {
60
  const [tableRows, setTableRows] = useState<Row[]>([])
61
  const [tableHeader, setTableHeader] = useState<string[]>([])
 
62
  const [error, setError] = useState<string | null>(null)
63
  const [groups, setGroups] = useState<Groups>({})
64
  const [openGroups, setOpenGroups] = useState<{ [key: string]: boolean }>({})
65
  const [openSubGroups, setOpenSubGroups] = useState<{ [key: string]: { [key: string]: boolean } }>(
66
  {}
67
  )
 
68
  const [selectedMetrics, setSelectedMetrics] = useState<Set<string>>(new Set())
 
 
 
69
  const [overallMetrics, setOverallMetrics] = useState<string[]>([])
70
+ const [selectedOverallMetrics, setSelectedOverallMetrics] = useState<Set<string>>(new Set())
71
 
72
  useEffect(() => {
73
+ if (!benchmarkData) {
74
+ return
75
+ }
76
+ try {
77
+ const data = benchmarkData
78
+ const rows: Row[] = data['rows']
79
+ const allGroups = data['groups'] as { [key: string]: string[] }
80
+ const { Overall: overallGroup, ...groups } = allGroups
81
+ const uniqueMetrics = new Set<string>()
82
+ overallGroup?.forEach((metric) => {
83
+ if (metric.includes('_')) {
84
+ const metricName = metric.split('_').slice(1).join('_')
85
+ uniqueMetrics.add(metricName)
86
+ }
87
+ })
88
+ setOverallMetrics(Array.from(uniqueMetrics).sort())
89
+ setSelectedOverallMetrics(new Set(Array.from(uniqueMetrics)))
90
+ const groupsData = Object.entries(groups)
91
+ .sort(([groupA], [groupB]) => {
92
+ if (groupA === 'Overall') return -1
93
+ if (groupB === 'Overall') return 1
94
+ return groupA.localeCompare(groupB)
95
  })
96
+ .reduce(
97
+ (acc, [group, metrics]) => {
98
+ const sortedMetrics = [...metrics].sort()
99
+ acc[group] = sortedMetrics.reduce<{ [key: string]: string[] }>((subAcc, metric) => {
100
+ const [mainGroup, subGroup] = metric.split('_')
101
+ if (!subAcc[mainGroup]) {
102
+ subAcc[mainGroup] = []
103
+ }
104
+ subAcc[mainGroup].push(metric)
105
+ return subAcc
106
+ }, {})
107
+ acc[group] = Object.fromEntries(
108
+ Object.entries(acc[group]).sort(([subGroupA], [subGroupB]) =>
109
+ subGroupA.localeCompare(subGroupB)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  )
111
+ )
112
+ return acc
113
+ },
114
+ {} as { [key: string]: { [key: string]: string[] } }
115
+ )
116
+ const allKeys: string[] = Array.from(new Set(rows.flatMap((row) => Object.keys(row))))
117
+ const headers = allKeys.filter((key) => key !== 'metric')
118
+ const initialOpenGroups: { [key: string]: boolean } = {}
119
+ const initialOpenSubGroups: { [key: string]: { [key: string]: boolean } } = {}
120
+ Object.keys(groupsData).forEach((group) => {
121
+ initialOpenGroups[group] = false
122
+ initialOpenSubGroups[group] = {}
123
+ Object.keys(groupsData[group]).forEach((subGroup) => {
124
+ initialOpenSubGroups[group][subGroup] = false
 
 
 
 
 
 
125
  })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  })
127
+ const allMetrics = Object.values(groups).flat()
128
+ setSelectedMetrics(new Set(allMetrics))
129
+ setTableHeader(headers)
130
+ setTableRows(rows)
131
+ setGroups(groupsData)
132
+ setOpenGroups(initialOpenGroups)
133
+ setOpenSubGroups(initialOpenSubGroups)
134
+ setError(null)
135
+ } catch (err: any) {
136
+ setError('Failed to parse benchmark data, please try again: ' + err.message)
137
+ }
138
+ }, [benchmarkData])
139
 
140
  const toggleGroup = (group: string) => {
141
  setOpenGroups((prev) => ({ ...prev, [group]: !prev[group] }))
 
234
  }
235
 
236
  return (
237
+ <div className="rounded shadow">
 
238
  {error && <div className="text-red-500">{error}</div>}
239
+ {!error && (
240
+ <div className="flex flex-col gap-8">
 
241
  <div className="flex flex-col gap-4">
242
+ <OverallMetricFilter
243
+ overallMetrics={overallMetrics}
244
+ selectedOverallMetrics={selectedOverallMetrics}
245
+ setSelectedOverallMetrics={setSelectedOverallMetrics}
246
  />
247
+ {/* <LeaderboardFilter
248
  groups={groups}
249
  selectedMetrics={selectedMetrics}
250
  setSelectedMetrics={setSelectedMetrics}
251
+ /> */}
252
  </div>
253
 
254
  {selectedModels.size === 0 || selectedMetrics.size === 0 ? (
 
257
  </div>
258
  ) : (
259
  <>
260
+ {/* Standalone metrics table */}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
  {(() => {
262
  const standaloneMetrics = findStandaloneMetrics()
263
  if (standaloneMetrics.length === 0) return null
 
264
  return (
265
+ <div className="overflow-x-auto max-h-[80vh] overflow-y-auto">
266
+ <table className="table w-full min-w-max border-gray-700 border">
 
267
  <thead>
268
  <tr>
269
+ <th className="sticky left-0 top-0 bg-base-100 z-20 border-gray-700 border">
270
+ Metric
271
+ </th>
272
  {tableHeader
273
  .filter((model) => selectedModels.has(model))
274
  .map((model) => (
275
+ <th
276
+ key={`standalone-${model}`}
277
+ className="sticky top-0 bg-base-100 z-10 text-center text-xs border-gray-700 border"
278
+ >
279
  {model}
280
  </th>
281
  ))}
 
285
  {standaloneMetrics.sort().map((metric) => {
286
  const row = tableRows.find((r) => r.metric === metric)
287
  if (!row) return null
 
288
  return (
289
  <tr key={`standalone-${metric}`} className="hover:bg-base-100">
290
+ <td className="sticky left-0 bg-base-100 z-10 border-gray-700 border">
291
+ {metric}
292
+ </td>
293
  {tableHeader
294
  .filter((model) => selectedModels.has(model))
295
  .map((col) => {
296
  const cell = row[col]
297
  return (
298
+ <td
299
+ key={`standalone-${metric}-${col}`}
300
+ className="text-center border-gray-700 border"
301
+ >
302
  {!isNaN(Number(cell))
303
  ? Number(Number(cell).toFixed(3))
304
  : cell}
 
313
  </div>
314
  )
315
  })()}
316
+
317
+ {/* Main metrics table */}
318
+ <div className="overflow-x-auto max-h-[80vh] overflow-y-auto">
319
+ <table className="table w-full min-w-max border-gray-700 border">
320
+ <thead>
321
+ <tr>
322
+ <th className="sticky left-0 top-0 bg-base-100 z-20 border-gray-700 border">
323
+ Attack Category Metrics
324
+ </th>
325
+ {overallMetrics
326
+ .filter((metric) => selectedOverallMetrics.has(metric))
327
+ .map((metric) => (
328
+ <th
329
+ key={metric}
330
+ colSpan={
331
+ tableHeader.filter((model) => selectedModels.has(model)).length
332
+ }
333
+ className="sticky top-0 bg-base-100 z-10 text-center border-x border-gray-300 border border-gray-700 border"
334
+ >
335
+ {metric}
336
+ </th>
337
+ ))}
338
+ </tr>
339
+ <tr>
340
+ <th className="sticky left-0 bg-base-100 z-10 border-gray-700 border"></th>
341
+ {overallMetrics
342
+ .filter((metric) => selectedOverallMetrics.has(metric))
343
+ .map((metric) => (
344
+ <React.Fragment key={`header-models-${metric}`}>
345
+ {tableHeader
346
+ .filter((model) => selectedModels.has(model))
347
+ .map((model) => (
348
+ <th
349
+ key={`${metric}-${model}`}
350
+ className="sticky top-12 bg-base-100 z-10 text-center text-xs border-gray-700 border border-bottom-solid border-b-gray-700 border-b-2"
351
+ >
352
+ {model}
353
+ </th>
354
+ ))}
355
+ </React.Fragment>
356
+ ))}
357
+ </tr>
358
+ </thead>
359
+ <tbody>
360
+ {/* First render each group */}
361
+ {Object.entries(groups).map(([group, subGroups]) => {
362
+ // Skip the "Overall" group completely
363
+ if (group === 'Overall') return null
364
+
365
+ // Get all metrics for this group
366
+ const allGroupMetrics = Object.values(subGroups).flat()
367
+ // Filter to only include selected metrics
368
+ const visibleGroupMetrics = filterMetricsByGroupAndSubgroup(
369
+ allGroupMetrics,
370
+ group
371
+ )
372
+
373
+ // Skip this group if no metrics are selected
374
+ if (visibleGroupMetrics.length === 0) return null
375
+
376
+ return (
377
+ <React.Fragment key={group}>
378
+ {/* Group row with average stats for the entire group */}
379
+ <tr
380
+ className="bg-base-200 cursor-pointer hover:bg-base-300"
381
+ onClick={() => toggleGroup(group)}
382
+ >
383
+ <td className="sticky left-0 bg-base-200 z-10 font-medium border-gray-700 border">
384
+ {openGroups[group] ? '▼ ' : '▶ '}
385
+ {group}
386
+ </td>
387
+ {/* For each metric column */}
388
+ {overallMetrics
389
+ .filter((metric) => selectedOverallMetrics.has(metric))
390
+ .map((metric) => (
391
+ // Render sub-columns for each model
392
+ <React.Fragment key={`${group}-${metric}`}>
393
+ {tableHeader
394
+ .filter((model) => selectedModels.has(model))
395
+ .map((col) => {
396
+ // Find all metrics in this group that match the current metric name
397
+ const allMetricsWithName = findAllMetricsForName(metric)
398
+ const metricsInGroupForThisMetric =
399
+ visibleGroupMetrics.filter((m) =>
400
+ allMetricsWithName.includes(m)
401
+ )
402
+ const stats = calculateStats(metricsInGroupForThisMetric, col)
403
+
404
+ return (
405
+ <td
406
+ key={`${group}-${metric}-${col}`}
407
+ className="font-medium text-center border-gray-700 border"
408
+ >
409
+ {!isNaN(stats.avg)
410
+ ? `${stats.avg.toFixed(3)} ± ${stats.stdDev.toFixed(3)}`
411
+ : 'N/A'}
412
+ </td>
413
+ )
414
+ })}
415
+ </React.Fragment>
416
+ ))}
417
+ </tr>
418
+
419
+ {/* Only render subgroups if group is open */}
420
+ {openGroups[group] &&
421
+ Object.entries(subGroups).map(([subGroup, metrics]) => {
422
+ // Filter to only include selected metrics in this subgroup
423
+ const visibleSubgroupMetrics = filterMetricsByGroupAndSubgroup(
424
+ metrics,
425
+ group,
426
+ subGroup
427
+ )
428
+
429
+ // Skip this subgroup if no metrics are selected
430
+ if (visibleSubgroupMetrics.length === 0) return null
431
+
432
+ return (
433
+ <React.Fragment key={`${group}-${subGroup}`}>
434
+ {/* Subgroup row with average stats for the subgroup */}
435
+ <tr
436
+ className="bg-base-100 cursor-pointer hover:bg-base-200"
437
+ onClick={() => toggleSubGroup(group, subGroup)}
438
+ >
439
+ <td className="sticky left-0 bg-base-100 z-10 pl-6 font-medium border-gray-700 border">
440
+ {openSubGroups[group]?.[subGroup] ? '▼ ' : '▶ '}
441
+ {subGroup}
442
+ </td>
443
+ {/* For each metric column */}
444
+ {overallMetrics
445
+ .filter((metric) => selectedOverallMetrics.has(metric))
446
+ .map((metric) => (
447
+ // Render sub-columns for each model
448
+ <React.Fragment key={`${group}-${subGroup}-${metric}`}>
449
+ {tableHeader
450
+ .filter((model) => selectedModels.has(model))
451
+ .map((col) => {
452
+ // Find all metrics in this subgroup that match the current metric name
453
+ const allMetricsWithName =
454
+ findAllMetricsForName(metric)
455
+ const metricsInSubgroupForThisMetric =
456
+ visibleSubgroupMetrics.filter((m) =>
457
+ allMetricsWithName.includes(m)
458
+ )
459
+ const stats = calculateStats(
460
+ metricsInSubgroupForThisMetric,
461
+ col
462
+ )
463
+
464
+ return (
465
+ <td
466
+ key={`${group}-${subGroup}-${metric}-${col}`}
467
+ className="font-medium text-center border-gray-700 border"
468
+ >
469
+ {!isNaN(stats.avg)
470
+ ? `${stats.avg.toFixed(3)} ± ${stats.stdDev.toFixed(3)}`
471
+ : 'N/A'}
472
+ </td>
473
+ )
474
+ })}
475
+ </React.Fragment>
476
+ ))}
477
+ </tr>
478
+
479
+ {/* Individual metric rows */}
480
+ {openSubGroups[group]?.[subGroup] &&
481
+ // Sort visibleSubgroupMetrics alphabetically by the clean metric name
482
+ [...visibleSubgroupMetrics]
483
+ .sort((a, b) => {
484
+ // For metrics with format {category}_{strength}_{overall_metric_name},
485
+ // First sort by category, then by overall_metric_name, then by strength
486
+
487
+ // First extract the overall metric group
488
+ const getOverallMetricGroup = (metric: string) => {
489
+ for (const overall of overallMetrics) {
490
+ if (
491
+ metric.endsWith(`_${overall}`) ||
492
+ metric === overall
493
+ ) {
494
+ return overall
495
+ }
496
+ }
497
+ return ''
498
+ }
499
+
500
+ const overallA = getOverallMetricGroup(a)
501
+ const overallB = getOverallMetricGroup(b)
502
+
503
+ // Extract the strength (last part before the overall metric)
504
+ const stripOverall = (metric: string, overall: string) => {
505
+ if (metric.endsWith(`_${overall}`)) {
506
+ // Remove the overall metric group and any preceding underscore
507
+ const stripped = metric.slice(
508
+ 0,
509
+ metric.length - overall.length - 1
510
+ )
511
+ const parts = stripped.split('_')
512
+ return parts.length > 0 ? parts[parts.length - 1] : ''
513
+ }
514
+ return metric
515
+ }
516
+
517
+ // Extract the category (what remains after removing strength and overall_metric_name)
518
+ const getCategory = (metric: string, overall: string) => {
519
+ if (metric.endsWith(`_${overall}`)) {
520
+ const stripped = metric.slice(
521
+ 0,
522
+ metric.length - overall.length - 1
523
+ )
524
+ const parts = stripped.split('_')
525
+ // Remove the last part (strength) and join the rest (category)
526
+ return parts.length > 1
527
+ ? parts.slice(0, parts.length - 1).join('_')
528
+ : ''
529
+ }
530
+ return metric
531
+ }
532
+
533
+ const categoryA = getCategory(a, overallA)
534
+ const categoryB = getCategory(b, overallB)
535
+
536
+ // First sort by category
537
+ if (categoryA !== categoryB) {
538
+ return categoryA.localeCompare(categoryB)
539
+ }
540
+
541
+ // Then sort by overall metric name
542
+ if (overallA !== overallB) {
543
+ return overallA.localeCompare(overallB)
544
+ }
545
+
546
+ // Finally sort by strength
547
+ const subA = stripOverall(a, overallA)
548
+ const subB = stripOverall(b, overallB)
549
+
550
+ // Try to parse subA and subB as numbers, handling k/m/b suffixes
551
+ const parseNumber = (str: string) => {
552
+ const match = str.match(/^(\d+(?:\.\d+)?)([kKmMbB]?)$/)
553
+ if (!match) return NaN
554
+ let [_, num, suffix] = match
555
+ let value = parseFloat(num)
556
+ switch (suffix.toLowerCase()) {
557
+ case 'k':
558
+ value *= 1e3
559
+ break
560
+ case 'm':
561
+ value *= 1e6
562
+ break
563
+ case 'b':
564
+ value *= 1e9
565
+ break
566
+ }
567
+ return value
568
+ }
569
+
570
+ const numA = parseNumber(subA)
571
+ const numB = parseNumber(subB)
572
+
573
+ if (!isNaN(numA) && !isNaN(numB)) {
574
+ return numA - numB
575
+ }
576
+ // Fallback to string comparison if not both numbers
577
+ return subA.localeCompare(subB)
578
+ })
579
+ .map((metric) => {
580
+ const row = tableRows.find((r) => r.metric === metric)
581
+ if (!row) return null
582
+
583
+ // Extract the metric name (after the underscore)
584
+ const metricName = metric.includes('_')
585
+ ? metric.split('_').slice(1).join('_')
586
+ : metric
587
+
588
+ return (
589
+ <tr key={metric} className="hover:bg-base-100">
590
+ <td className="sticky left-0 bg-base-100 z-10 pl-10 border-gray-700 border">
591
+ {metric}
592
+ </td>
593
+ {/* For each metric column */}
594
+ {overallMetrics
595
+ .filter((oMetric) =>
596
+ selectedOverallMetrics.has(oMetric)
597
+ )
598
+ .map((oMetric) => {
599
+ // Only show values for the matching metric
600
+ const isMatchingMetric =
601
+ findAllMetricsForName(oMetric).includes(metric)
602
+
603
+ if (!isMatchingMetric) {
604
+ // Fill empty cells for non-matching metrics
605
+ return (
606
+ <React.Fragment key={`${metric}-${oMetric}`}>
607
+ {tableHeader
608
+ .filter((model) =>
609
+ selectedModels.has(model)
610
+ )
611
+ .map((col) => (
612
+ <td
613
+ key={`${metric}-${oMetric}-${col}`}
614
+ className="text-center border-gray-700 border"
615
+ ></td>
616
+ ))}
617
+ </React.Fragment>
618
+ )
619
+ }
620
+ return (
621
+ <React.Fragment key={`${metric}-${oMetric}`}>
622
+ {tableHeader
623
+ .filter((model) => selectedModels.has(model))
624
+ .map((col) => {
625
+ const cell = row[col]
626
+ return (
627
+ <td
628
+ key={`${metric}-${oMetric}-${col}`}
629
+ className="text-center border-gray-700 border"
630
+ >
631
+ {!isNaN(Number(cell))
632
+ ? Number(Number(cell).toFixed(3))
633
+ : cell}
634
+ </td>
635
+ )
636
+ })}
637
+ </React.Fragment>
638
+ )
639
+ })}
640
+ </tr>
641
+ )
642
+ })}
643
+ </React.Fragment>
644
+ )
645
+ })}
646
+ </React.Fragment>
647
+ )
648
+ })}
649
+ </tbody>
650
+ </table>
651
+ </div>
652
  </>
653
  )}
654
  </div>
frontend/src/components/LoadingSpinner.tsx ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from 'react'
2
+
3
/** Props accepted by LoadingSpinner. */
interface LoadingSpinnerProps {
  /** Minimum height of the spinner container, given as a CSS length. Defaults to '300px'. */
  minHeight?: string
}

/**
 * Centered loading indicator.
 *
 * The minimum height is applied through an inline style instead of an
 * interpolated Tailwind class (`min-h-[${minHeight}]`). Tailwind only emits CSS
 * for class names that appear as complete strings in the source, so a class
 * assembled at runtime produces no rule and the container would not get the
 * requested height.
 */
const LoadingSpinner: React.FC<LoadingSpinnerProps> = ({ minHeight = '300px' }) => {
  return (
    <div className="flex items-center justify-center" style={{ minHeight }}>
      <span className="loading loading-spinner loading-lg text-primary"></span>
    </div>
  )
}
14
+
15
+ export default LoadingSpinner
frontend/src/components/ModelFilter.tsx CHANGED
@@ -19,7 +19,7 @@ const ModelFilter: React.FC<ModelFilterProps> = ({ models, selectedModels, setSe
19
 
20
  return (
21
  <div className="w-full mb-4">
22
- <fieldset className="fieldset w-full p-4 rounded border">
23
  <legend className="fieldset-legend font-semibold">
24
  Models ({selectedModels.size}/{models.length})
25
  </legend>
 
19
 
20
  return (
21
  <div className="w-full mb-4">
22
+ <fieldset className="fieldset w-full p-4 rounded border border-gray-700">
23
  <legend className="fieldset-legend font-semibold">
24
  Models ({selectedModels.size}/{models.length})
25
  </legend>