Spaces:
Running
Running
Mark Duppenthaler
committed on
Commit
·
b087e88
1
Parent(s):
54be5f9
Combined leaderboard, simplified filters
Browse files- backend/app.py +29 -22
- backend/chart.py +11 -41
- backend/config.py +48 -4
- frontend/src/App.tsx +27 -12
- frontend/src/components/DatasetSelector.tsx +12 -12
- frontend/src/components/Examples.tsx +2 -1
- frontend/src/components/LeaderBoardPage.tsx +0 -34
- frontend/src/components/LeaderboardChart.tsx +241 -0
- frontend/src/components/LeaderboardFilter.tsx +1 -1
- frontend/src/components/LeaderboardPage.tsx +77 -0
- frontend/src/components/LeaderboardTable.tsx +467 -429
- frontend/src/components/LoadingSpinner.tsx +15 -0
- frontend/src/components/ModelFilter.tsx +1 -1
backend/app.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
from backend.chart import mk_variations
|
2 |
-
from backend.config import get_dataset_config
|
3 |
from backend.examples import audio_examples_tab, image_examples_tab, video_examples_tab
|
4 |
-
from flask import Flask, Response, send_from_directory
|
5 |
from flask_cors import CORS
|
6 |
import os
|
7 |
import logging
|
@@ -39,41 +39,47 @@ def index():
|
|
39 |
@app.route("/data/<path:dataset_name>")
|
40 |
def data_files(dataset_name):
|
41 |
"""
|
42 |
-
Serves csv files from
|
43 |
"""
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
logger.info(f"Looking for dataset file: {file_path}")
|
47 |
-
|
48 |
df = pd.read_csv(file_path)
|
49 |
logger.info(f"Processing dataset: {dataset_name}")
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
|
|
|
|
56 |
|
57 |
|
58 |
@app.route("/examples/<path:type>")
|
59 |
def example_files(type):
|
60 |
"""
|
61 |
-
Serve example files from
|
62 |
"""
|
63 |
|
64 |
-
abs_path = "https://dl.fbaipublicfiles.com/omnisealbench/"
|
65 |
-
|
66 |
# Switch based on the type parameter to call the appropriate tab function
|
67 |
if type == "image":
|
68 |
-
result = image_examples_tab(
|
69 |
return Response(json.dumps(result), mimetype="application/json")
|
70 |
elif type == "audio":
|
71 |
# Assuming you'll create these functions
|
72 |
-
result = audio_examples_tab(
|
73 |
return Response(json.dumps(result), mimetype="application/json")
|
74 |
elif type == "video":
|
75 |
# Assuming you'll create these functions
|
76 |
-
result = video_examples_tab(
|
77 |
return Response(json.dumps(result), mimetype="application/json")
|
78 |
else:
|
79 |
return "Invalid example type", 400
|
@@ -91,7 +97,7 @@ def proxy(url):
|
|
91 |
url = unquote(url)
|
92 |
|
93 |
# Make sure we're only proxying from trusted domains for security
|
94 |
-
if not url.startswith(
|
95 |
return {"error": "Only proxying from allowed domains is permitted"}, 403
|
96 |
|
97 |
response = requests.get(url, stream=True)
|
@@ -120,9 +126,9 @@ def proxy(url):
|
|
120 |
return {"error": str(e)}, 500
|
121 |
|
122 |
|
123 |
-
def get_leaderboard(
|
124 |
# Determine file type and handle accordingly
|
125 |
-
config
|
126 |
|
127 |
# This part adds on all the columns
|
128 |
df = get_old_format_dataframe(df, config["first_cols"], config["attack_scores"])
|
@@ -146,11 +152,12 @@ def get_leaderboard(dataset_name, df):
|
|
146 |
return Response(json.dumps(result), mimetype="application/json")
|
147 |
|
148 |
|
149 |
-
def get_chart(df):
|
150 |
# This function should return the chart data based on the DataFrame
|
151 |
# For now, we will just return a placeholder response
|
152 |
chart_data = mk_variations(
|
153 |
df,
|
|
|
154 |
# attacks_plot_metrics,
|
155 |
# audio_attacks_with_variations,
|
156 |
)
|
|
|
1 |
from backend.chart import mk_variations
|
2 |
+
from backend.config import ABS_DATASET_DOMAIN, ABS_DATASET_PATH, get_dataset_config
|
3 |
from backend.examples import audio_examples_tab, image_examples_tab, video_examples_tab
|
4 |
+
from flask import Flask, Response, send_from_directory, request
|
5 |
from flask_cors import CORS
|
6 |
import os
|
7 |
import logging
|
|
|
39 |
@app.route("/data/<path:dataset_name>")
def data_files(dataset_name):
    """
    Serve a processed view of a remote dataset CSV.

    The CSV is read from ``<ABS_DATASET_PATH>/<dataset_name>_<dataset_type>.csv``,
    where ``dataset_type`` is taken from the ``dataset_type`` query parameter:

    * ``benchmark`` -> response built by ``get_leaderboard``
    * ``attacks_variations`` -> response built by ``get_chart``

    Returns a 400 response when ``dataset_type`` is missing or unsupported,
    and a 404 response when the CSV cannot be read or the dataset has no
    known configuration. Errors raised while building the response itself
    are no longer disguised as "File not found"; they propagate to Flask.
    """
    # Supported dataset types and the function that renders each one.
    handlers = {
        "benchmark": get_leaderboard,
        "attacks_variations": get_chart,
    }

    # Get dataset_type from query params
    dataset_type = request.args.get("dataset_type")
    if not dataset_type:
        logger.error("No dataset_type provided in query parameters.")
        return "Dataset type not specified", 400

    handler = handlers.get(dataset_type)
    if handler is None:
        # Reject unsupported types before fetching anything; otherwise the
        # view would fall through and return None.
        logger.error(f"Unsupported dataset_type: {dataset_type}")
        return "Invalid dataset type", 400

    # Build the URL with plain string formatting: os.path.join is
    # OS-dependent and drops the base entirely when the second component is
    # absolute, which is wrong for URLs.
    base_url = ABS_DATASET_PATH.rstrip("/")
    file_path = f"{base_url}/{dataset_name.lstrip('/')}_{dataset_type}.csv"
    logger.info(f"Looking for dataset file: {file_path}")

    try:
        df = pd.read_csv(file_path)
    except Exception:
        # Boundary with a remote resource: any failure to fetch or parse is
        # reported as "not found", but the traceback is kept in the log.
        logger.exception(f"Failed to fetch file: {file_path}")
        return "File not found", 404

    logger.info(f"Processing dataset: {dataset_name}")
    try:
        config = get_dataset_config(dataset_name)
    except ValueError:
        # get_dataset_config raises ValueError for names it does not know.
        logger.error(f"Unknown dataset: {dataset_name}")
        return "File not found", 404

    return handler(config, df)
|
64 |
|
65 |
|
66 |
@app.route("/examples/<path:type>")
def example_files(type):
    """
    Return the example listing for one media type as JSON.

    ``type`` (the name is fixed by the route placeholder) selects the
    builder: ``image``, ``audio`` or ``video``. The chosen builder is called
    with ``ABS_DATASET_PATH`` and its result is JSON-encoded. Any other
    value yields a 400 response.
    """
    # Map each supported media type to the function that builds its examples.
    builders = {
        "image": image_examples_tab,
        "audio": audio_examples_tab,
        "video": video_examples_tab,
    }

    build_examples = builders.get(type)
    if build_examples is None:
        return "Invalid example type", 400

    payload = json.dumps(build_examples(ABS_DATASET_PATH))
    return Response(payload, mimetype="application/json")
|
|
|
97 |
url = unquote(url)
|
98 |
|
99 |
# Make sure we're only proxying from trusted domains for security
|
100 |
+
if not url.startswith(ABS_DATASET_DOMAIN):
|
101 |
return {"error": "Only proxying from allowed domains is permitted"}, 403
|
102 |
|
103 |
response = requests.get(url, stream=True)
|
|
|
126 |
return {"error": str(e)}, 500
|
127 |
|
128 |
|
129 |
+
def get_leaderboard(config, df):
|
130 |
# Determine file type and handle accordingly
|
131 |
+
logger.warning(f"Processing dataset with config: {config}")
|
132 |
|
133 |
# This part adds on all the columns
|
134 |
df = get_old_format_dataframe(df, config["first_cols"], config["attack_scores"])
|
|
|
152 |
return Response(json.dumps(result), mimetype="application/json")
|
153 |
|
154 |
|
155 |
+
def get_chart(config, df):
|
156 |
# This function should return the chart data based on the DataFrame
|
157 |
# For now, we will just return a placeholder response
|
158 |
chart_data = mk_variations(
|
159 |
df,
|
160 |
+
config["attacks_with_variations"],
|
161 |
# attacks_plot_metrics,
|
162 |
# audio_attacks_with_variations,
|
163 |
)
|
backend/chart.py
CHANGED
@@ -2,44 +2,6 @@ import pandas as pd
|
|
2 |
|
3 |
from pathlib import Path
|
4 |
|
5 |
-
audio_attacks_with_variations = [
|
6 |
-
"random_noise",
|
7 |
-
"lowpass_filter",
|
8 |
-
"highpass_filter",
|
9 |
-
"boost_audio",
|
10 |
-
"duck_audio",
|
11 |
-
"shush",
|
12 |
-
]
|
13 |
-
|
14 |
-
attacks_plot_metrics = ["bit_acc", "log10_p_value", "TPR", "FPR", "watermark_det_score"]
|
15 |
-
|
16 |
-
image_attacks_with_variations = [
|
17 |
-
"center_crop",
|
18 |
-
"jpeg",
|
19 |
-
"brightness",
|
20 |
-
"contrast",
|
21 |
-
"saturation",
|
22 |
-
"sharpness",
|
23 |
-
"resize",
|
24 |
-
"perspective",
|
25 |
-
"median_filter",
|
26 |
-
"hue",
|
27 |
-
"gaussian_blur",
|
28 |
-
]
|
29 |
-
|
30 |
-
|
31 |
-
video_attacks_with_variations = [
|
32 |
-
"Rotate",
|
33 |
-
"Resize",
|
34 |
-
"Crop",
|
35 |
-
"Brightness",
|
36 |
-
"Contrast",
|
37 |
-
"Saturation",
|
38 |
-
"H264",
|
39 |
-
"H264rgb",
|
40 |
-
"H265",
|
41 |
-
]
|
42 |
-
|
43 |
|
44 |
def plot_data(metric, selected_attack, all_attacks_df):
|
45 |
attack_df = all_attacks_df[all_attacks_df.attack == selected_attack]
|
@@ -57,8 +19,7 @@ def plot_data(metric, selected_attack, all_attacks_df):
|
|
57 |
|
58 |
def mk_variations(
|
59 |
all_attacks_df,
|
60 |
-
|
61 |
-
attacks_with_variations: list[str] = audio_attacks_with_variations,
|
62 |
):
|
63 |
# all_attacks_df = pd.read_csv(csv_file)
|
64 |
# print(all_attacks_df)
|
@@ -92,8 +53,17 @@ def mk_variations(
|
|
92 |
# all_graphs,
|
93 |
# )
|
94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
return {
|
96 |
-
"metrics":
|
97 |
"attacks_with_variations": attacks_with_variations,
|
98 |
"all_attacks_df": all_attacks_df.to_dict(orient="records"),
|
99 |
}
|
|
|
2 |
|
3 |
from pathlib import Path
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
def plot_data(metric, selected_attack, all_attacks_df):
|
7 |
attack_df = all_attacks_df[all_attacks_df.attack == selected_attack]
|
|
|
19 |
|
20 |
def mk_variations(
|
21 |
all_attacks_df,
|
22 |
+
attacks_with_variations: list[str],
|
|
|
23 |
):
|
24 |
# all_attacks_df = pd.read_csv(csv_file)
|
25 |
# print(all_attacks_df)
|
|
|
53 |
# all_graphs,
|
54 |
# )
|
55 |
|
56 |
+
# Replace NaN values with the string "NaN" for JSON serialization
|
57 |
+
all_attacks_df = all_attacks_df.fillna(value="NaN")
|
58 |
+
attacks_plot_metrics = [
|
59 |
+
"bit_acc",
|
60 |
+
"log10_p_value",
|
61 |
+
"TPR",
|
62 |
+
"FPR",
|
63 |
+
"watermark_det_score",
|
64 |
+
]
|
65 |
return {
|
66 |
+
"metrics": attacks_plot_metrics,
|
67 |
"attacks_with_variations": attacks_with_variations,
|
68 |
"all_attacks_df": all_attacks_df.to_dict(orient="records"),
|
69 |
}
|
backend/config.py
CHANGED
@@ -1,5 +1,9 @@
|
|
|
|
|
|
|
|
|
|
1 |
def get_dataset_config(dataset_name):
|
2 |
-
if dataset_name == "
|
3 |
return {
|
4 |
"first_cols": [
|
5 |
"snr",
|
@@ -29,8 +33,16 @@ def get_dataset_config(dataset_name):
|
|
29 |
"aac_compression": "Compression",
|
30 |
"mp3_compression": "Compression",
|
31 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
}
|
33 |
-
elif dataset_name == "
|
34 |
return {
|
35 |
"first_cols": ["snr", "sisnr", "stoi", "pesq"],
|
36 |
"attack_scores": ["bit_acc", "log10_p_value", "TPR", "FPR"],
|
@@ -50,8 +62,16 @@ def get_dataset_config(dataset_name):
|
|
50 |
"aac_compression": "Compression",
|
51 |
"mp3_compression": "Compression",
|
52 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
}
|
54 |
-
elif dataset_name == "
|
55 |
return {
|
56 |
"first_cols": ["psnr", "ssim", "lpips", "decoder_time"],
|
57 |
"attack_scores": ["bit_acc", "log10_p_value", "TPR", "FPR"],
|
@@ -76,8 +96,21 @@ def get_dataset_config(dataset_name):
|
|
76 |
"avg": "Averages",
|
77 |
"none": "Baseline",
|
78 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
}
|
80 |
-
elif dataset_name == "
|
81 |
return {
|
82 |
"first_cols": ["psnr", "ssim", "msssim", "lpips", "vmaf", "decoder_time"],
|
83 |
"attack_scores": ["bit_acc", "log10_p_value", "TPR", "FPR"],
|
@@ -104,6 +137,17 @@ def get_dataset_config(dataset_name):
|
|
104 |
"H264_Crop_Brightness2": "Mixed",
|
105 |
"H264_Crop_Brightness3": "Mixed",
|
106 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
}
|
108 |
else:
|
109 |
raise ValueError(f"Unknown dataset: {dataset_name}")
|
|
|
1 |
+
ABS_DATASET_DOMAIN = "https://dl.fbaipublicfiles.com"
|
2 |
+
ABS_DATASET_PATH = f"{ABS_DATASET_DOMAIN}/omnisealbench/"
|
3 |
+
|
4 |
+
|
5 |
def get_dataset_config(dataset_name):
|
6 |
+
if dataset_name == "voxpopuli_1k/audio":
|
7 |
return {
|
8 |
"first_cols": [
|
9 |
"snr",
|
|
|
33 |
"aac_compression": "Compression",
|
34 |
"mp3_compression": "Compression",
|
35 |
},
|
36 |
+
"attacks_with_variations": [
|
37 |
+
"random_noise",
|
38 |
+
"lowpass_filter",
|
39 |
+
"highpass_filter",
|
40 |
+
"boost_audio",
|
41 |
+
"duck_audio",
|
42 |
+
"shush",
|
43 |
+
],
|
44 |
}
|
45 |
+
elif dataset_name == "ravdess_1k/audio":
|
46 |
return {
|
47 |
"first_cols": ["snr", "sisnr", "stoi", "pesq"],
|
48 |
"attack_scores": ["bit_acc", "log10_p_value", "TPR", "FPR"],
|
|
|
62 |
"aac_compression": "Compression",
|
63 |
"mp3_compression": "Compression",
|
64 |
},
|
65 |
+
"attacks_with_variations": [
|
66 |
+
"random_noise",
|
67 |
+
"lowpass_filter",
|
68 |
+
"highpass_filter",
|
69 |
+
"boost_audio",
|
70 |
+
"duck_audio",
|
71 |
+
"shush",
|
72 |
+
],
|
73 |
}
|
74 |
+
elif dataset_name == "val2014_1k/image":
|
75 |
return {
|
76 |
"first_cols": ["psnr", "ssim", "lpips", "decoder_time"],
|
77 |
"attack_scores": ["bit_acc", "log10_p_value", "TPR", "FPR"],
|
|
|
96 |
"avg": "Averages",
|
97 |
"none": "Baseline",
|
98 |
},
|
99 |
+
"attacks_with_variations": [
|
100 |
+
"center_crop",
|
101 |
+
"jpeg",
|
102 |
+
"brightness",
|
103 |
+
"contrast",
|
104 |
+
"saturation",
|
105 |
+
"sharpness",
|
106 |
+
"resize",
|
107 |
+
"perspective",
|
108 |
+
"median_filter",
|
109 |
+
"hue",
|
110 |
+
"gaussian_blur",
|
111 |
+
],
|
112 |
}
|
113 |
+
elif dataset_name == "sav_val_full/video":
|
114 |
return {
|
115 |
"first_cols": ["psnr", "ssim", "msssim", "lpips", "vmaf", "decoder_time"],
|
116 |
"attack_scores": ["bit_acc", "log10_p_value", "TPR", "FPR"],
|
|
|
137 |
"H264_Crop_Brightness2": "Mixed",
|
138 |
"H264_Crop_Brightness3": "Mixed",
|
139 |
},
|
140 |
+
"attacks_with_variations": [
|
141 |
+
"Rotate",
|
142 |
+
"Resize",
|
143 |
+
"Crop",
|
144 |
+
"Brightness",
|
145 |
+
"Contrast",
|
146 |
+
"Saturation",
|
147 |
+
"H264",
|
148 |
+
"H264rgb",
|
149 |
+
"H265",
|
150 |
+
],
|
151 |
}
|
152 |
else:
|
153 |
raise ValueError(f"Unknown dataset: {dataset_name}")
|
frontend/src/App.tsx
CHANGED
@@ -1,17 +1,32 @@
|
|
1 |
-
import { useState } from 'react'
|
2 |
import Examples from './components/Examples'
|
3 |
-
import
|
4 |
|
5 |
function App() {
|
6 |
const [activeTab, setActiveTab] = useState<
|
7 |
'leaderboard' | 'imageExamples' | 'audioExamples' | 'videoExamples'
|
8 |
>('leaderboard')
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
return (
|
11 |
<div className="min-h-screen w-11/12 mx-auto">
|
12 |
-
<div className="
|
13 |
-
<div className="
|
14 |
<h2 className="card-title">🥇 Omni Seal Bench Watermarking Leaderboard</h2>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
</div>
|
16 |
</div>
|
17 |
|
@@ -24,8 +39,8 @@ function App() {
|
|
24 |
checked={activeTab === 'leaderboard'}
|
25 |
onChange={() => setActiveTab('leaderboard')}
|
26 |
/>
|
27 |
-
<div className="tab-content bg-base-100
|
28 |
-
<
|
29 |
</div>
|
30 |
|
31 |
<input
|
@@ -36,8 +51,8 @@ function App() {
|
|
36 |
checked={activeTab === 'imageExamples'}
|
37 |
onChange={() => setActiveTab('imageExamples')}
|
38 |
/>
|
39 |
-
<div className="tab-content bg-base-100
|
40 |
-
<Examples fileType="image" />
|
41 |
</div>
|
42 |
|
43 |
<input
|
@@ -48,8 +63,8 @@ function App() {
|
|
48 |
checked={activeTab === 'audioExamples'}
|
49 |
onChange={() => setActiveTab('audioExamples')}
|
50 |
/>
|
51 |
-
<div className="tab-content bg-base-100
|
52 |
-
<Examples fileType="audio" />
|
53 |
</div>
|
54 |
|
55 |
<input
|
@@ -60,8 +75,8 @@ function App() {
|
|
60 |
checked={activeTab === 'videoExamples'}
|
61 |
onChange={() => setActiveTab('videoExamples')}
|
62 |
/>
|
63 |
-
<div className="tab-content bg-base-100
|
64 |
-
<Examples fileType="video" />
|
65 |
</div>
|
66 |
</div>
|
67 |
</div>
|
|
|
1 |
+
import { useState, useEffect } from 'react'
|
2 |
import Examples from './components/Examples'
|
3 |
+
import LeaderboardPage from './components/LeaderboardPage'
|
4 |
|
5 |
function App() {
|
6 |
const [activeTab, setActiveTab] = useState<
|
7 |
'leaderboard' | 'imageExamples' | 'audioExamples' | 'videoExamples'
|
8 |
>('leaderboard')
|
9 |
+
const [theme, setTheme] = useState<'dark' | 'light'>('dark')
|
10 |
+
|
11 |
+
useEffect(() => {
|
12 |
+
document.documentElement.setAttribute('data-theme', theme)
|
13 |
+
}, [theme])
|
14 |
|
15 |
return (
|
16 |
<div className="min-h-screen w-11/12 mx-auto">
|
17 |
+
<div className="bg-base-100 my-4">
|
18 |
+
<div className="flex flex-row justify-between items-center">
|
19 |
<h2 className="card-title">🥇 Omni Seal Bench Watermarking Leaderboard</h2>
|
20 |
+
<div className="flex justify-end items-center gap-2">
|
21 |
+
<span className="text-sm">{theme === 'dark' ? '🌙 Dark Mode' : '☀️ Light Mode'}</span>
|
22 |
+
<input
|
23 |
+
type="checkbox"
|
24 |
+
className="toggle"
|
25 |
+
checked={theme === 'dark'}
|
26 |
+
onChange={() => setTheme(theme === 'dark' ? 'light' : 'dark')}
|
27 |
+
aria-label="Toggle dark mode"
|
28 |
+
/>
|
29 |
+
</div>
|
30 |
</div>
|
31 |
</div>
|
32 |
|
|
|
39 |
checked={activeTab === 'leaderboard'}
|
40 |
onChange={() => setActiveTab('leaderboard')}
|
41 |
/>
|
42 |
+
<div className="tab-content bg-base-100 ">
|
43 |
+
<LeaderboardPage />
|
44 |
</div>
|
45 |
|
46 |
<input
|
|
|
51 |
checked={activeTab === 'imageExamples'}
|
52 |
onChange={() => setActiveTab('imageExamples')}
|
53 |
/>
|
54 |
+
<div className="tab-content bg-base-100 ">
|
55 |
+
{activeTab === 'imageExamples' ? <Examples fileType="image" /> : null}
|
56 |
</div>
|
57 |
|
58 |
<input
|
|
|
63 |
checked={activeTab === 'audioExamples'}
|
64 |
onChange={() => setActiveTab('audioExamples')}
|
65 |
/>
|
66 |
+
<div className="tab-content bg-base-100 ">
|
67 |
+
{activeTab === 'audioExamples' ? <Examples fileType="audio" /> : null}
|
68 |
</div>
|
69 |
|
70 |
<input
|
|
|
75 |
checked={activeTab === 'videoExamples'}
|
76 |
onChange={() => setActiveTab('videoExamples')}
|
77 |
/>
|
78 |
+
<div className="tab-content bg-base-100 ">
|
79 |
+
{activeTab === 'videoExamples' ? <Examples fileType="video" /> : null}
|
80 |
</div>
|
81 |
</div>
|
82 |
</div>
|
frontend/src/components/DatasetSelector.tsx
CHANGED
@@ -1,31 +1,31 @@
|
|
1 |
import React from 'react'
|
2 |
|
3 |
interface DatasetSelectorProps {
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
}
|
8 |
|
9 |
const DatasetSelector: React.FC<DatasetSelectorProps> = ({
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
}) => {
|
14 |
return (
|
15 |
<div className="mb-4">
|
16 |
-
<fieldset className="fieldset w-full p-4 rounded border">
|
17 |
<legend className="fieldset-legend font-semibold">Dataset</legend>
|
18 |
<div className="flex flex-wrap gap-2">
|
19 |
-
{
|
20 |
-
<label key={
|
21 |
<input
|
22 |
type="radio"
|
23 |
name="dataset"
|
24 |
className="radio radio-sm"
|
25 |
-
checked={
|
26 |
-
onChange={() =>
|
27 |
/>
|
28 |
-
<span className="text-sm">{
|
29 |
</label>
|
30 |
))}
|
31 |
</div>
|
|
|
1 |
import React from 'react'
|
2 |
|
3 |
interface DatasetSelectorProps {
|
4 |
+
datasetNames: string[]
|
5 |
+
selectedDatasetName: string
|
6 |
+
onDatasetNameChange: (datasetName: string) => void
|
7 |
}
|
8 |
|
9 |
const DatasetSelector: React.FC<DatasetSelectorProps> = ({
|
10 |
+
datasetNames,
|
11 |
+
selectedDatasetName,
|
12 |
+
onDatasetNameChange,
|
13 |
}) => {
|
14 |
return (
|
15 |
<div className="mb-4">
|
16 |
+
<fieldset className="fieldset w-full p-4 rounded border border-gray-700">
|
17 |
<legend className="fieldset-legend font-semibold">Dataset</legend>
|
18 |
<div className="flex flex-wrap gap-2">
|
19 |
+
{datasetNames.map((datasetName) => (
|
20 |
+
<label key={datasetName} className="flex items-center gap-2 cursor-pointer">
|
21 |
<input
|
22 |
type="radio"
|
23 |
name="dataset"
|
24 |
className="radio radio-sm"
|
25 |
+
checked={selectedDatasetName === datasetName}
|
26 |
+
onChange={() => onDatasetNameChange(datasetName)}
|
27 |
/>
|
28 |
+
<span className="text-sm">{datasetName}</span>
|
29 |
</label>
|
30 |
))}
|
31 |
</div>
|
frontend/src/components/Examples.tsx
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import React, { useState, useEffect } from 'react'
|
2 |
import API from '../API'
|
3 |
import AudioPlayer from './AudioPlayer'
|
|
|
4 |
|
5 |
interface ExamplesProps {
|
6 |
fileType: 'image' | 'audio' | 'video'
|
@@ -131,7 +132,7 @@ const Examples = ({ fileType }: ExamplesProps) => {
|
|
131 |
)}
|
132 |
</div>
|
133 |
|
134 |
-
{loading && <
|
135 |
{error && <p className="error">Error: {error}</p>}
|
136 |
|
137 |
{selectedModel && selectedAttack && (
|
|
|
1 |
import React, { useState, useEffect } from 'react'
|
2 |
import API from '../API'
|
3 |
import AudioPlayer from './AudioPlayer'
|
4 |
+
import LoadingSpinner from './LoadingSpinner'
|
5 |
|
6 |
interface ExamplesProps {
|
7 |
fileType: 'image' | 'audio' | 'video'
|
|
|
132 |
)}
|
133 |
</div>
|
134 |
|
135 |
+
{loading && <LoadingSpinner />}
|
136 |
{error && <p className="error">Error: {error}</p>}
|
137 |
|
138 |
{selectedModel && selectedAttack && (
|
frontend/src/components/LeaderBoardPage.tsx
DELETED
@@ -1,34 +0,0 @@
|
|
1 |
-
import React, { useState } from 'react'
|
2 |
-
import DatasetSelector from './DatasetSelector'
|
3 |
-
import LeaderboardTable from './LeaderboardTable'
|
4 |
-
import DataChart from './DataChart'
|
5 |
-
|
6 |
-
const LeaderBoardPage: React.FC = () => {
|
7 |
-
const datasets = [
|
8 |
-
'voxpopuli_1k_audio',
|
9 |
-
'ravdess_1k_audio',
|
10 |
-
'val2014_1k_image',
|
11 |
-
'sav_val_full_video',
|
12 |
-
]
|
13 |
-
const [selectedDataset, setSelectedDataset] = useState('voxpopuli_1k_audio')
|
14 |
-
|
15 |
-
return (
|
16 |
-
<div className="space-y-6">
|
17 |
-
<DatasetSelector
|
18 |
-
datasets={datasets}
|
19 |
-
selectedDataset={selectedDataset}
|
20 |
-
onDatasetChange={setSelectedDataset}
|
21 |
-
/>
|
22 |
-
|
23 |
-
<div className="space-y-8">
|
24 |
-
<LeaderboardTable dataset={selectedDataset} />
|
25 |
-
<div className="mt-8 pt-4 border-t border-gray-200">
|
26 |
-
<h3 className="text-lg font-semibold mb-4">Performance Chart</h3>
|
27 |
-
<DataChart dataset={selectedDataset} />
|
28 |
-
</div>
|
29 |
-
</div>
|
30 |
-
</div>
|
31 |
-
)
|
32 |
-
}
|
33 |
-
|
34 |
-
export default LeaderBoardPage
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
frontend/src/components/LeaderboardChart.tsx
ADDED
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { useEffect, useState } from 'react'
|
2 |
+
import {
|
3 |
+
LineChart,
|
4 |
+
Line,
|
5 |
+
XAxis,
|
6 |
+
YAxis,
|
7 |
+
CartesianGrid,
|
8 |
+
Tooltip,
|
9 |
+
Legend,
|
10 |
+
ResponsiveContainer,
|
11 |
+
} from 'recharts'
|
12 |
+
import API from '../API'
|
13 |
+
import LoadingSpinner from './LoadingSpinner'
|
14 |
+
|
15 |
+
interface LeaderboardChartProps {
|
16 |
+
dataset: string
|
17 |
+
selectedModels: Set<string>
|
18 |
+
}
|
19 |
+
|
20 |
+
interface Row {
|
21 |
+
metric: string
|
22 |
+
[key: string]: string | number
|
23 |
+
}
|
24 |
+
|
25 |
+
// Dropdown that lets the user pick which metric the chart plots.
// Renders one <option> per entry of `metrics`; an unset selection is shown
// as the empty value.
const MetricSelector = (props: {
  metrics: Set<string>
  selectedMetric: string | null
  onMetricChange: (event: React.ChangeEvent<HTMLSelectElement>) => void
}) => {
  const { metrics, selectedMetric, onMetricChange } = props

  const options = Array.from(metrics, (name) => (
    <option key={name} value={name}>
      {name}
    </option>
  ))

  return (
    <fieldset className="fieldset">
      <legend className="fieldset-legend">Metric</legend>
      <select
        id="metric-selector"
        className="select select-bordered w-full"
        value={selectedMetric || ''}
        onChange={onMetricChange}
      >
        {options}
      </select>
    </fieldset>
  )
}
|
52 |
+
|
53 |
+
// Dropdown that lets the user pick which attack the chart is filtered to.
// Renders one <option> per entry of `attacks`; an unset selection is shown
// as the empty value.
const AttackSelector = (props: {
  attacks: Set<string>
  selectedAttack: string | null
  onAttackChange: (event: React.ChangeEvent<HTMLSelectElement>) => void
}) => {
  const { attacks, selectedAttack, onAttackChange } = props

  const options = Array.from(attacks, (name) => (
    <option key={name} value={name}>
      {name}
    </option>
  ))

  return (
    <fieldset className="fieldset mb-4">
      <legend className="fieldset-legend">Attack</legend>
      <select
        id="attack-selector"
        className="select select-bordered w-full"
        value={selectedAttack || ''}
        onChange={onAttackChange}
      >
        {options}
      </select>
    </fieldset>
  )
}
|
80 |
+
|
81 |
+
/**
 * Line chart of one metric against attack strength, with one line per model.
 *
 * Fetches `data/<dataset>?dataset_type=attacks_variations` whenever `dataset`
 * changes, then lets the user choose the metric (Y axis) and the attack used
 * to filter the rows. Only models present in `selectedModels` are drawn.
 */
const LeaderboardChart = ({ dataset, selectedModels }: LeaderboardChartProps) => {
  const [chartData, setChartData] = useState<Row[]>([])
  const [loading, setLoading] = useState(true)
  const [error, setError] = useState<string | null>(null)
  const [metrics, setMetrics] = useState<Set<string>>(new Set())
  const [attacks, setAttacks] = useState<Set<string>>(new Set())
  const [selectedMetric, setSelectedMetric] = useState<string | null>(null)
  const [selectedAttack, setSelectedAttack] = useState<string | null>(null)

  useEffect(() => {
    // Set by the cleanup below so a response for a previous dataset cannot
    // overwrite the state of the dataset that is currently selected.
    let cancelled = false

    setLoading(true)
    // Clear any error from an earlier fetch; otherwise a single failure
    // would keep hiding the chart for every later dataset.
    setError(null)

    API.fetchStaticFile(`data/${dataset}?dataset_type=attacks_variations`)
      .then((response) => {
        if (cancelled) return
        const data = JSON.parse(response)
        const rows: Row[] = data['all_attacks_df'].map((row: any) => {
          const newRow: Row = { ...row }
          // Convert strength value to number if it exists and is a string
          if (typeof newRow.strength === 'string') {
            newRow.strength = parseFloat(newRow.strength)
          }
          return newRow
        })

        setSelectedMetric(data['metrics'][0])
        setMetrics(new Set(data['metrics']))
        setSelectedAttack(data['attacks_with_variations'][0])
        setAttacks(new Set(data['attacks_with_variations']))
        setChartData(rows)
        setLoading(false)
      })
      .catch((err) => {
        if (cancelled) return
        setError('Failed to fetch JSON: ' + err.message)
        setLoading(false)
      })

    return () => {
      cancelled = true
    }
  }, [dataset])

  const handleMetricChange = (event: React.ChangeEvent<HTMLSelectElement>) => {
    setSelectedMetric(event.target.value)
  }

  const handleAttackChange = (event: React.ChangeEvent<HTMLSelectElement>) => {
    setSelectedAttack(event.target.value)
  }

  // Rows for the selected attack, ordered by strength. Rows whose strength is
  // not a finite number (e.g. a "NaN" placeholder parsed to NaN) are dropped:
  // they cannot be placed on a numeric axis and would make the comparator
  // inconsistent.
  const sortedChartData = chartData
    .filter((row) => !selectedAttack || row.attack === selectedAttack)
    .filter((row) => Number.isFinite(Number(row.strength)))
    .sort((a, b) => (a.strength as number) - (b.strength as number))

  // Explicit X domain. Math.min/Math.max of an empty list are +/-Infinity, so
  // fall back to a fixed finite range when there is nothing to plot.
  const strengths = sortedChartData.map((item) => Number(item.strength))
  const xDomain: [number, number] =
    strengths.length > 0 ? [Math.min(...strengths), Math.max(...strengths)] : [0, 1]

  // Palette cycled through when there are more models than colors.
  const colors = ['#8884d8', '#82ca9d', '#ffc658', '#ff8042', '#0088fe', '#00C49F']

  // Unique selected models that actually occur in the filtered rows. No
  // lines are drawn until a metric has been selected.
  const visibleModels = selectedMetric
    ? [
        ...new Set(
          sortedChartData
            .filter((row) => selectedModels.has(row.model as string))
            .map((row) => row.model)
        ),
      ]
    : []

  return (
    <div className="rounded shadow p-4 overflow-auto mb-8">
      {loading && <LoadingSpinner />}
      {error && <div className="text-red-500">{error}</div>}
      {!loading && !error && (
        <>
          <div className="flex flex-col md:flex-row md:gap-x-4 mb-4">
            <div className="w-full md:w-1/2">
              <MetricSelector
                metrics={metrics}
                selectedMetric={selectedMetric}
                onMetricChange={handleMetricChange}
              />
            </div>
            <div className="w-full md:w-1/2">
              <AttackSelector
                attacks={attacks}
                selectedAttack={selectedAttack}
                onAttackChange={handleAttackChange}
              />
            </div>
          </div>

          {chartData.length > 0 && (
            <div className="h-64 mb-4">
              <ResponsiveContainer width="100%" height="100%">
                <LineChart
                  data={sortedChartData}
                  margin={{
                    top: 5,
                    right: 30,
                    left: 20,
                    bottom: 5,
                  }}
                >
                  <CartesianGrid strokeDasharray="3 3" />
                  <XAxis
                    dataKey="strength"
                    domain={xDomain}
                    type="number"
                    tickFormatter={(value) => value.toFixed(3)}
                    label={{ value: 'Strength', position: 'insideBottomRight', offset: -5 }}
                  />
                  <YAxis
                    label={{
                      value: selectedMetric || '',
                      angle: -90,
                      position: 'insideLeft',
                      style: { textAnchor: 'middle' },
                    }}
                    tickFormatter={(value) => value.toFixed(3)}
                  />
                  <Tooltip
                    contentStyle={{
                      backgroundColor: '#2a303c',
                      borderColor: '#374151',
                      color: 'white',
                    }}
                    // Metric cells may arrive as non-numeric placeholders
                    // (e.g. the string "NaN"); only format real numbers.
                    formatter={(value: number) =>
                      typeof value === 'number' ? value.toFixed(3) : String(value)
                    }
                  />
                  <Legend />

                  {visibleModels.map((model, index) => (
                    <Line
                      key={model as string}
                      type="monotone"
                      dataKey={selectedMetric as string}
                      data={sortedChartData.filter((row) => row.model === model)}
                      name={model as string}
                      stroke={colors[index % colors.length]}
                      dot={false}
                    />
                  ))}
                </LineChart>
              </ResponsiveContainer>
            </div>
          )}
        </>
      )}
    </div>
  )
}
|
240 |
+
|
241 |
+
export default LeaderboardChart
|
frontend/src/components/LeaderboardFilter.tsx
CHANGED
@@ -173,7 +173,7 @@ const LeaderboardFilter: React.FC<FilterProps> = ({
|
|
173 |
<input
|
174 |
type="text"
|
175 |
placeholder="Search metrics..."
|
176 |
-
className="input input-bordered border-
|
177 |
value={searchTerm}
|
178 |
onChange={(e) => {
|
179 |
const value = e.target.value
|
|
|
173 |
<input
|
174 |
type="text"
|
175 |
placeholder="Search metrics..."
|
176 |
+
className="input input-bordered border-gray-300 input-sm w-48 pr-8"
|
177 |
value={searchTerm}
|
178 |
onChange={(e) => {
|
179 |
const value = e.target.value
|
frontend/src/components/LeaderboardPage.tsx
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import React, { useState, useEffect } from 'react'
|
2 |
+
import DatasetSelector from './DatasetSelector'
|
3 |
+
import LeaderboardTable from './LeaderboardTable'
|
4 |
+
import LeaderboardChart from './LeaderboardChart'
|
5 |
+
import ModelFilter from './ModelFilter'
|
6 |
+
import API from '../API'
|
7 |
+
import LoadingSpinner from './LoadingSpinner'
|
8 |
+
|
9 |
+
/**
 * Top-level leaderboard view.
 *
 * Lets the user pick a dataset, fetches that dataset's benchmark payload once,
 * derives the list of model columns from it, and passes the payload plus the
 * current model selection down to the table and the chart.
 */
const LeaderboardPage: React.FC = () => {
  // Dataset identifiers exactly as they are interpolated into the `data/...` request path.
  const datasetNames = [
    'voxpopuli_1k/audio',
    'ravdess_1k/audio',
    'val2014_1k/image',
    'sav_val_full/video',
  ]
  const [selectedDatasetName, setSelectedDatasetName] = useState(datasetNames[0])
  const [models, setModels] = useState<string[]>([])
  const [selectedModels, setSelectedModels] = useState<Set<string>>(new Set())
  const [loading, setLoading] = useState(true)
  const [benchmarkData, setBenchmarkData] = useState<any>(null)
  const [error, setError] = useState<string | null>(null)

  // Fetch available models when dataset changes
  useEffect(() => {
    // Set by the cleanup below so that a response belonging to a dataset the
    // user has already navigated away from cannot overwrite the current state.
    let cancelled = false

    setLoading(true)
    setError(null)

    API.fetchStaticFile(`data/${selectedDatasetName}?dataset_type=benchmark`)
      .then((response) => {
        if (cancelled) return

        // Parse and derive everything first; state is only touched once the
        // payload is known to be usable, so a malformed response cannot leave
        // the page half-updated.
        const data = JSON.parse(response)
        const rows = data['rows']
        const allKeys: string[] = Array.from(new Set(rows.flatMap((row: any) => Object.keys(row))))
        // Remove 'metric' from headers if it exists
        const headers = allKeys.filter((key) => key !== 'metric')

        setBenchmarkData(data)
        setModels(headers)
        // Initialize all models as selected
        setSelectedModels(new Set(headers))
        setLoading(false)
      })
      .catch((err) => {
        if (cancelled) return

        console.error('Failed to fetch models:', err)
        // Drop the previous dataset's data so it is not displayed under the
        // newly selected dataset name.
        setBenchmarkData(null)
        setModels([])
        setSelectedModels(new Set())
        setError(
          'Failed to load benchmark data: ' + (err instanceof Error ? err.message : String(err))
        )
        setLoading(false)
      })

    return () => {
      cancelled = true
    }
  }, [selectedDatasetName])

  return (
    <div className="">
      <div className="flex flex-col gap-4">
        <DatasetSelector
          datasetNames={datasetNames}
          selectedDatasetName={selectedDatasetName}
          onDatasetNameChange={setSelectedDatasetName}
        />
      </div>
      {loading ? (
        <LoadingSpinner />
      ) : error ? (
        <div className="text-red-500">{error}</div>
      ) : (
        <>
          {models.length > 0 && (
            <ModelFilter
              models={models}
              selectedModels={selectedModels}
              setSelectedModels={setSelectedModels}
            />
          )}
          <div className="space-y-8">
            <LeaderboardTable benchmarkData={benchmarkData} selectedModels={selectedModels} />
            <div className="mt-8 pt-4 border-t border-gray-200">
              <LeaderboardChart dataset={selectedDatasetName} selectedModels={selectedModels} />
            </div>
          </div>
        </>
      )}
    </div>
  )
}
|
76 |
+
|
77 |
+
export default LeaderboardPage
|
frontend/src/components/LeaderboardTable.tsx
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
import React, { useEffect, useState } from 'react'
|
2 |
-
import API from '../API'
|
3 |
import LeaderboardFilter from './LeaderboardFilter'
|
4 |
-
import
|
5 |
|
6 |
interface LeaderboardTableProps {
|
7 |
-
|
|
|
8 |
}
|
9 |
|
10 |
interface Row {
|
@@ -16,119 +16,126 @@ interface Groups {
|
|
16 |
[group: string]: { [subgroup: string]: string[] }
|
17 |
}
|
18 |
|
19 |
-
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
}
|
23 |
|
24 |
-
const LeaderboardTable: React.FC<LeaderboardTableProps> = ({
|
25 |
const [tableRows, setTableRows] = useState<Row[]>([])
|
26 |
const [tableHeader, setTableHeader] = useState<string[]>([])
|
27 |
-
const [loading, setLoading] = useState(true)
|
28 |
const [error, setError] = useState<string | null>(null)
|
29 |
const [groups, setGroups] = useState<Groups>({})
|
30 |
const [openGroups, setOpenGroups] = useState<{ [key: string]: boolean }>({})
|
31 |
const [openSubGroups, setOpenSubGroups] = useState<{ [key: string]: { [key: string]: boolean } }>(
|
32 |
{}
|
33 |
)
|
34 |
-
|
35 |
const [selectedMetrics, setSelectedMetrics] = useState<Set<string>>(new Set())
|
36 |
-
const [selectedModels, setSelectedModels] = useState<Set<string>>(new Set())
|
37 |
-
|
38 |
-
// To store the unique metrics from the Overall group
|
39 |
const [overallMetrics, setOverallMetrics] = useState<string[]>([])
|
|
|
40 |
|
41 |
useEffect(() => {
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
|
|
|
|
|
|
|
|
|
|
59 |
})
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
// Sort metrics to ensure consistent subgroup order
|
75 |
-
const sortedMetrics = [...metrics].sort()
|
76 |
-
|
77 |
-
// Create and sort subgroups
|
78 |
-
acc[group] = sortedMetrics.reduce<{ [key: string]: string[] }>((subAcc, metric) => {
|
79 |
-
const [mainGroup, subGroup] = metric.split('_')
|
80 |
-
if (!subAcc[mainGroup]) {
|
81 |
-
subAcc[mainGroup] = []
|
82 |
-
}
|
83 |
-
subAcc[mainGroup].push(metric)
|
84 |
-
return subAcc
|
85 |
-
}, {})
|
86 |
-
|
87 |
-
// Convert to sorted entries and back to object
|
88 |
-
acc[group] = Object.fromEntries(
|
89 |
-
Object.entries(acc[group]).sort(([subGroupA], [subGroupB]) =>
|
90 |
-
subGroupA.localeCompare(subGroupB)
|
91 |
-
)
|
92 |
)
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
Object.keys(groupsData).forEach((group) => {
|
108 |
-
initialOpenGroups[group] = false
|
109 |
-
initialOpenSubGroups[group] = {}
|
110 |
-
Object.keys(groupsData[group]).forEach((subGroup) => {
|
111 |
-
initialOpenSubGroups[group][subGroup] = false
|
112 |
-
})
|
113 |
})
|
114 |
-
|
115 |
-
// Get all metrics from all groups
|
116 |
-
const allMetrics = Object.values(groups).flat()
|
117 |
-
setSelectedMetrics(new Set(allMetrics))
|
118 |
-
// Initialize all models as selected
|
119 |
-
setSelectedModels(new Set(headers))
|
120 |
-
setTableHeader(headers)
|
121 |
-
setTableRows(rows)
|
122 |
-
setGroups(groupsData)
|
123 |
-
setOpenGroups(initialOpenGroups)
|
124 |
-
setOpenSubGroups(initialOpenSubGroups)
|
125 |
-
setLoading(false)
|
126 |
-
})
|
127 |
-
.catch((err) => {
|
128 |
-
setError('Failed to fetch JSON: ' + err.message)
|
129 |
-
setLoading(false)
|
130 |
})
|
131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
|
133 |
const toggleGroup = (group: string) => {
|
134 |
setOpenGroups((prev) => ({ ...prev, [group]: !prev[group] }))
|
@@ -227,23 +234,21 @@ const LeaderboardTable: React.FC<LeaderboardTableProps> = ({ dataset }) => {
|
|
227 |
}
|
228 |
|
229 |
return (
|
230 |
-
<div className="rounded shadow
|
231 |
-
{loading && <div>Loading...</div>}
|
232 |
{error && <div className="text-red-500">{error}</div>}
|
233 |
-
|
234 |
-
|
235 |
-
<div className="overflow-x-auto">
|
236 |
<div className="flex flex-col gap-4">
|
237 |
-
<
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
/>
|
242 |
-
<LeaderboardFilter
|
243 |
groups={groups}
|
244 |
selectedMetrics={selectedMetrics}
|
245 |
setSelectedMetrics={setSelectedMetrics}
|
246 |
-
/>
|
247 |
</div>
|
248 |
|
249 |
{selectedModels.size === 0 || selectedMetrics.size === 0 ? (
|
@@ -252,332 +257,25 @@ const LeaderboardTable: React.FC<LeaderboardTableProps> = ({ dataset }) => {
|
|
252 |
</div>
|
253 |
) : (
|
254 |
<>
|
255 |
-
|
256 |
-
<thead>
|
257 |
-
<tr>
|
258 |
-
<th>Group / Subgroup</th>
|
259 |
-
{overallMetrics.map((metric) => (
|
260 |
-
<th
|
261 |
-
key={metric}
|
262 |
-
colSpan={tableHeader.filter((model) => selectedModels.has(model)).length}
|
263 |
-
className="text-center border-x"
|
264 |
-
>
|
265 |
-
{metric}
|
266 |
-
</th>
|
267 |
-
))}
|
268 |
-
</tr>
|
269 |
-
<tr>
|
270 |
-
<th></th>
|
271 |
-
{overallMetrics.map((metric) => (
|
272 |
-
<React.Fragment key={`header-models-${metric}`}>
|
273 |
-
{tableHeader
|
274 |
-
.filter((model) => selectedModels.has(model))
|
275 |
-
.map((model) => (
|
276 |
-
<th key={`${metric}-${model}`} className="text-center text-xs">
|
277 |
-
{model}
|
278 |
-
</th>
|
279 |
-
))}
|
280 |
-
</React.Fragment>
|
281 |
-
))}
|
282 |
-
</tr>
|
283 |
-
</thead>
|
284 |
-
<tbody>
|
285 |
-
{/* First render each group */}
|
286 |
-
{Object.entries(groups).map(([group, subGroups]) => {
|
287 |
-
// Skip the "Overall" group completely
|
288 |
-
if (group === 'Overall') return null
|
289 |
-
|
290 |
-
// Get all metrics for this group
|
291 |
-
const allGroupMetrics = Object.values(subGroups).flat()
|
292 |
-
// Filter to only include selected metrics
|
293 |
-
const visibleGroupMetrics = filterMetricsByGroupAndSubgroup(
|
294 |
-
allGroupMetrics,
|
295 |
-
group
|
296 |
-
)
|
297 |
-
|
298 |
-
// Skip this group if no metrics are selected
|
299 |
-
if (visibleGroupMetrics.length === 0) return null
|
300 |
-
|
301 |
-
return (
|
302 |
-
<React.Fragment key={group}>
|
303 |
-
{/* Group row with average stats for the entire group */}
|
304 |
-
<tr
|
305 |
-
className="bg-base-200 cursor-pointer hover:bg-base-300"
|
306 |
-
onClick={() => toggleGroup(group)}
|
307 |
-
>
|
308 |
-
<td className="font-medium">
|
309 |
-
{openGroups[group] ? '▼ ' : '▶ '}
|
310 |
-
{group}
|
311 |
-
</td>
|
312 |
-
{/* For each metric column */}
|
313 |
-
{overallMetrics.map((metric) => (
|
314 |
-
// Render sub-columns for each model
|
315 |
-
<React.Fragment key={`${group}-${metric}`}>
|
316 |
-
{tableHeader
|
317 |
-
.filter((model) => selectedModels.has(model))
|
318 |
-
.map((col) => {
|
319 |
-
// Find all metrics in this group that match the current metric name
|
320 |
-
const allMetricsWithName = findAllMetricsForName(metric)
|
321 |
-
const metricsInGroupForThisMetric = visibleGroupMetrics.filter(
|
322 |
-
(m) => allMetricsWithName.includes(m)
|
323 |
-
)
|
324 |
-
const stats = calculateStats(metricsInGroupForThisMetric, col)
|
325 |
-
|
326 |
-
return (
|
327 |
-
<td
|
328 |
-
key={`${group}-${metric}-${col}`}
|
329 |
-
className="font-medium text-center"
|
330 |
-
>
|
331 |
-
{!isNaN(stats.avg)
|
332 |
-
? `${stats.avg.toFixed(3)} ± ${stats.stdDev.toFixed(3)}`
|
333 |
-
: 'N/A'}
|
334 |
-
</td>
|
335 |
-
)
|
336 |
-
})}
|
337 |
-
</React.Fragment>
|
338 |
-
))}
|
339 |
-
</tr>
|
340 |
-
|
341 |
-
{/* Only render subgroups if group is open */}
|
342 |
-
{openGroups[group] &&
|
343 |
-
Object.entries(subGroups).map(([subGroup, metrics]) => {
|
344 |
-
// Filter to only include selected metrics in this subgroup
|
345 |
-
const visibleSubgroupMetrics = filterMetricsByGroupAndSubgroup(
|
346 |
-
metrics,
|
347 |
-
group,
|
348 |
-
subGroup
|
349 |
-
)
|
350 |
-
|
351 |
-
// Skip this subgroup if no metrics are selected
|
352 |
-
if (visibleSubgroupMetrics.length === 0) return null
|
353 |
-
|
354 |
-
return (
|
355 |
-
<React.Fragment key={`${group}-${subGroup}`}>
|
356 |
-
{/* Subgroup row with average stats for the subgroup */}
|
357 |
-
<tr
|
358 |
-
className="bg-base-100 cursor-pointer hover:bg-base-200"
|
359 |
-
onClick={() => toggleSubGroup(group, subGroup)}
|
360 |
-
>
|
361 |
-
<td className="pl-6 font-medium">
|
362 |
-
{openSubGroups[group]?.[subGroup] ? '▼ ' : '▶ '}
|
363 |
-
{subGroup}
|
364 |
-
</td>
|
365 |
-
{/* For each metric column */}
|
366 |
-
{overallMetrics.map((metric) => (
|
367 |
-
// Render sub-columns for each model
|
368 |
-
<React.Fragment key={`${group}-${subGroup}-${metric}`}>
|
369 |
-
{tableHeader
|
370 |
-
.filter((model) => selectedModels.has(model))
|
371 |
-
.map((col) => {
|
372 |
-
// Find all metrics in this subgroup that match the current metric name
|
373 |
-
const allMetricsWithName = findAllMetricsForName(metric)
|
374 |
-
const metricsInSubgroupForThisMetric =
|
375 |
-
visibleSubgroupMetrics.filter((m) =>
|
376 |
-
allMetricsWithName.includes(m)
|
377 |
-
)
|
378 |
-
const stats = calculateStats(
|
379 |
-
metricsInSubgroupForThisMetric,
|
380 |
-
col
|
381 |
-
)
|
382 |
-
|
383 |
-
return (
|
384 |
-
<td
|
385 |
-
key={`${group}-${subGroup}-${metric}-${col}`}
|
386 |
-
className="font-medium text-center"
|
387 |
-
>
|
388 |
-
{!isNaN(stats.avg)
|
389 |
-
? `${stats.avg.toFixed(3)} ± ${stats.stdDev.toFixed(3)}`
|
390 |
-
: 'N/A'}
|
391 |
-
</td>
|
392 |
-
)
|
393 |
-
})}
|
394 |
-
</React.Fragment>
|
395 |
-
))}
|
396 |
-
</tr>
|
397 |
-
|
398 |
-
{/* Individual metric rows */}
|
399 |
-
{openSubGroups[group]?.[subGroup] &&
|
400 |
-
// Sort visibleSubgroupMetrics alphabetically by the clean metric name
|
401 |
-
[...visibleSubgroupMetrics]
|
402 |
-
.sort((a, b) => {
|
403 |
-
// For metrics with format {category}_{strength}_{overall_metric_name},
|
404 |
-
// First sort by category, then by overall_metric_name, then by strength
|
405 |
-
|
406 |
-
// First extract the overall metric group
|
407 |
-
const getOverallMetricGroup = (metric: string) => {
|
408 |
-
for (const overall of overallMetrics) {
|
409 |
-
if (
|
410 |
-
metric.endsWith(`_${overall}`) ||
|
411 |
-
metric === overall
|
412 |
-
) {
|
413 |
-
return overall
|
414 |
-
}
|
415 |
-
}
|
416 |
-
return ''
|
417 |
-
}
|
418 |
-
|
419 |
-
const overallA = getOverallMetricGroup(a)
|
420 |
-
const overallB = getOverallMetricGroup(b)
|
421 |
-
|
422 |
-
// Extract the strength (last part before the overall metric)
|
423 |
-
const stripOverall = (metric: string, overall: string) => {
|
424 |
-
if (metric.endsWith(`_${overall}`)) {
|
425 |
-
// Remove the overall metric group and any preceding underscore
|
426 |
-
const stripped = metric.slice(
|
427 |
-
0,
|
428 |
-
metric.length - overall.length - 1
|
429 |
-
)
|
430 |
-
const parts = stripped.split('_')
|
431 |
-
return parts.length > 0 ? parts[parts.length - 1] : ''
|
432 |
-
}
|
433 |
-
return metric
|
434 |
-
}
|
435 |
-
|
436 |
-
// Extract the category (what remains after removing strength and overall_metric_name)
|
437 |
-
const getCategory = (metric: string, overall: string) => {
|
438 |
-
if (metric.endsWith(`_${overall}`)) {
|
439 |
-
const stripped = metric.slice(
|
440 |
-
0,
|
441 |
-
metric.length - overall.length - 1
|
442 |
-
)
|
443 |
-
const parts = stripped.split('_')
|
444 |
-
// Remove the last part (strength) and join the rest (category)
|
445 |
-
return parts.length > 1
|
446 |
-
? parts.slice(0, parts.length - 1).join('_')
|
447 |
-
: ''
|
448 |
-
}
|
449 |
-
return metric
|
450 |
-
}
|
451 |
-
|
452 |
-
const categoryA = getCategory(a, overallA)
|
453 |
-
const categoryB = getCategory(b, overallB)
|
454 |
-
|
455 |
-
// First sort by category
|
456 |
-
if (categoryA !== categoryB) {
|
457 |
-
return categoryA.localeCompare(categoryB)
|
458 |
-
}
|
459 |
-
|
460 |
-
// Then sort by overall metric name
|
461 |
-
if (overallA !== overallB) {
|
462 |
-
return overallA.localeCompare(overallB)
|
463 |
-
}
|
464 |
-
|
465 |
-
// Finally sort by strength
|
466 |
-
const subA = stripOverall(a, overallA)
|
467 |
-
const subB = stripOverall(b, overallB)
|
468 |
-
|
469 |
-
// Try to parse subA and subB as numbers, handling k/m/b suffixes
|
470 |
-
const parseNumber = (str: string) => {
|
471 |
-
const match = str.match(/^(\d+(?:\.\d+)?)([kKmMbB]?)$/)
|
472 |
-
if (!match) return NaN
|
473 |
-
let [_, num, suffix] = match
|
474 |
-
let value = parseFloat(num)
|
475 |
-
switch (suffix.toLowerCase()) {
|
476 |
-
case 'k':
|
477 |
-
value *= 1e3
|
478 |
-
break
|
479 |
-
case 'm':
|
480 |
-
value *= 1e6
|
481 |
-
break
|
482 |
-
case 'b':
|
483 |
-
value *= 1e9
|
484 |
-
break
|
485 |
-
}
|
486 |
-
return value
|
487 |
-
}
|
488 |
-
|
489 |
-
const numA = parseNumber(subA)
|
490 |
-
const numB = parseNumber(subB)
|
491 |
-
|
492 |
-
if (!isNaN(numA) && !isNaN(numB)) {
|
493 |
-
return numA - numB
|
494 |
-
}
|
495 |
-
// Fallback to string comparison if not both numbers
|
496 |
-
return subA.localeCompare(subB)
|
497 |
-
})
|
498 |
-
.map((metric) => {
|
499 |
-
const row = tableRows.find((r) => r.metric === metric)
|
500 |
-
if (!row) return null
|
501 |
-
|
502 |
-
// Extract the metric name (after the underscore)
|
503 |
-
const metricName = metric.includes('_')
|
504 |
-
? metric.split('_').slice(1).join('_')
|
505 |
-
: metric
|
506 |
-
|
507 |
-
return (
|
508 |
-
<tr key={metric} className="hover:bg-base-100">
|
509 |
-
<td className="pl-10">{metric}</td>
|
510 |
-
{/* For each metric column */}
|
511 |
-
{overallMetrics.map((oMetric) => {
|
512 |
-
// Only show values for the matching metric
|
513 |
-
const isMatchingMetric =
|
514 |
-
findAllMetricsForName(oMetric).includes(metric)
|
515 |
-
|
516 |
-
if (!isMatchingMetric) {
|
517 |
-
// Fill empty cells for non-matching metrics
|
518 |
-
return (
|
519 |
-
<React.Fragment key={`${metric}-${oMetric}`}>
|
520 |
-
{tableHeader
|
521 |
-
.filter((model) => selectedModels.has(model))
|
522 |
-
.map((col) => (
|
523 |
-
<td
|
524 |
-
key={`${metric}-${oMetric}-${col}`}
|
525 |
-
className="text-center"
|
526 |
-
></td>
|
527 |
-
))}
|
528 |
-
</React.Fragment>
|
529 |
-
)
|
530 |
-
}
|
531 |
-
|
532 |
-
// Show values for the matching metric
|
533 |
-
return (
|
534 |
-
<React.Fragment key={`${metric}-${oMetric}`}>
|
535 |
-
{tableHeader
|
536 |
-
.filter((model) => selectedModels.has(model))
|
537 |
-
.map((col) => {
|
538 |
-
const cell = row[col]
|
539 |
-
return (
|
540 |
-
<td
|
541 |
-
key={`${metric}-${oMetric}-${col}`}
|
542 |
-
className="text-center"
|
543 |
-
>
|
544 |
-
{!isNaN(Number(cell))
|
545 |
-
? Number(Number(cell).toFixed(3))
|
546 |
-
: cell}
|
547 |
-
</td>
|
548 |
-
)
|
549 |
-
})}
|
550 |
-
</React.Fragment>
|
551 |
-
)
|
552 |
-
})}
|
553 |
-
</tr>
|
554 |
-
)
|
555 |
-
})}
|
556 |
-
</React.Fragment>
|
557 |
-
)
|
558 |
-
})}
|
559 |
-
</React.Fragment>
|
560 |
-
)
|
561 |
-
})}
|
562 |
-
</tbody>
|
563 |
-
</table>
|
564 |
-
|
565 |
-
{/* Separate table for metrics that don't belong to any overall group */}
|
566 |
{(() => {
|
567 |
const standaloneMetrics = findStandaloneMetrics()
|
568 |
if (standaloneMetrics.length === 0) return null
|
569 |
-
|
570 |
return (
|
571 |
-
<div className="
|
572 |
-
<
|
573 |
-
<table className="table w-full">
|
574 |
<thead>
|
575 |
<tr>
|
576 |
-
<th>
|
|
|
|
|
577 |
{tableHeader
|
578 |
.filter((model) => selectedModels.has(model))
|
579 |
.map((model) => (
|
580 |
-
<th
|
|
|
|
|
|
|
581 |
{model}
|
582 |
</th>
|
583 |
))}
|
@@ -587,16 +285,20 @@ const LeaderboardTable: React.FC<LeaderboardTableProps> = ({ dataset }) => {
|
|
587 |
{standaloneMetrics.sort().map((metric) => {
|
588 |
const row = tableRows.find((r) => r.metric === metric)
|
589 |
if (!row) return null
|
590 |
-
|
591 |
return (
|
592 |
<tr key={`standalone-${metric}`} className="hover:bg-base-100">
|
593 |
-
<td>
|
|
|
|
|
594 |
{tableHeader
|
595 |
.filter((model) => selectedModels.has(model))
|
596 |
.map((col) => {
|
597 |
const cell = row[col]
|
598 |
return (
|
599 |
-
<td
|
|
|
|
|
|
|
600 |
{!isNaN(Number(cell))
|
601 |
? Number(Number(cell).toFixed(3))
|
602 |
: cell}
|
@@ -611,6 +313,342 @@ const LeaderboardTable: React.FC<LeaderboardTableProps> = ({ dataset }) => {
|
|
611 |
</div>
|
612 |
)
|
613 |
})()}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
614 |
</>
|
615 |
)}
|
616 |
</div>
|
|
|
1 |
import React, { useEffect, useState } from 'react'
|
|
|
2 |
import LeaderboardFilter from './LeaderboardFilter'
|
3 |
+
import LoadingSpinner from './LoadingSpinner'
|
4 |
|
5 |
interface LeaderboardTableProps {
|
6 |
+
benchmarkData: any
|
7 |
+
selectedModels: Set<string>
|
8 |
}
|
9 |
|
10 |
interface Row {
|
|
|
16 |
[group: string]: { [subgroup: string]: string[] }
|
17 |
}
|
18 |
|
19 |
+
/**
 * Checkbox grid for choosing which overall metrics are shown.
 *
 * Renders one checkbox per entry of `overallMetrics`; toggling a checkbox
 * calls `setSelectedOverallMetrics` with a fresh Set that has that metric
 * added or removed. The legend shows "selected/total" counts.
 */
const OverallMetricFilter: React.FC<{
  overallMetrics: string[]
  selectedOverallMetrics: Set<string>
  setSelectedOverallMetrics: (metrics: Set<string>) => void
}> = (props) => {
  const { overallMetrics, selectedOverallMetrics, setSelectedOverallMetrics } = props

  // Copy the current selection before flipping membership so the Set held by
  // the parent is never mutated in place.
  const handleToggle = (name: string) => {
    const next = new Set(selectedOverallMetrics)
    // Set.delete reports whether the entry was present; if it was not, add it.
    if (!next.delete(name)) {
      next.add(name)
    }
    setSelectedOverallMetrics(next)
  }

  const checkboxes = overallMetrics.map((name) => (
    <label key={name} className="flex items-center gap-2 text-sm">
      <input
        type="checkbox"
        className="form-checkbox h-4 w-4"
        checked={selectedOverallMetrics.has(name)}
        onChange={() => handleToggle(name)}
      />
      <span className="truncate" title={name}>
        {name}
      </span>
    </label>
  ))

  return (
    <div className="w-full mb-4">
      <fieldset className="fieldset w-full p-4 rounded border border-gray-700">
        <legend className="fieldset-legend font-semibold">
          Metrics ({selectedOverallMetrics.size}/{overallMetrics.length})
        </legend>
        <div className="grid grid-cols-2 md:grid-cols-4 lg:grid-cols-6 gap-1 max-h-48 overflow-y-auto pr-2">
          {checkboxes}
        </div>
      </fieldset>
    </div>
  )
}
|
58 |
|
59 |
+
const LeaderboardTable: React.FC<LeaderboardTableProps> = ({ benchmarkData, selectedModels }) => {
|
60 |
const [tableRows, setTableRows] = useState<Row[]>([])
|
61 |
const [tableHeader, setTableHeader] = useState<string[]>([])
|
|
|
62 |
const [error, setError] = useState<string | null>(null)
|
63 |
const [groups, setGroups] = useState<Groups>({})
|
64 |
const [openGroups, setOpenGroups] = useState<{ [key: string]: boolean }>({})
|
65 |
const [openSubGroups, setOpenSubGroups] = useState<{ [key: string]: { [key: string]: boolean } }>(
|
66 |
{}
|
67 |
)
|
|
|
68 |
const [selectedMetrics, setSelectedMetrics] = useState<Set<string>>(new Set())
|
|
|
|
|
|
|
69 |
const [overallMetrics, setOverallMetrics] = useState<string[]>([])
|
70 |
+
const [selectedOverallMetrics, setSelectedOverallMetrics] = useState<Set<string>>(new Set())
|
71 |
|
72 |
useEffect(() => {
|
73 |
+
if (!benchmarkData) {
|
74 |
+
return
|
75 |
+
}
|
76 |
+
try {
|
77 |
+
const data = benchmarkData
|
78 |
+
const rows: Row[] = data['rows']
|
79 |
+
const allGroups = data['groups'] as { [key: string]: string[] }
|
80 |
+
const { Overall: overallGroup, ...groups } = allGroups
|
81 |
+
const uniqueMetrics = new Set<string>()
|
82 |
+
overallGroup?.forEach((metric) => {
|
83 |
+
if (metric.includes('_')) {
|
84 |
+
const metricName = metric.split('_').slice(1).join('_')
|
85 |
+
uniqueMetrics.add(metricName)
|
86 |
+
}
|
87 |
+
})
|
88 |
+
setOverallMetrics(Array.from(uniqueMetrics).sort())
|
89 |
+
setSelectedOverallMetrics(new Set(Array.from(uniqueMetrics)))
|
90 |
+
const groupsData = Object.entries(groups)
|
91 |
+
.sort(([groupA], [groupB]) => {
|
92 |
+
if (groupA === 'Overall') return -1
|
93 |
+
if (groupB === 'Overall') return 1
|
94 |
+
return groupA.localeCompare(groupB)
|
95 |
})
|
96 |
+
.reduce(
|
97 |
+
(acc, [group, metrics]) => {
|
98 |
+
const sortedMetrics = [...metrics].sort()
|
99 |
+
acc[group] = sortedMetrics.reduce<{ [key: string]: string[] }>((subAcc, metric) => {
|
100 |
+
const [mainGroup, subGroup] = metric.split('_')
|
101 |
+
if (!subAcc[mainGroup]) {
|
102 |
+
subAcc[mainGroup] = []
|
103 |
+
}
|
104 |
+
subAcc[mainGroup].push(metric)
|
105 |
+
return subAcc
|
106 |
+
}, {})
|
107 |
+
acc[group] = Object.fromEntries(
|
108 |
+
Object.entries(acc[group]).sort(([subGroupA], [subGroupB]) =>
|
109 |
+
subGroupA.localeCompare(subGroupB)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
)
|
111 |
+
)
|
112 |
+
return acc
|
113 |
+
},
|
114 |
+
{} as { [key: string]: { [key: string]: string[] } }
|
115 |
+
)
|
116 |
+
const allKeys: string[] = Array.from(new Set(rows.flatMap((row) => Object.keys(row))))
|
117 |
+
const headers = allKeys.filter((key) => key !== 'metric')
|
118 |
+
const initialOpenGroups: { [key: string]: boolean } = {}
|
119 |
+
const initialOpenSubGroups: { [key: string]: { [key: string]: boolean } } = {}
|
120 |
+
Object.keys(groupsData).forEach((group) => {
|
121 |
+
initialOpenGroups[group] = false
|
122 |
+
initialOpenSubGroups[group] = {}
|
123 |
+
Object.keys(groupsData[group]).forEach((subGroup) => {
|
124 |
+
initialOpenSubGroups[group][subGroup] = false
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
})
|
127 |
+
const allMetrics = Object.values(groups).flat()
|
128 |
+
setSelectedMetrics(new Set(allMetrics))
|
129 |
+
setTableHeader(headers)
|
130 |
+
setTableRows(rows)
|
131 |
+
setGroups(groupsData)
|
132 |
+
setOpenGroups(initialOpenGroups)
|
133 |
+
setOpenSubGroups(initialOpenSubGroups)
|
134 |
+
setError(null)
|
135 |
+
} catch (err: any) {
|
136 |
+
setError('Failed to parse benchmark data, please try again: ' + err.message)
|
137 |
+
}
|
138 |
+
}, [benchmarkData])
|
139 |
|
140 |
const toggleGroup = (group: string) => {
|
141 |
setOpenGroups((prev) => ({ ...prev, [group]: !prev[group] }))
|
|
|
234 |
}
|
235 |
|
236 |
return (
|
237 |
+
<div className="rounded shadow">
|
|
|
238 |
{error && <div className="text-red-500">{error}</div>}
|
239 |
+
{!error && (
|
240 |
+
<div className="flex flex-col gap-8">
|
|
|
241 |
<div className="flex flex-col gap-4">
|
242 |
+
<OverallMetricFilter
|
243 |
+
overallMetrics={overallMetrics}
|
244 |
+
selectedOverallMetrics={selectedOverallMetrics}
|
245 |
+
setSelectedOverallMetrics={setSelectedOverallMetrics}
|
246 |
/>
|
247 |
+
{/* <LeaderboardFilter
|
248 |
groups={groups}
|
249 |
selectedMetrics={selectedMetrics}
|
250 |
setSelectedMetrics={setSelectedMetrics}
|
251 |
+
/> */}
|
252 |
</div>
|
253 |
|
254 |
{selectedModels.size === 0 || selectedMetrics.size === 0 ? (
|
|
|
257 |
</div>
|
258 |
) : (
|
259 |
<>
|
260 |
+
{/* Standalone metrics table */}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
261 |
{(() => {
|
262 |
const standaloneMetrics = findStandaloneMetrics()
|
263 |
if (standaloneMetrics.length === 0) return null
|
|
|
264 |
return (
|
265 |
+
<div className="overflow-x-auto max-h-[80vh] overflow-y-auto">
|
266 |
+
<table className="table w-full min-w-max border-gray-700 border">
|
|
|
267 |
<thead>
|
268 |
<tr>
|
269 |
+
<th className="sticky left-0 top-0 bg-base-100 z-20 border-gray-700 border">
|
270 |
+
Metric
|
271 |
+
</th>
|
272 |
{tableHeader
|
273 |
.filter((model) => selectedModels.has(model))
|
274 |
.map((model) => (
|
275 |
+
<th
|
276 |
+
key={`standalone-${model}`}
|
277 |
+
className="sticky top-0 bg-base-100 z-10 text-center text-xs border-gray-700 border"
|
278 |
+
>
|
279 |
{model}
|
280 |
</th>
|
281 |
))}
|
|
|
285 |
{standaloneMetrics.sort().map((metric) => {
|
286 |
const row = tableRows.find((r) => r.metric === metric)
|
287 |
if (!row) return null
|
|
|
288 |
return (
|
289 |
<tr key={`standalone-${metric}`} className="hover:bg-base-100">
|
290 |
+
<td className="sticky left-0 bg-base-100 z-10 border-gray-700 border">
|
291 |
+
{metric}
|
292 |
+
</td>
|
293 |
{tableHeader
|
294 |
.filter((model) => selectedModels.has(model))
|
295 |
.map((col) => {
|
296 |
const cell = row[col]
|
297 |
return (
|
298 |
+
<td
|
299 |
+
key={`standalone-${metric}-${col}`}
|
300 |
+
className="text-center border-gray-700 border"
|
301 |
+
>
|
302 |
{!isNaN(Number(cell))
|
303 |
? Number(Number(cell).toFixed(3))
|
304 |
: cell}
|
|
|
313 |
</div>
|
314 |
)
|
315 |
})()}
|
316 |
+
|
317 |
+
{/* Main metrics table */}
|
318 |
+
<div className="overflow-x-auto max-h-[80vh] overflow-y-auto">
|
319 |
+
<table className="table w-full min-w-max border-gray-700 border">
|
320 |
+
<thead>
|
321 |
+
<tr>
|
322 |
+
<th className="sticky left-0 top-0 bg-base-100 z-20 border-gray-700 border">
|
323 |
+
Attack Category Metrics
|
324 |
+
</th>
|
325 |
+
{overallMetrics
|
326 |
+
.filter((metric) => selectedOverallMetrics.has(metric))
|
327 |
+
.map((metric) => (
|
328 |
+
<th
|
329 |
+
key={metric}
|
330 |
+
colSpan={
|
331 |
+
tableHeader.filter((model) => selectedModels.has(model)).length
|
332 |
+
}
|
333 |
+
className="sticky top-0 bg-base-100 z-10 text-center border-x border-gray-300 border border-gray-700 border"
|
334 |
+
>
|
335 |
+
{metric}
|
336 |
+
</th>
|
337 |
+
))}
|
338 |
+
</tr>
|
339 |
+
<tr>
|
340 |
+
<th className="sticky left-0 bg-base-100 z-10 border-gray-700 border"></th>
|
341 |
+
{overallMetrics
|
342 |
+
.filter((metric) => selectedOverallMetrics.has(metric))
|
343 |
+
.map((metric) => (
|
344 |
+
<React.Fragment key={`header-models-${metric}`}>
|
345 |
+
{tableHeader
|
346 |
+
.filter((model) => selectedModels.has(model))
|
347 |
+
.map((model) => (
|
348 |
+
<th
|
349 |
+
key={`${metric}-${model}`}
|
350 |
+
className="sticky top-12 bg-base-100 z-10 text-center text-xs border-gray-700 border border-bottom-solid border-b-gray-700 border-b-2"
|
351 |
+
>
|
352 |
+
{model}
|
353 |
+
</th>
|
354 |
+
))}
|
355 |
+
</React.Fragment>
|
356 |
+
))}
|
357 |
+
</tr>
|
358 |
+
</thead>
|
359 |
+
<tbody>
|
360 |
+
{/* First render each group */}
|
361 |
+
{Object.entries(groups).map(([group, subGroups]) => {
|
362 |
+
// Skip the "Overall" group completely
|
363 |
+
if (group === 'Overall') return null
|
364 |
+
|
365 |
+
// Get all metrics for this group
|
366 |
+
const allGroupMetrics = Object.values(subGroups).flat()
|
367 |
+
// Filter to only include selected metrics
|
368 |
+
const visibleGroupMetrics = filterMetricsByGroupAndSubgroup(
|
369 |
+
allGroupMetrics,
|
370 |
+
group
|
371 |
+
)
|
372 |
+
|
373 |
+
// Skip this group if no metrics are selected
|
374 |
+
if (visibleGroupMetrics.length === 0) return null
|
375 |
+
|
376 |
+
return (
|
377 |
+
<React.Fragment key={group}>
|
378 |
+
{/* Group row with average stats for the entire group */}
|
379 |
+
<tr
|
380 |
+
className="bg-base-200 cursor-pointer hover:bg-base-300"
|
381 |
+
onClick={() => toggleGroup(group)}
|
382 |
+
>
|
383 |
+
<td className="sticky left-0 bg-base-200 z-10 font-medium border-gray-700 border">
|
384 |
+
{openGroups[group] ? '▼ ' : '▶ '}
|
385 |
+
{group}
|
386 |
+
</td>
|
387 |
+
{/* For each metric column */}
|
388 |
+
{overallMetrics
|
389 |
+
.filter((metric) => selectedOverallMetrics.has(metric))
|
390 |
+
.map((metric) => (
|
391 |
+
// Render sub-columns for each model
|
392 |
+
<React.Fragment key={`${group}-${metric}`}>
|
393 |
+
{tableHeader
|
394 |
+
.filter((model) => selectedModels.has(model))
|
395 |
+
.map((col) => {
|
396 |
+
// Find all metrics in this group that match the current metric name
|
397 |
+
const allMetricsWithName = findAllMetricsForName(metric)
|
398 |
+
const metricsInGroupForThisMetric =
|
399 |
+
visibleGroupMetrics.filter((m) =>
|
400 |
+
allMetricsWithName.includes(m)
|
401 |
+
)
|
402 |
+
const stats = calculateStats(metricsInGroupForThisMetric, col)
|
403 |
+
|
404 |
+
return (
|
405 |
+
<td
|
406 |
+
key={`${group}-${metric}-${col}`}
|
407 |
+
className="font-medium text-center border-gray-700 border"
|
408 |
+
>
|
409 |
+
{!isNaN(stats.avg)
|
410 |
+
? `${stats.avg.toFixed(3)} ± ${stats.stdDev.toFixed(3)}`
|
411 |
+
: 'N/A'}
|
412 |
+
</td>
|
413 |
+
)
|
414 |
+
})}
|
415 |
+
</React.Fragment>
|
416 |
+
))}
|
417 |
+
</tr>
|
418 |
+
|
419 |
+
{/* Only render subgroups if group is open */}
|
420 |
+
{openGroups[group] &&
|
421 |
+
Object.entries(subGroups).map(([subGroup, metrics]) => {
|
422 |
+
// Filter to only include selected metrics in this subgroup
|
423 |
+
const visibleSubgroupMetrics = filterMetricsByGroupAndSubgroup(
|
424 |
+
metrics,
|
425 |
+
group,
|
426 |
+
subGroup
|
427 |
+
)
|
428 |
+
|
429 |
+
// Skip this subgroup if no metrics are selected
|
430 |
+
if (visibleSubgroupMetrics.length === 0) return null
|
431 |
+
|
432 |
+
return (
|
433 |
+
<React.Fragment key={`${group}-${subGroup}`}>
|
434 |
+
{/* Subgroup row with average stats for the subgroup */}
|
435 |
+
<tr
|
436 |
+
className="bg-base-100 cursor-pointer hover:bg-base-200"
|
437 |
+
onClick={() => toggleSubGroup(group, subGroup)}
|
438 |
+
>
|
439 |
+
<td className="sticky left-0 bg-base-100 z-10 pl-6 font-medium border-gray-700 border">
|
440 |
+
{openSubGroups[group]?.[subGroup] ? '▼ ' : '▶ '}
|
441 |
+
{subGroup}
|
442 |
+
</td>
|
443 |
+
{/* For each metric column */}
|
444 |
+
{overallMetrics
|
445 |
+
.filter((metric) => selectedOverallMetrics.has(metric))
|
446 |
+
.map((metric) => (
|
447 |
+
// Render sub-columns for each model
|
448 |
+
<React.Fragment key={`${group}-${subGroup}-${metric}`}>
|
449 |
+
{tableHeader
|
450 |
+
.filter((model) => selectedModels.has(model))
|
451 |
+
.map((col) => {
|
452 |
+
// Find all metrics in this subgroup that match the current metric name
|
453 |
+
const allMetricsWithName =
|
454 |
+
findAllMetricsForName(metric)
|
455 |
+
const metricsInSubgroupForThisMetric =
|
456 |
+
visibleSubgroupMetrics.filter((m) =>
|
457 |
+
allMetricsWithName.includes(m)
|
458 |
+
)
|
459 |
+
const stats = calculateStats(
|
460 |
+
metricsInSubgroupForThisMetric,
|
461 |
+
col
|
462 |
+
)
|
463 |
+
|
464 |
+
return (
|
465 |
+
<td
|
466 |
+
key={`${group}-${subGroup}-${metric}-${col}`}
|
467 |
+
className="font-medium text-center border-gray-700 border"
|
468 |
+
>
|
469 |
+
{!isNaN(stats.avg)
|
470 |
+
? `${stats.avg.toFixed(3)} ± ${stats.stdDev.toFixed(3)}`
|
471 |
+
: 'N/A'}
|
472 |
+
</td>
|
473 |
+
)
|
474 |
+
})}
|
475 |
+
</React.Fragment>
|
476 |
+
))}
|
477 |
+
</tr>
|
478 |
+
|
479 |
+
{/* Individual metric rows */}
|
480 |
+
{openSubGroups[group]?.[subGroup] &&
|
481 |
+
// Sort visibleSubgroupMetrics alphabetically by the clean metric name
|
482 |
+
[...visibleSubgroupMetrics]
|
483 |
+
.sort((a, b) => {
|
484 |
+
// For metrics with format {category}_{strength}_{overall_metric_name},
|
485 |
+
// First sort by category, then by overall_metric_name, then by strength
|
486 |
+
|
487 |
+
// First extract the overall metric group
|
488 |
+
// Resolve which overall metric a row key belongs to: the first entry of
// `overallMetrics` that the key either equals or carries as a `_<name>`
// suffix. Yields '' when no entry matches.
const getOverallMetricGroup = (metric: string) => {
  const owner = overallMetrics.find(
    (candidate) => metric === candidate || metric.endsWith(`_${candidate}`)
  )
  return owner ?? ''
}
|
499 |
+
|
500 |
+
const overallA = getOverallMetricGroup(a)
|
501 |
+
const overallB = getOverallMetricGroup(b)
|
502 |
+
|
503 |
+
// Extract the strength (last part before the overall metric)
|
504 |
+
// Return the last underscore-separated segment that precedes the
// `_<overall>` suffix (the "strength" part of the key). Keys that do not end
// with that suffix are returned unchanged.
const stripOverall = (metric: string, overall: string) => {
  if (!metric.endsWith(`_${overall}`)) {
    return metric
  }
  // Drop the suffix together with the underscore that joins it to the rest.
  const prefix = metric.slice(0, metric.length - overall.length - 1)
  // lastIndexOf yields -1 when there is no underscore, so slice(0) keeps the
  // whole prefix in that case.
  return prefix.slice(prefix.lastIndexOf('_') + 1)
}
|
516 |
+
|
517 |
+
// Extract the category (what remains after removing strength and overall_metric_name)
|
518 |
+
// Return the part of the key that precedes both the `_<overall>` suffix and
// the final segment before it (the "category"). When only one segment
// precedes the suffix the category is ''. Keys that do not end with the
// suffix are returned unchanged.
const getCategory = (metric: string, overall: string) => {
  if (!metric.endsWith(`_${overall}`)) {
    return metric
  }
  const prefix = metric.slice(0, metric.length - overall.length - 1)
  const lastUnderscore = prefix.lastIndexOf('_')
  // No underscore means the prefix is a single segment, so there is no category.
  return lastUnderscore === -1 ? '' : prefix.slice(0, lastUnderscore)
}
|
532 |
+
|
533 |
+
const categoryA = getCategory(a, overallA)
|
534 |
+
const categoryB = getCategory(b, overallB)
|
535 |
+
|
536 |
+
// First sort by category
|
537 |
+
if (categoryA !== categoryB) {
|
538 |
+
return categoryA.localeCompare(categoryB)
|
539 |
+
}
|
540 |
+
|
541 |
+
// Then sort by overall metric name
|
542 |
+
if (overallA !== overallB) {
|
543 |
+
return overallA.localeCompare(overallB)
|
544 |
+
}
|
545 |
+
|
546 |
+
// Finally sort by strength
|
547 |
+
const subA = stripOverall(a, overallA)
|
548 |
+
const subB = stripOverall(b, overallB)
|
549 |
+
|
550 |
+
// Try to parse subA and subB as numbers, handling k/m/b suffixes
|
551 |
+
// Parse a non-negative decimal with an optional k/m/b magnitude suffix
// (case-insensitive) into a number, e.g. '1.5k' -> 1500. Anything that does
// not match that shape yields NaN.
const parseNumber = (str: string) => {
  const parsed = /^(\d+(?:\.\d+)?)([kKmMbB]?)$/.exec(str)
  if (parsed === null) return NaN

  const [, digits, unit] = parsed
  const multipliers: Record<string, number> = { k: 1e3, m: 1e6, b: 1e9 }
  // An empty suffix has no table entry and leaves the value unscaled.
  const scale = multipliers[unit.toLowerCase()] ?? 1
  return parseFloat(digits) * scale
}
|
569 |
+
|
570 |
+
const numA = parseNumber(subA)
|
571 |
+
const numB = parseNumber(subB)
|
572 |
+
|
573 |
+
if (!isNaN(numA) && !isNaN(numB)) {
|
574 |
+
return numA - numB
|
575 |
+
}
|
576 |
+
// Fallback to string comparison if not both numbers
|
577 |
+
return subA.localeCompare(subB)
|
578 |
+
})
|
579 |
+
.map((metric) => {
|
580 |
+
const row = tableRows.find((r) => r.metric === metric)
|
581 |
+
if (!row) return null
|
582 |
+
|
583 |
+
// Extract the metric name (after the underscore)
|
584 |
+
const metricName = metric.includes('_')
|
585 |
+
? metric.split('_').slice(1).join('_')
|
586 |
+
: metric
|
587 |
+
|
588 |
+
return (
|
589 |
+
<tr key={metric} className="hover:bg-base-100">
|
590 |
+
<td className="sticky left-0 bg-base-100 z-10 pl-10 border-gray-700 border">
|
591 |
+
{metric}
|
592 |
+
</td>
|
593 |
+
{/* For each metric column */}
|
594 |
+
{overallMetrics
|
595 |
+
.filter((oMetric) =>
|
596 |
+
selectedOverallMetrics.has(oMetric)
|
597 |
+
)
|
598 |
+
.map((oMetric) => {
|
599 |
+
// Only show values for the matching metric
|
600 |
+
const isMatchingMetric =
|
601 |
+
findAllMetricsForName(oMetric).includes(metric)
|
602 |
+
|
603 |
+
if (!isMatchingMetric) {
|
604 |
+
// Fill empty cells for non-matching metrics
|
605 |
+
return (
|
606 |
+
<React.Fragment key={`${metric}-${oMetric}`}>
|
607 |
+
{tableHeader
|
608 |
+
.filter((model) =>
|
609 |
+
selectedModels.has(model)
|
610 |
+
)
|
611 |
+
.map((col) => (
|
612 |
+
<td
|
613 |
+
key={`${metric}-${oMetric}-${col}`}
|
614 |
+
className="text-center border-gray-700 border"
|
615 |
+
></td>
|
616 |
+
))}
|
617 |
+
</React.Fragment>
|
618 |
+
)
|
619 |
+
}
|
620 |
+
return (
|
621 |
+
<React.Fragment key={`${metric}-${oMetric}`}>
|
622 |
+
{tableHeader
|
623 |
+
.filter((model) => selectedModels.has(model))
|
624 |
+
.map((col) => {
|
625 |
+
const cell = row[col]
|
626 |
+
return (
|
627 |
+
<td
|
628 |
+
key={`${metric}-${oMetric}-${col}`}
|
629 |
+
className="text-center border-gray-700 border"
|
630 |
+
>
|
631 |
+
{!isNaN(Number(cell))
|
632 |
+
? Number(Number(cell).toFixed(3))
|
633 |
+
: cell}
|
634 |
+
</td>
|
635 |
+
)
|
636 |
+
})}
|
637 |
+
</React.Fragment>
|
638 |
+
)
|
639 |
+
})}
|
640 |
+
</tr>
|
641 |
+
)
|
642 |
+
})}
|
643 |
+
</React.Fragment>
|
644 |
+
)
|
645 |
+
})}
|
646 |
+
</React.Fragment>
|
647 |
+
)
|
648 |
+
})}
|
649 |
+
</tbody>
|
650 |
+
</table>
|
651 |
+
</div>
|
652 |
</>
|
653 |
)}
|
654 |
</div>
|
frontend/src/components/LoadingSpinner.tsx
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import React from 'react'
|
2 |
+
|
3 |
+
/** Props accepted by LoadingSpinner. */
interface LoadingSpinnerProps {
  /** Minimum height of the spinner container as a CSS length, e.g. '300px'. Defaults to '300px'. */
  minHeight?: string
}

/**
 * Spinner centered horizontally and vertically inside a flex container whose
 * minimum height is `minHeight`.
 */
const LoadingSpinner: React.FC<LoadingSpinnerProps> = ({ minHeight = '300px' }) => {
  // minHeight is applied as an inline style rather than an interpolated
  // `min-h-[...]` class: Tailwind only emits CSS for class names it finds as
  // complete literal strings in the source, so a class assembled at runtime
  // would match no generated rule and the container would get no min-height.
  return (
    <div className="flex items-center justify-center" style={{ minHeight }}>
      <span className="loading loading-spinner loading-lg text-primary"></span>
    </div>
  )
}
|
14 |
+
|
15 |
+
export default LoadingSpinner
|
frontend/src/components/ModelFilter.tsx
CHANGED
@@ -19,7 +19,7 @@ const ModelFilter: React.FC<ModelFilterProps> = ({ models, selectedModels, setSe
|
|
19 |
|
20 |
return (
|
21 |
<div className="w-full mb-4">
|
22 |
-
<fieldset className="fieldset w-full p-4 rounded border">
|
23 |
<legend className="fieldset-legend font-semibold">
|
24 |
Models ({selectedModels.size}/{models.length})
|
25 |
</legend>
|
|
|
19 |
|
20 |
return (
|
21 |
<div className="w-full mb-4">
|
22 |
+
<fieldset className="fieldset w-full p-4 rounded border border-gray-700">
|
23 |
<legend className="fieldset-legend font-semibold">
|
24 |
Models ({selectedModels.size}/{models.length})
|
25 |
</legend>
|