Commit d4d998a
Parent(s): init
- .env.template +6 -0
- .gitignore +45 -0
- README.md +82 -0
- app.py +281 -0
- requirements.txt +7 -0
- src/about.py +75 -0
- src/display/css_html_js.py +46 -0
- src/display/formatting.py +71 -0
- src/display/utils.py +177 -0
- src/envs.py +27 -0
- src/leaderboard/processor.py +180 -0
- src/populate.py +211 -0
- src/submission/submit.py +105 -0
.env.template
ADDED
@@ -0,0 +1,6 @@
+HF_TOKEN="your_huggingface_write_token"
+OWNER="your_huggingface_username_or_org"
+RESULTS_DATASET_ID="your_username/guardbench-results"
+SUBMITTER_TOKEN="your_secret_submission_token"
+ADMIN_USERNAME="admin"
+ADMIN_PASSWORD="password"  # Change this!
.gitignore
ADDED
@@ -0,0 +1,45 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+.venv/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Environment variables
+.env
+
+# Virtual Environment
+venv/
+ENV/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Hugging Face cache
+eval-queue/
+eval-results/
+eval-queue-bk/
+eval-results-bk/
README.md
ADDED
@@ -0,0 +1,82 @@
+# GuardBench Leaderboard
+
+A HuggingFace leaderboard for the GuardBench project that allows users to submit evaluation results and view the performance of different models on safety guardrails.
+
+## Features
+
+- Display model performance across multiple safety categories
+- Accept JSONL submissions with evaluation results
+- Store submissions in a HuggingFace dataset
+- Secure submission process with token authentication
+- Automatic data refresh from HuggingFace
+
+## Setup
+
+1. Clone this repository
+2. Install dependencies:
+```
+pip install -r requirements.txt
+```
+3. Create a `.env` file based on the `.env.template`:
+```
+cp .env.template .env
+```
+4. Edit the `.env` file with your HuggingFace credentials and settings
+5. Run the application:
+```
+python app.py
+```
+
+## Submission Format
+
+Submissions should be in JSONL format, with each line containing a JSON object with the following structure:
+
+```json
+{
+  "model_name": "model-name",
+  "per_category_metrics": {
+    "Category Name": {
+      "default_prompts": {
+        "f1_binary": 0.95,
+        "recall_binary": 0.93,
+        "precision_binary": 1.0,
+        "error_ratio": 0.0,
+        "avg_runtime_ms": 3000
+      },
+      "jailbreaked_prompts": { ... },
+      "default_answers": { ... },
+      "jailbreaked_answers": { ... }
+    },
+    ...
+  },
+  "avg_metrics": {
+    "default_prompts": {
+      "f1_binary": 0.97,
+      "recall_binary": 0.95,
+      "precision_binary": 1.0,
+      "error_ratio": 0.0,
+      "avg_runtime_ms": 3000
+    },
+    "jailbreaked_prompts": { ... },
+    "default_answers": { ... },
+    "jailbreaked_answers": { ... }
+  }
+}
+```
+
+## Environment Variables
+
+- `HF_TOKEN`: Your HuggingFace write token
+- `OWNER`: Your HuggingFace username or organization
+- `RESULTS_DATASET_ID`: The ID of the dataset to store results (e.g., "username/guardbench-results")
+- `SUBMITTER_TOKEN`: A secret token required for submissions
+- `ADMIN_USERNAME`: Username for admin access to the leaderboard
+- `ADMIN_PASSWORD`: Password for admin access to the leaderboard
+
+## Deployment
+
+This application can be deployed as a HuggingFace Space for public access. Follow the HuggingFace Spaces documentation for deployment instructions.
+
+## License
+
+MIT
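Since each leaderboard entry is a single JSON object per line, a results file matching the schema above can be produced with a few lines of Python. The snippet below is an illustrative sketch rather than part of this commit; the model name and metric values are placeholders.

```python
import json

# Hypothetical entry; the model name and metric values are placeholders.
entry = {
    "model_name": "my-org/my-guard-model",
    "avg_metrics": {
        "default_prompts": {
            "f1_binary": 0.97,
            "recall_binary": 0.95,
            "precision_binary": 1.0,
            "error_ratio": 0.0,
            "avg_runtime_ms": 3000,
        },
    },
}

# JSONL means one JSON object per line.
with open("results.jsonl", "w") as f:
    f.write(json.dumps(entry) + "\n")
```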
app.py
ADDED
@@ -0,0 +1,281 @@
+"""
+GuardBench Leaderboard Application
+"""
+
+import os
+import json
+import tempfile
+import logging
+import gradio as gr
+from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
+import pandas as pd
+from apscheduler.schedulers.background import BackgroundScheduler
+
+from src.about import (
+    CITATION_BUTTON_LABEL,
+    CITATION_BUTTON_TEXT,
+    EVALUATION_QUEUE_TEXT,
+    INTRODUCTION_TEXT,
+    LLM_BENCHMARKS_TEXT,
+    TITLE,
+)
+from src.display.css_html_js import custom_css
+from src.display.utils import (
+    GUARDBENCH_COLUMN,
+    DISPLAY_COLS,
+    METRIC_COLS,
+    HIDDEN_COLS,
+    NEVER_HIDDEN_COLS,
+    CATEGORIES,
+    TEST_TYPES,
+    ModelType,
+    Precision,
+    WeightType
+)
+from src.display.formatting import styled_message, styled_error, styled_warning
+from src.envs import (
+    ADMIN_USERNAME,
+    ADMIN_PASSWORD,
+    RESULTS_DATASET_ID,
+    SUBMITTER_TOKEN,
+    TOKEN,
+    DATA_PATH
+)
+from src.populate import get_leaderboard_df, download_leaderboard_data, get_category_leaderboard_df
+from src.submission.submit import process_submission
+
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+# Ensure data directory exists
+os.makedirs(DATA_PATH, exist_ok=True)
+
+# Initialize leaderboard data
+try:
+    logger.info("Initializing leaderboard data...")
+    LEADERBOARD_DF = get_leaderboard_df()
+    logger.info(f"Loaded leaderboard with {len(LEADERBOARD_DF)} entries")
+except Exception as e:
+    logger.error(f"Error loading leaderboard data: {e}")
+    LEADERBOARD_DF = pd.DataFrame()
+
+
+def init_leaderboard(dataframe):
+    """
+    Initialize the leaderboard component.
+    """
+    if dataframe is None or dataframe.empty:
+        # Create an empty dataframe with the right columns
+        columns = [getattr(GUARDBENCH_COLUMN, col).name for col in DISPLAY_COLS]
+        dataframe = pd.DataFrame(columns=columns)
+        logger.warning("Initializing empty leaderboard")
+
+    return Leaderboard(
+        value=dataframe,
+        datatype=[getattr(GUARDBENCH_COLUMN, col).type for col in DISPLAY_COLS],
+        select_columns=SelectColumns(
+            default_selection=[getattr(GUARDBENCH_COLUMN, col).name for col in DISPLAY_COLS],
+            cant_deselect=[getattr(GUARDBENCH_COLUMN, col).name for col in NEVER_HIDDEN_COLS],
+            label="Select Columns to Display:",
+        ),
+        search_columns=[GUARDBENCH_COLUMN.model.name],
+        hide_columns=[getattr(GUARDBENCH_COLUMN, col).name for col in HIDDEN_COLS],
+        filter_columns=[
+            ColumnFilter(GUARDBENCH_COLUMN.model_type.name, type="checkboxgroup", label="Model types"),
+        ],
+        interactive=False,
+    )
+
+
+def submit_results(
+    model_name: str,
+    base_model: str,
+    revision: str,
+    precision: str,
+    weight_type: str,
+    model_type: str,
+    submission_file: tempfile._TemporaryFileWrapper
+):
+    """
+    Handle submission of results with model metadata.
+    """
+    if submission_file is None:
+        return styled_error("No submission file provided")
+
+    if not model_name:
+        return styled_error("Model name is required")
+
+    if not model_type:
+        return styled_error("Please select a model type")
+
+    file_path = submission_file.name
+    logger.info(f"Received submission for model {model_name}: {file_path}")
+
+    # Add metadata to the submission
+    metadata = {
+        "model_name": model_name,
+        "base_model": base_model,
+        "revision": revision if revision else "main",
+        "precision": precision,
+        "weight_type": weight_type,
+        "model_type": model_type
+    }
+
+    # Process the submission
+    result = process_submission(file_path, metadata)
+
+    # Refresh the leaderboard data
+    global LEADERBOARD_DF
+    try:
+        logger.info("Refreshing leaderboard data after submission...")
+        LEADERBOARD_DF = get_leaderboard_df()
+        logger.info("Refreshed leaderboard data after submission")
+    except Exception as e:
+        logger.error(f"Error refreshing leaderboard data: {e}")
+
+    return result
+
+
+def refresh_data():
+    """
+    Refresh the leaderboard data from HuggingFace.
+    """
+    global LEADERBOARD_DF
+    try:
+        logger.info("Performing scheduled refresh of leaderboard data...")
+        LEADERBOARD_DF = get_leaderboard_df()
+        logger.info("Scheduled refresh of leaderboard data completed")
+    except Exception as e:
+        logger.error(f"Error in scheduled refresh: {e}")
+
+
+# Create Gradio app
+demo = gr.Blocks(css=custom_css)
+
+with demo:
+    gr.HTML(TITLE)
+    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+
+    with gr.Tabs(elem_classes="tab-buttons") as tabs:
+        with gr.TabItem("🏅 Leaderboard", elem_id="guardbench-leaderboard-tab", id=0):
+            refresh_button = gr.Button("Refresh Leaderboard")
+
+            # Create tabs for each category
+            with gr.Tabs(elem_classes="category-tabs") as category_tabs:
+                # First tab for average metrics across all categories
+                with gr.TabItem("📊 Overall Performance", elem_id="overall-tab"):
+                    leaderboard = init_leaderboard(LEADERBOARD_DF)
+
+                # Create a tab for each category
+                for category in CATEGORIES:
+                    with gr.TabItem(f"{category}", elem_id=f"category-{category.lower().replace(' ', '-')}-tab"):
+                        category_df = get_category_leaderboard_df(category)
+                        category_leaderboard = init_leaderboard(category_df)
+
+            # Refresh button functionality
+            refresh_button.click(
+                fn=lambda: [
+                    init_leaderboard(get_leaderboard_df()),
+                    *[init_leaderboard(get_category_leaderboard_df(category)) for category in CATEGORIES]
+                ],
+                inputs=[],
+                outputs=[leaderboard] + [category_tabs.children[i].children[0] for i in range(1, len(CATEGORIES) + 1)]
+            )
+
+        with gr.TabItem("📝 About", elem_id="guardbench-about-tab", id=1):
+            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+
+        with gr.TabItem("🚀 Submit", elem_id="guardbench-submit-tab", id=2):
+            gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
+
+            with gr.Row():
+                gr.Markdown("# ✉️✨ Submit your results here!", elem_classes="markdown-text")
+
+            with gr.Row():
+                with gr.Column():
+                    model_name_textbox = gr.Textbox(label="Model name")
+                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
+                    model_type = gr.Dropdown(
+                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
+                        label="Model type",
+                        multiselect=False,
+                        value=None,
+                        interactive=True,
+                    )
+
+                with gr.Column():
+                    precision = gr.Dropdown(
+                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
+                        label="Precision",
+                        multiselect=False,
+                        value="float16",
+                        interactive=True,
+                    )
+                    weight_type = gr.Dropdown(
+                        choices=[i.value.name for i in WeightType],
+                        label="Weights type",
+                        multiselect=False,
+                        value="Original",
+                        interactive=True,
+                    )
+                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
+
+            with gr.Row():
+                file_input = gr.File(
+                    label="Upload JSONL Results File",
+                    file_types=[".jsonl"]
+                )
+
+            submit_button = gr.Button("Submit Results")
+            result_output = gr.Markdown()
+
+            submit_button.click(
+                fn=submit_results,
+                inputs=[
+                    model_name_textbox,
+                    base_model_name_textbox,
+                    revision_name_textbox,
+                    precision,
+                    weight_type,
+                    model_type,
+                    file_input
+                ],
+                outputs=result_output
+            )
+
+    with gr.Row():
+        with gr.Accordion("📙 Citation", open=False):
+            citation_button = gr.Textbox(
+                value=CITATION_BUTTON_TEXT,
+                label=CITATION_BUTTON_LABEL,
+                lines=10,
+                elem_id="citation-button",
+                show_copy_button=True,
+            )
+
+        with gr.Accordion("ℹ️ Dataset Information", open=False):
+            dataset_info = gr.Markdown(f"""
+            ## Dataset Information
+
+            Results are stored in the HuggingFace dataset: [{RESULTS_DATASET_ID}](https://huggingface.co/datasets/{RESULTS_DATASET_ID})
+
+            Last updated: {pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S UTC")}
+            """)
+
+# Set up scheduler to refresh data periodically
+scheduler = BackgroundScheduler()
+scheduler.add_job(refresh_data, 'interval', minutes=30)
+scheduler.start()
+
+# Launch the app
+if __name__ == "__main__":
+    # Set up authentication if credentials are provided
+    if not ADMIN_USERNAME or not ADMIN_PASSWORD:
+        logger.warning("Admin username or password not set. Running without authentication.")
+        auth = None
+    else:
+        auth = (ADMIN_USERNAME, ADMIN_PASSWORD)
+
+    # Launch the app
+    demo.launch(server_name="0.0.0.0", server_port=7860, auth=auth)
requirements.txt
ADDED
@@ -0,0 +1,7 @@
+gradio>=3.50.2
+huggingface_hub>=0.19.0
+datasets>=2.16.0
+pandas>=2.0.0
+python-dotenv>=1.0.0
+apscheduler>=3.10.1
+gradio-leaderboard
src/about.py
ADDED
@@ -0,0 +1,75 @@
+"""
+Text content for the GuardBench Leaderboard.
+"""
+
+TITLE = """
+<div style="text-align: center; margin-bottom: 1rem">
+    <h1>GuardBench Leaderboard</h1>
+</div>
+"""
+
+INTRODUCTION_TEXT = """
+## Introduction
+
+GuardBench is a comprehensive benchmark for evaluating the safety guardrails of large language models (LLMs).
+This leaderboard tracks model performance across various safety categories, including harmful content detection,
+jailbreak resistance, and more.
+
+Models are evaluated on their ability to properly refuse harmful requests and detect problematic content
+across multiple categories and test scenarios.
+"""
+
+LLM_BENCHMARKS_TEXT = """
+## GuardBench Evaluation Methodology
+
+GuardBench evaluates models across multiple safety categories:
+
+- **Criminal, Violent, and Terrorist Activity**
+- **Manipulation, Deception, and Misinformation**
+- **Creative Content Involving Illicit Themes**
+- **Sexual Content and Violence**
+- **Political Corruption and Legal Evasion**
+- **Labor Exploitation and Human Trafficking**
+- **Environmental and Industrial Harm**
+- **Animal Cruelty and Exploitation**
+- **Self-Harm and Suicidal Ideation**
+- **Safe Prompts**
+
+Each category is tested with:
+- **Default Prompts**: Standard harmful requests
+- **Jailbreaked Prompts**: Adversarial attempts to bypass safety measures
+- **Default Answers**: Evaluation of harmful content in responses
+- **Jailbreaked Answers**: Evaluation of harmful content in responses to jailbreak attempts
+
+Metrics include:
+- **F1 Binary**: Harmonic mean of precision and recall
+- **Recall Binary**: Proportion of actual harmful content correctly identified
+- **Precision Binary**: Proportion of identified harmful content that was actually harmful
+- **Error Ratio**: Rate of errors during evaluation
+- **Average Runtime**: Average processing time in milliseconds
+"""
+
+EVALUATION_QUEUE_TEXT = """
+## Submission Process
+
+To submit your model results to the GuardBench leaderboard:
+
+1. Evaluate your model using the [GuardBench framework](https://github.com/huggingface/guard-bench)
+2. Format your results as a JSONL file according to our schema
+3. Submit your results using the submission form with your authorized token
+
+Results will be processed and added to the leaderboard once validated.
+"""
+
+CITATION_BUTTON_LABEL = "Cite GuardBench"
+
+CITATION_BUTTON_TEXT = """
+@misc{guardbench2023,
+    author = {GuardBench Team},
+    title = {GuardBench: Comprehensive Benchmark for LLM Safety Guardrails},
+    year = {2023},
+    publisher = {GitHub},
+    journal = {GitHub repository},
+    howpublished = {\\url{https://github.com/huggingface/guard-bench}}
+}
+"""
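The F1 metric referenced throughout `src/about.py` and the README is the usual harmonic mean of precision and recall. A quick illustration with placeholder numbers, not part of the commit:

```python
def f1_binary(precision: float, recall: float) -> float:
    """Harmonic mean of precision and recall."""
    if precision + recall == 0:
        return 0.0
    return 2 * precision * recall / (precision + recall)

# Placeholder numbers: precision 0.9 and recall 0.8 give an F1 of about 0.847.
print(round(f1_binary(0.9, 0.8), 3))
```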
src/display/css_html_js.py
ADDED
@@ -0,0 +1,46 @@
+"""
+CSS and styling for the GuardBench Leaderboard.
+"""
+
+custom_css = """
+.markdown-text {
+    font-size: 16px !important;
+    text-align: justify !important;
+}
+
+.tab-buttons button.selected {
+    border-color: #2196F3 !important;
+    background: #E3F2FD !important;
+    color: #2196F3 !important;
+}
+
+#citation-button textarea {
+    font-family: monospace !important;
+}
+
+.leaderboard-container {
+    margin-top: 20px;
+}
+
+.category-header {
+    font-weight: bold;
+    background-color: #f5f5f5;
+    padding: 10px;
+    margin-top: 15px;
+    border-radius: 5px;
+}
+
+.metric-name {
+    font-weight: bold;
+    color: #2196F3;
+}
+
+.model-name {
+    font-weight: bold;
+}
+
+.model-link:hover {
+    text-decoration: underline;
+    color: #1976D2;
+}
+"""
src/display/formatting.py
ADDED
@@ -0,0 +1,71 @@
+"""
+Formatting utilities for the GuardBench Leaderboard.
+"""
+
+import pandas as pd
+import numpy as np
+
+
+def make_clickable_model(model_name: str) -> str:
+    """
+    Create a clickable link for a model name.
+    """
+    return f'<a href="https://huggingface.co/{model_name}" target="_blank">{model_name}</a>'
+
+
+def has_no_nan_values(df: pd.DataFrame, columns: list) -> pd.Series:
+    """
+    Check if a row has no NaN values in the specified columns.
+    """
+    return ~df[columns].isna().any(axis=1)
+
+
+def format_percentage(value: float) -> str:
+    """
+    Format a value as a percentage.
+    """
+    if pd.isna(value):
+        return "N/A"
+    return f"{value * 100:.2f}%"
+
+
+def format_number(value: float, precision: int = 2) -> str:
+    """
+    Format a number with specified precision.
+    """
+    if pd.isna(value):
+        return "N/A"
+    return f"{value:.{precision}f}"
+
+
+def styled_message(message: str) -> str:
+    """
+    Format a success message with styling.
+    """
+    return f"""
+    <div style="padding: 10px; border-radius: 5px; background-color: #e6f7e6; color: #2e7d32; border: 1px solid #2e7d32;">
+        ✅ {message}
+    </div>
+    """
+
+
+def styled_warning(message: str) -> str:
+    """
+    Format a warning message with styling.
+    """
+    return f"""
+    <div style="padding: 10px; border-radius: 5px; background-color: #fff8e1; color: #ff8f00; border: 1px solid #ff8f00;">
+        ⚠️ {message}
+    </div>
+    """
+
+
+def styled_error(message: str) -> str:
+    """
+    Format an error message with styling.
+    """
+    return f"""
+    <div style="padding: 10px; border-radius: 5px; background-color: #ffebee; color: #c62828; border: 1px solid #c62828;">
+        ❌ {message}
+    </div>
+    """
src/display/utils.py
ADDED
@@ -0,0 +1,177 @@
+"""
+Utility classes and functions for the GuardBench Leaderboard display.
+"""
+
+from dataclasses import dataclass, field, fields
+from enum import Enum, auto
+from typing import List, Optional
+
+
+class ModelType(Enum):
+    """Model types for the leaderboard."""
+    Unknown = auto()
+    OpenSource = auto()
+    ClosedSource = auto()
+    API = auto()
+
+    def to_str(self, separator: str = " ") -> str:
+        """Convert enum to string with separator."""
+        if self == ModelType.Unknown:
+            return "Unknown"
+        elif self == ModelType.OpenSource:
+            return f"Open{separator}Source"
+        elif self == ModelType.ClosedSource:
+            return f"Closed{separator}Source"
+        elif self == ModelType.API:
+            return "API"
+        return "Unknown"
+
+
+class Precision(Enum):
+    """Model precision types."""
+    Unknown = auto()
+    float16 = auto()
+    bfloat16 = auto()
+    float32 = auto()
+    int8 = auto()
+    int4 = auto()
+
+
+class WeightType(Enum):
+    """Model weight types."""
+    Original = auto()
+    Delta = auto()
+    Adapter = auto()
+
+
+@dataclass
+class ColumnInfo:
+    """Information about a column in the leaderboard."""
+    name: str
+    display_name: str
+    type: str = "text"
+    hidden: bool = False
+    never_hidden: bool = False
+    displayed_by_default: bool = True
+
+
+@dataclass
+class GuardBenchColumn:
+    """Columns for the GuardBench leaderboard."""
+    model: ColumnInfo = field(default_factory=lambda: ColumnInfo(
+        name="model_name",
+        display_name="Model",
+        never_hidden=True,
+        displayed_by_default=True
+    ))
+
+    model_type: ColumnInfo = field(default_factory=lambda: ColumnInfo(
+        name="model_type",
+        display_name="Type",
+        displayed_by_default=True
+    ))
+
+    # Metrics for all categories
+    default_prompts_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
+        name="default_prompts_f1",
+        display_name="Default Prompts F1",
+        type="number",
+        displayed_by_default=True
+    ))
+
+    jailbreaked_prompts_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
+        name="jailbreaked_prompts_f1",
+        display_name="Jailbreaked Prompts F1",
+        type="number",
+        displayed_by_default=True
+    ))
+
+    default_answers_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
+        name="default_answers_f1",
+        display_name="Default Answers F1",
+        type="number",
+        displayed_by_default=True
+    ))
+
+    jailbreaked_answers_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
+        name="jailbreaked_answers_f1",
+        display_name="Jailbreaked Answers F1",
+        type="number",
+        displayed_by_default=True
+    ))
+
+    # Average metrics
+    average_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
+        name="average_f1",
+        display_name="Average F1",
+        type="number",
+        displayed_by_default=True,
+        never_hidden=True
+    ))
+
+    average_recall: ColumnInfo = field(default_factory=lambda: ColumnInfo(
+        name="average_recall",
+        display_name="Average Recall",
+        type="number",
+        displayed_by_default=False
+    ))
+
+    average_precision: ColumnInfo = field(default_factory=lambda: ColumnInfo(
+        name="average_precision",
+        display_name="Average Precision",
+        type="number",
+        displayed_by_default=False
+    ))
+
+    # Additional metadata
+    submission_date: ColumnInfo = field(default_factory=lambda: ColumnInfo(
+        name="submission_date",
+        display_name="Submission Date",
+        displayed_by_default=False
+    ))
+
+
+# Create instances for easy access
+GUARDBENCH_COLUMN = GuardBenchColumn()
+
+# Extract column lists for different views
+COLS = [f.name for f in fields(GUARDBENCH_COLUMN)]
+DISPLAY_COLS = [getattr(GUARDBENCH_COLUMN, f.name).name for f in fields(GUARDBENCH_COLUMN)
+                if getattr(GUARDBENCH_COLUMN, f.name).displayed_by_default]
+METRIC_COLS = [getattr(GUARDBENCH_COLUMN, f.name).name for f in fields(GUARDBENCH_COLUMN)
+               if getattr(GUARDBENCH_COLUMN, f.name).type == "number"]
+HIDDEN_COLS = [getattr(GUARDBENCH_COLUMN, f.name).name for f in fields(GUARDBENCH_COLUMN)
+               if getattr(GUARDBENCH_COLUMN, f.name).hidden]
+NEVER_HIDDEN_COLS = [getattr(GUARDBENCH_COLUMN, f.name).name for f in fields(GUARDBENCH_COLUMN)
+                     if getattr(GUARDBENCH_COLUMN, f.name).never_hidden]
+
+# Categories in GuardBench
+CATEGORIES = [
+    "Criminal, Violent, and Terrorist Activity",
+    "Manipulation, Deception, and Misinformation",
+    "Creative Content Involving Illicit Themes",
+    "Sexual Content and Violence",
+    "Political Corruption and Legal Evasion",
+    "Labor Exploitation and Human Trafficking",
+    "Environmental and Industrial Harm",
+    "Animal Cruelty and Exploitation",
+    "Self-Harm and Suicidal Ideation",
+    "Safe Prompts"
+]
+
+# Test types in GuardBench
+TEST_TYPES = [
+    "default_prompts",
+    "jailbreaked_prompts",
+    "default_answers",
+    "jailbreaked_answers"
+]
+
+# Metrics in GuardBench
+METRICS = [
+    "f1_binary",
+    "recall_binary",
+    "precision_binary",
+    "error_ratio",
+    "avg_runtime_ms"
+]
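The dataclass registry in `src/display/utils.py` is the single source of truth for which columns the UI shows. A minimal inspection sketch, not part of the commit, assuming the repository root is on `sys.path`:

```python
# A minimal sketch: inspect the derived column lists from a Python shell.
from src.display.utils import DISPLAY_COLS, METRIC_COLS, NEVER_HIDDEN_COLS

print(DISPLAY_COLS)       # ['model_name', 'model_type', 'default_prompts_f1', ...]
print(METRIC_COLS)        # every column whose ColumnInfo.type is "number"
print(NEVER_HIDDEN_COLS)  # ['model_name', 'average_f1']
```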
src/envs.py
ADDED
@@ -0,0 +1,27 @@
+import os
+from huggingface_hub import HfApi
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+# Hugging Face configuration
+TOKEN = os.environ.get("HF_TOKEN")  # A read/write token for your org
+OWNER = os.environ.get("OWNER", "guard-bench")  # Change to your org
+SUBMITTER_TOKEN = os.environ.get("SUBMITTER_TOKEN")
+ADMIN_USERNAME = os.environ.get("ADMIN_USERNAME")
+ADMIN_PASSWORD = os.environ.get("ADMIN_PASSWORD")
+
+# Repository IDs
+REPO_ID = f"{OWNER}/leaderboard"
+RESULTS_DATASET_ID = os.environ.get("RESULTS_DATASET_ID", f"{OWNER}/guardbench-results")
+
+# Cache paths
+CACHE_PATH = os.getenv("HF_HOME", ".")
+DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data")
+
+# Local data paths
+LEADERBOARD_FILE = os.path.join(DATA_PATH, "leaderboard.json")
+
+# HF API instance
+API = HfApi(token=TOKEN)
src/leaderboard/processor.py
ADDED
@@ -0,0 +1,180 @@
+"""
+Process and transform GuardBench leaderboard data.
+"""
+
+import json
+import os
+import pandas as pd
+from datetime import datetime
+from typing import Dict, List, Any, Tuple
+
+from src.display.utils import CATEGORIES, TEST_TYPES, METRICS
+
+
+def load_leaderboard_data(file_path: str) -> Dict:
+    """
+    Load the leaderboard data from a JSON file.
+    """
+    if not os.path.exists(file_path):
+        return {"entries": [], "last_updated": datetime.now().isoformat()}
+
+    with open(file_path, 'r') as f:
+        data = json.load(f)
+
+    return data
+
+
+def save_leaderboard_data(data: Dict, file_path: str) -> None:
+    """
+    Save the leaderboard data to a JSON file.
+    """
+    # Ensure the directory exists
+    os.makedirs(os.path.dirname(file_path), exist_ok=True)
+
+    # Update the last_updated timestamp
+    data["last_updated"] = datetime.now().isoformat()
+
+    with open(file_path, 'w') as f:
+        json.dump(data, f, indent=2)
+
+
+def process_submission(submission_data: List[Dict]) -> List[Dict]:
+    """
+    Process submission data and convert it to leaderboard entries.
+    """
+    entries = []
+
+    for item in submission_data:
+        # Create a new entry for the leaderboard
+        entry = {
+            "model_name": item.get("model_name", "Unknown Model"),
+            "per_category_metrics": {},
+            "avg_metrics": {},
+            "submission_date": datetime.now().isoformat()
+        }
+
+        # Process per-category metrics
+        if "per_category_metrics" in item:
+            entry["per_category_metrics"] = item["per_category_metrics"]
+
+        # Process average metrics
+        if "avg_metrics" in item:
+            entry["avg_metrics"] = item["avg_metrics"]
+
+        entries.append(entry)
+
+    return entries
+
+
+def leaderboard_to_dataframe(leaderboard_data: Dict) -> pd.DataFrame:
+    """
+    Convert leaderboard data to a pandas DataFrame for display.
+    """
+    rows = []
+
+    for entry in leaderboard_data.get("entries", []):
+        model_name = entry.get("model_name", "Unknown Model")
+
+        # Extract average metrics for main display
+        row = {
+            "model_name": model_name,
+            "model_type": entry.get("model_type", "Unknown"),
+            "submission_date": entry.get("submission_date", "")
+        }
+
+        # Add average metrics
+        avg_metrics = entry.get("avg_metrics", {})
+        for test_type in TEST_TYPES:
+            if test_type in avg_metrics:
+                for metric in METRICS:
+                    if metric in avg_metrics[test_type]:
+                        col_name = f"{test_type}_{metric}"
+                        row[col_name] = avg_metrics[test_type][metric]
+
+        # Calculate overall averages for key metrics
+        f1_values = []
+        recall_values = []
+        precision_values = []
+
+        for test_type in TEST_TYPES:
+            if test_type in avg_metrics and "f1_binary" in avg_metrics[test_type]:
+                f1_values.append(avg_metrics[test_type]["f1_binary"])
+            if test_type in avg_metrics and "recall_binary" in avg_metrics[test_type]:
+                recall_values.append(avg_metrics[test_type]["recall_binary"])
+            if test_type in avg_metrics and "precision_binary" in avg_metrics[test_type]:
+                precision_values.append(avg_metrics[test_type]["precision_binary"])
+
+        # Add overall averages
+        if f1_values:
+            row["average_f1"] = sum(f1_values) / len(f1_values)
+        if recall_values:
+            row["average_recall"] = sum(recall_values) / len(recall_values)
+        if precision_values:
+            row["average_precision"] = sum(precision_values) / len(precision_values)
+
+        # Add specific test type F1 scores for display
+        if "default_prompts" in avg_metrics and "f1_binary" in avg_metrics["default_prompts"]:
+            row["default_prompts_f1"] = avg_metrics["default_prompts"]["f1_binary"]
+        if "jailbreaked_prompts" in avg_metrics and "f1_binary" in avg_metrics["jailbreaked_prompts"]:
+            row["jailbreaked_prompts_f1"] = avg_metrics["jailbreaked_prompts"]["f1_binary"]
+        if "default_answers" in avg_metrics and "f1_binary" in avg_metrics["default_answers"]:
+            row["default_answers_f1"] = avg_metrics["default_answers"]["f1_binary"]
+        if "jailbreaked_answers" in avg_metrics and "f1_binary" in avg_metrics["jailbreaked_answers"]:
+            row["jailbreaked_answers_f1"] = avg_metrics["jailbreaked_answers"]["f1_binary"]
+
+        rows.append(row)
+
+    # Create DataFrame and sort by average F1 score
+    df = pd.DataFrame(rows)
+    if not df.empty and "average_f1" in df.columns:
+        df = df.sort_values(by="average_f1", ascending=False)
+
+    return df
+
+
+def add_entries_to_leaderboard(leaderboard_data: Dict, new_entries: List[Dict]) -> Dict:
+    """
+    Add new entries to the leaderboard, replacing any with the same model name.
+    """
+    # Create a mapping of existing entries by model name
+    existing_entries = {entry["model_name"]: i for i, entry in enumerate(leaderboard_data.get("entries", []))}
+
+    # Process each new entry
+    for new_entry in new_entries:
+        model_name = new_entry.get("model_name")
+
+        if model_name in existing_entries:
+            # Replace existing entry
+            leaderboard_data["entries"][existing_entries[model_name]] = new_entry
+        else:
+            # Add new entry
+            if "entries" not in leaderboard_data:
+                leaderboard_data["entries"] = []
+            leaderboard_data["entries"].append(new_entry)
+
+    # Update the last_updated timestamp
+    leaderboard_data["last_updated"] = datetime.now().isoformat()
+
+    return leaderboard_data
+
+
+def process_jsonl_submission(file_path: str) -> Tuple[List[Dict], str]:
+    """
+    Process a JSONL submission file and extract entries.
+    """
+    entries = []
+    try:
+        with open(file_path, 'r') as f:
+            for line in f:
+                try:
+                    entry = json.loads(line)
+                    entries.append(entry)
+                except json.JSONDecodeError as e:
+                    return [], f"Invalid JSON in submission file: {e}"
+
+        if not entries:
+            return [], "Submission file is empty"
+
+        return entries, "Successfully processed submission"
+    except Exception as e:
+        return [], f"Error processing submission file: {e}"
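A rough usage sketch, not part of the commit, of the helpers in `src/leaderboard/processor.py`, assuming a local `results.jsonl` in the schema documented in the README:

```python
from src.leaderboard.processor import (
    add_entries_to_leaderboard,
    leaderboard_to_dataframe,
    process_jsonl_submission,
)

# Parse a hypothetical local submission file, merge it into an empty
# in-memory leaderboard, and build the DataFrame the UI would render.
entries, message = process_jsonl_submission("results.jsonl")
print(message)

leaderboard = add_entries_to_leaderboard({"entries": []}, entries)
df = leaderboard_to_dataframe(leaderboard)

# The "average_f1" column only appears when entries carry avg_metrics with f1_binary.
print(df.head())
```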
src/populate.py
ADDED
@@ -0,0 +1,211 @@
+"""
+Populate the GuardBench leaderboard from HuggingFace datasets.
+"""
+
+import json
+import os
+import pandas as pd
+import tempfile
+from typing import Dict, Tuple, List
+from glob import glob
+
+from huggingface_hub import snapshot_download, hf_hub_download, HfApi
+from datasets import load_dataset
+
+from src.display.utils import GUARDBENCH_COLUMN, DISPLAY_COLS, CATEGORIES
+from src.envs import RESULTS_DATASET_ID, TOKEN, LEADERBOARD_FILE, CACHE_PATH
+from src.leaderboard.processor import leaderboard_to_dataframe, load_leaderboard_data, save_leaderboard_data, process_jsonl_submission, add_entries_to_leaderboard
+
+
+def download_leaderboard_data() -> bool:
+    """
+    Download the latest leaderboard data from HuggingFace.
+    """
+    try:
+        # Create a temporary directory to download the submissions
+        temp_dir = os.path.join(CACHE_PATH, "temp_submissions")
+        os.makedirs(temp_dir, exist_ok=True)
+
+        # Download the entire repository
+        try:
+            snapshot_path = snapshot_download(
+                repo_id=RESULTS_DATASET_ID,
+                repo_type="dataset",
+                local_dir=temp_dir,
+                token=TOKEN,
+                ignore_patterns=["*.md", ".*"],
+                etag_timeout=30
+            )
+
+            # Process all submission files
+            all_entries = []
+            submission_files = []
+
+            # Look for submission files in the submissions directory
+            submissions_dir = os.path.join(snapshot_path, "submissions")
+            if os.path.exists(submissions_dir):
+                submission_files.extend(glob(os.path.join(submissions_dir, "*.jsonl")))
+
+            # Also look for any JSONL files in the root
+            submission_files.extend(glob(os.path.join(snapshot_path, "*.jsonl")))
+
+            # Process each submission file
+            for file_path in submission_files:
+                entries, _ = process_jsonl_submission(file_path)
+                all_entries.extend(entries)
+
+            # Create leaderboard data structure
+            leaderboard_data = {
+                "entries": all_entries,
+                "last_updated": pd.Timestamp.now().isoformat()
+            }
+
+            # Save to local file
+            save_leaderboard_data(leaderboard_data, LEADERBOARD_FILE)
+
+            return True
+        except Exception as e:
+            print(f"Error downloading repository: {e}")
+
+            # If we can't download the repository, try to download individual files
+            try:
+                api = HfApi(token=TOKEN)
+                files = api.list_repo_files(repo_id=RESULTS_DATASET_ID, repo_type="dataset")
+
+                submission_files = [f for f in files if f.endswith('.jsonl')]
+                all_entries = []
+
+                for file_path in submission_files:
+                    try:
+                        local_path = hf_hub_download(
+                            repo_id=RESULTS_DATASET_ID,
+                            filename=file_path,
+                            repo_type="dataset",
+                            token=TOKEN
+                        )
+                        entries, _ = process_jsonl_submission(local_path)
+                        all_entries.extend(entries)
+                    except Exception as file_error:
+                        print(f"Error downloading file {file_path}: {file_error}")
+
+                # Create leaderboard data structure
+                leaderboard_data = {
+                    "entries": all_entries,
+                    "last_updated": pd.Timestamp.now().isoformat()
+                }
+
+                # Save to local file
+                save_leaderboard_data(leaderboard_data, LEADERBOARD_FILE)
+
+                return True
+            except Exception as list_error:
+                print(f"Error listing repository files: {list_error}")
+
+                # If we can't download anything, create an empty leaderboard
+                if not os.path.exists(LEADERBOARD_FILE):
+                    empty_data = {"entries": [], "last_updated": pd.Timestamp.now().isoformat()}
+                    save_leaderboard_data(empty_data, LEADERBOARD_FILE)
+
+                return False
+    except Exception as e:
+        print(f"Error downloading leaderboard data: {e}")
+
+        # Ensure we have at least an empty leaderboard file
+        if not os.path.exists(LEADERBOARD_FILE):
+            empty_data = {"entries": [], "last_updated": pd.Timestamp.now().isoformat()}
+            save_leaderboard_data(empty_data, LEADERBOARD_FILE)
+
+        return False
+
+
+def get_leaderboard_df() -> pd.DataFrame:
+    """
+    Get the leaderboard data as a DataFrame.
+    """
+    # Try to download the latest data
+    download_leaderboard_data()
+
+    # Load from local file
+    leaderboard_data = load_leaderboard_data(LEADERBOARD_FILE)
+
+    # Convert to DataFrame
+    df = leaderboard_to_dataframe(leaderboard_data)
+
+    return df
+
+
+def get_category_leaderboard_df(category: str) -> pd.DataFrame:
+    """
+    Get the leaderboard data filtered by a specific category.
+
+    Args:
+        category: The category to filter by (e.g., "Criminal, Violent, and Terrorist Activity")
+
+    Returns:
+        DataFrame with metrics for the specified category
+    """
+    # Load the leaderboard data
+    leaderboard_data = load_leaderboard_data(LEADERBOARD_FILE)
+
+    # Filter entries to only include those with data for the specified category
+    filtered_entries = []
+
+    for entry in leaderboard_data.get("entries", []):
+        # Check if the entry has data for this category
+        if "per_category_metrics" in entry and category in entry["per_category_metrics"]:
+            # Create a new entry with just the overall info and this category's metrics
+            filtered_entry = {
+                "model_name": entry.get("model_name", "Unknown Model"),
+                "model_type": entry.get("model_type", "Unknown"),
+                "submission_date": entry.get("submission_date", ""),
+            }
+
+            # Extract metrics for this category
+            category_metrics = entry["per_category_metrics"][category]
+
+            # Add metrics for each test type
+            for test_type in category_metrics:
+                if test_type and isinstance(category_metrics[test_type], dict):
+                    for metric, value in category_metrics[test_type].items():
+                        col_name = f"{test_type}_{metric}"
+                        filtered_entry[col_name] = value
+
+            # Calculate average F1 for this category
+            f1_values = []
+            for test_type in category_metrics:
+                if test_type and isinstance(category_metrics[test_type], dict) and "f1_binary" in category_metrics[test_type]:
+                    f1_values.append(category_metrics[test_type]["f1_binary"])
+
+            if f1_values:
+                filtered_entry["average_f1"] = sum(f1_values) / len(f1_values)
+
+            # Add specific test type F1 scores for display
+            for test_type in ["default_prompts", "jailbreaked_prompts", "default_answers", "jailbreaked_answers"]:
+                if test_type in category_metrics and "f1_binary" in category_metrics[test_type]:
+                    filtered_entry[f"{test_type}_f1"] = category_metrics[test_type]["f1_binary"]
+
+            filtered_entries.append(filtered_entry)
+
+    # Create a new leaderboard data structure with the filtered entries
+    filtered_leaderboard = {
+        "entries": filtered_entries,
+        "last_updated": leaderboard_data.get("last_updated", pd.Timestamp.now().isoformat())
+    }
+
+    # Convert to DataFrame
+    df = leaderboard_to_dataframe(filtered_leaderboard)
+
+    return df
+
+
+def get_detailed_model_data(model_name: str) -> Dict:
+    """
+    Get detailed data for a specific model.
+    """
+    leaderboard_data = load_leaderboard_data(LEADERBOARD_FILE)
+
+    for entry in leaderboard_data.get("entries", []):
+        if entry.get("model_name") == model_name:
+            return entry
+
+    return {}
src/submission/submit.py
ADDED
@@ -0,0 +1,105 @@
+"""
+Handle submissions to the GuardBench leaderboard.
+"""
+
+import json
+import os
+import tempfile
+import uuid
+from datetime import datetime
+from typing import Dict, List, Tuple
+
+from huggingface_hub import HfApi
+from datasets import load_dataset, Dataset
+
+from src.display.formatting import styled_error, styled_message, styled_warning
+from src.envs import API, RESULTS_DATASET_ID, TOKEN
+from src.leaderboard.processor import process_jsonl_submission, add_entries_to_leaderboard, load_leaderboard_data
+
+
+def validate_submission(file_path: str) -> Tuple[bool, str]:
+    """
+    Validate a submission file.
+    """
+    try:
+        entries, message = process_jsonl_submission(file_path)
+        if not entries:
+            return False, message
+
+        # Additional validation could be added here
+
+        return True, "Submission is valid"
+    except Exception as e:
+        return False, f"Error validating submission: {e}"
+
+
+def submit_to_hub(file_path: str, metadata: Dict, dataset_id: str, token: str) -> Tuple[bool, str]:
+    """
+    Submit results to a HuggingFace dataset repository as individual files.
+    """
+    try:
+        # Process the submission file to validate
+        entries, message = process_jsonl_submission(file_path)
+        if not entries:
+            return False, message
+
+        # Generate a unique submission ID
+        model_name = metadata.get("model_name", "unknown")
+        model_name_safe = model_name.replace("/", "_").replace(" ", "_")
+        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+        submission_id = f"{model_name_safe}_{timestamp}"
+
+        # Create an API instance
+        api = HfApi(token=token)
+
+        # Create a temporary file with metadata added
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.jsonl', delete=False) as temp_file:
+            # Add metadata to each entry
+            for entry in entries:
+                # If the entry already has a model_name, don't override it
+                if "model_name" not in entry:
+                    entry["model_name"] = metadata.get("model_name")
+
+                # Add other metadata if not present
+                for key, value in metadata.items():
+                    if key != "model_name" and key not in entry:
+                        entry[key] = value
+
+                # Write to temp file
+                temp_file.write(json.dumps(entry) + "\n")
+
+            temp_path = temp_file.name
+
+        # Upload the file directly to the repository
+        submission_path = f"submissions/{submission_id}.jsonl"
+        api.upload_file(
+            path_or_fileobj=temp_path,
+            path_in_repo=submission_path,
+            repo_id=dataset_id,
+            repo_type="dataset",
+            commit_message=f"Add submission for {model_name}"
+        )
+
+        # Clean up the temporary file
+        os.unlink(temp_path)
+
+        return True, f"Successfully uploaded submission for {model_name} to {dataset_id}"
+    except Exception as e:
+        return False, f"Error submitting to dataset: {e}"
+
+
+def process_submission(file_path: str, metadata: Dict) -> str:
+    """
+    Process a submission to the GuardBench leaderboard.
+    """
+    # Validate submission file
+    is_valid, validation_message = validate_submission(file_path)
+    if not is_valid:
+        return styled_error(validation_message)
+
+    # Submit to HuggingFace dataset repository
+    success, message = submit_to_hub(file_path, metadata, RESULTS_DATASET_ID, TOKEN)
+    if not success:
+        return styled_error(message)
+
+    return styled_message(f"Submission successful! {message}")
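Finally, a hedged sketch, not part of the commit, of driving the submission pipeline from a script instead of the Gradio form; the file path and metadata values are hypothetical, and a valid `HF_TOKEN` with write access to `RESULTS_DATASET_ID` is assumed:

```python
from src.submission.submit import process_submission

# Hypothetical metadata mirroring the fields collected by the Gradio form.
metadata = {
    "model_name": "my-org/my-guard-model",
    "base_model": "",
    "revision": "main",
    "precision": "float16",
    "weight_type": "Original",
    "model_type": "Open Source",
}

# Validates the local JSONL, then uploads it to the results dataset under
# submissions/<model>_<timestamp>.jsonl; returns a styled HTML status string.
html = process_submission("results.jsonl", metadata)
print(html)
```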