Spaces:
Sleeping
Sleeping
File size: 12,468 Bytes
d093a4d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 |
import gradio as gr
import pandas as pd
from dataset_previews import mmlupro_dataset_preview, format_preview_for_display
def create_dataset_section():
    """
    Build Section A of the UI: the dataset picker and its preview area.

    Components are instantiated in this order: the section header, a row
    holding the dataset dropdown and the preview toggle button, an initially
    hidden column with the preview heading and table, a divider, and a state
    object (initialised to None) for the loaded preview data.

    Returns:
        dict: The created components, keyed by 'header', 'dropdown',
            'preview_toggle', 'preview_container', 'preview_output' and
            'preview_data_state'.
    """
    placeholder_choice = "(Select Dataset)"
    dropdown_help = "Select a dataset to perform the Head-to-Head Evaluation on. Available Datasets: [MMLU-Pro](https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro)"

    # Section heading
    section_header = gr.Markdown("## (A) Select Dataset for Evaluation")

    # Dataset picker next to the preview toggle; the toggle starts disabled
    with gr.Row():
        dropdown = gr.Dropdown(
            choices=[placeholder_choice, "MMLU-Pro"],
            value=placeholder_choice,
            label="Dataset",
            info=dropdown_help,
        )
        toggle_button = gr.Button(
            "Show Dataset Preview",
            interactive=False,
            variant="secondary",
        )

    # Preview area, hidden when the section is first built
    with gr.Column(visible=False) as preview_column:
        gr.Markdown("## Dataset Preview", elem_id="preview_header")
        preview_table = gr.DataFrame(
            interactive=False,
            wrap=True,
            elem_id="preview_table",
        )

    # NOTE(review): the divider is created outside the hidden preview column,
    # so it renders regardless of preview visibility - confirm intended layout.
    gr.Markdown("<div class='section-divider'></div>")

    # Holds the loaded preview data between events
    preview_state = gr.State(None)

    components = {}
    components['header'] = section_header
    components['dropdown'] = dropdown
    components['preview_toggle'] = toggle_button
    components['preview_container'] = preview_column
    components['preview_output'] = preview_table
    components['preview_data_state'] = preview_state
    return components
def create_mmlu_config_section():
    """
    Build Section B of the UI: the dataset configuration options.

    The whole section lives in a column that starts hidden. Inside it, a row
    holds two equally scaled columns: subject selection on the left (mode
    radio, plus a hidden subject-count slider and a hidden subject checkbox
    group) and sample configuration on the right (an "all questions" checkbox,
    a hidden info text and a questions-per-subject slider).

    Returns:
        dict: The created components, keyed by 'container',
            'subject_selection_mode', 'num_subjects_container',
            'num_subjects_slider', 'specific_subjects_container',
            'specific_subjects', 'all_questions_checkbox',
            'questions_info_text' and 'num_questions_slider'.
    """
    mode_labels = [
        "Evaluate All Subjects",
        "Choose Number of Subjects",
        "Specify which Subjects to Evaluate",
    ]

    with gr.Column(visible=False) as section_container:
        gr.Markdown("## (B) Select Dataset Configuration Options")

        with gr.Row():
            # Left half: which subjects to evaluate
            with gr.Column(scale=1):
                with gr.Group(elem_classes=["config-box"]):
                    gr.Markdown("### Choose Subjects")

                    mode_radio = gr.Radio(
                        choices=mode_labels,
                        value=mode_labels[0],
                        label="Subject Selection Mode",
                    )

                    # Slider wrapper, hidden at build time
                    with gr.Column(visible=False) as count_container:
                        count_slider = gr.Slider(
                            minimum=1,
                            # Placeholder upper bound; a later update is
                            # expected to replace it from the preview data
                            maximum=14,
                            value=1,
                            step=1,
                            label="Number of Subjects",
                            info="Number of subjects to evaluate. They will be loaded in alphabetical order.",
                        )

                    # Checkbox wrapper, hidden at build time
                    with gr.Column(visible=False) as picker_container:
                        # Starts with no choices; they are supplied later
                        subject_picker = gr.CheckboxGroup(
                            choices=[],
                            label="Select Specific Subjects",
                            info="Select which specific subjects to evaluate",
                        )

            # Right half: how many questions per subject
            with gr.Column(scale=1):
                with gr.Group(elem_classes=["config-box"]):
                    gr.Markdown("### Sample Configuration")

                    evaluate_all_box = gr.Checkbox(
                        label="Evaluate All Questions",
                        value=False,
                        info="When checked, evaluates all available questions for each subject",
                    )

                    all_questions_note = gr.Markdown(
                        visible=False,
                        value="**All questions across the selected subjects will be evaluated**",
                    )

                    per_subject_slider = gr.Slider(
                        minimum=1,
                        maximum=100,
                        value=20,
                        step=1,
                        label="Questions per Subject",
                        info="Choose a subset of questions (1-100) per subject. They will be loaded in order of question_id.",
                        interactive=True,
                    )

    components = {}
    components['container'] = section_container
    components['subject_selection_mode'] = mode_radio
    components['num_subjects_container'] = count_container
    components['num_subjects_slider'] = count_slider
    components['specific_subjects_container'] = picker_container
    components['specific_subjects'] = subject_picker
    components['all_questions_checkbox'] = evaluate_all_box
    components['questions_info_text'] = all_questions_note
    components['num_questions_slider'] = per_subject_slider
    return components
# Utility functions for dataset section
def get_subject_choices_from_preview(preview_data):
    """
    Build display labels for the subjects found in the preview data.

    Args:
        preview_data (dict): Preview data; its 'subject_counts' entry maps
            each subject name to a count.

    Returns:
        tuple: (labels, subject_count) where labels is a list of
            "<subject> (n=<count>)" strings ordered alphabetically by subject
            and subject_count is the number of subjects. Returns ([], 0) when
            preview_data is empty/None or has no 'subject_counts' key.
    """
    if preview_data and 'subject_counts' in preview_data:
        counts = preview_data['subject_counts']
        ordered_names = sorted(counts.keys())
        labels = []
        for name in ordered_names:
            labels.append(f"{name} (n={counts[name]})")
        return labels, len(ordered_names)

    # Nothing usable to extract subjects from
    return [], 0
def load_dataset_preview(dataset):
    """
    Load preview data for the chosen dataset and derive the UI updates from it.

    Args:
        dataset (str): Selected dataset name.

    Returns:
        tuple: (preview_data, specific_subjects_update, num_subjects_slider_update).
            For "MMLU-Pro" the preview is obtained from
            mmlupro_dataset_preview(regenerate_preview=True), the checkbox
            update carries the subject labels, and the slider update sets its
            maximum to the subject count and its value to 1. For any other
            selection the result is None plus two empty updates.
    """
    # Only MMLU-Pro has a preview; everything else leaves the UI untouched
    if dataset != "MMLU-Pro":
        return None, gr.update(), gr.update()

    preview = mmlupro_dataset_preview(regenerate_preview=True)
    labels, total_subjects = get_subject_choices_from_preview(preview)

    checkbox_update = gr.update(choices=labels)
    slider_update = gr.update(maximum=total_subjects, value=1)
    return preview, checkbox_update, slider_update
def update_interface_based_on_dataset(dataset, current_visibility,
                                      mmlu_config_container, model_config_container,
                                      results_container, preview_toggle,
                                      dataset_preview_container):
    """
    Compute the UI updates that follow a change of the selected dataset.

    Only `dataset` influences the result; the remaining parameters are accepted
    but not read by this function.

    Args:
        dataset (str): Selected dataset name.
        current_visibility (bool): Current preview visibility state (unused).
        mmlu_config_container: MMLU config container component (unused).
        model_config_container: Model config container component (unused).
        results_container: Results container component (unused).
        preview_toggle: Preview toggle button (unused).
        dataset_preview_container: Dataset preview container (unused).

    Returns:
        tuple: Seven items, in order: visibility updates for the MMLU config,
            model config and results containers; an interactivity update for
            the preview toggle; an update hiding the preview container; False
            as the new preview-visibility state; and an update resetting the
            button text to "Show Dataset Preview".
    """
    # The three containers and the toggle are enabled only for MMLU-Pro
    dataset_chosen = dataset == "MMLU-Pro"

    config_update = gr.update(visible=dataset_chosen)
    model_update = gr.update(visible=dataset_chosen)
    results_update = gr.update(visible=dataset_chosen)
    toggle_update = gr.update(interactive=dataset_chosen)

    # The preview is always collapsed and its button label reset
    preview_update = gr.update(visible=False)
    button_text_update = gr.update(value="Show Dataset Preview")

    return (
        config_update,
        model_update,
        results_update,
        toggle_update,
        preview_update,
        False,
        button_text_update,
    )
def toggle_preview(dataset, preview_visibility, preview_data):
    """
    Flip the dataset preview between shown and hidden.

    Args:
        dataset (str): Selected dataset name.
        preview_visibility (bool): Current preview visibility state.
        preview_data (dict): Preview data.

    Returns:
        tuple: (new_visibility, preview_container_update, preview_output_update,
            button_text_update). The preview output is the result of
            format_preview_for_display(preview_data) only when the preview is
            being shown for "MMLU-Pro"; otherwise it is None.
    """
    now_visible = not preview_visibility

    # Button label reflects the action the next click will perform
    if now_visible:
        label = "Hide Dataset Preview"
    else:
        label = "Show Dataset Preview"

    # Only MMLU-Pro has a formatted preview; other datasets (and hiding)
    # produce an empty table value
    table_value = None
    if now_visible and dataset == "MMLU-Pro":
        table_value = format_preview_for_display(preview_data)

    container_update = gr.update(visible=now_visible)
    button_update = gr.update(value=label)
    return now_visible, container_update, table_value, button_update
def update_subject_selection_ui(mode, num_subjects_container, specific_subjects_container):
    """
    Decide which subject-selection sub-panel is visible for the given mode.

    Args:
        mode (str): Selected subject selection mode.
        num_subjects_container: Container for number of subjects slider (unused).
        specific_subjects_container: Container for specific subjects checkboxes (unused).

    Returns:
        tuple: (num_subjects_container_update, specific_subjects_container_update).
            Both hidden for "Evaluate All Subjects"; only the slider container
            shown for "Choose Number of Subjects"; only the checkbox container
            shown for any other mode.
    """
    evaluate_all = mode == "Evaluate All Subjects"
    by_count = (not evaluate_all) and mode == "Choose Number of Subjects"
    # Any mode that is neither of the above is treated as "specific subjects"
    by_name = not (evaluate_all or by_count)

    return gr.update(visible=by_count), gr.update(visible=by_name)
def update_questions_interface(checked, num_questions_slider, questions_info_text):
    """
    Swap between the questions slider and the "all questions" info text.

    Args:
        checked (bool): Whether "Evaluate All Questions" is checked.
        num_questions_slider: Questions per subject slider component (unused).
        questions_info_text: Questions info text component (unused).

    Returns:
        tuple: (num_questions_slider_update, questions_info_text_update). The
            slider is visible exactly when the checkbox is unchecked, the info
            text exactly when it is checked.
    """
    evaluate_all = bool(checked)
    slider_update = gr.update(visible=not evaluate_all)
    info_update = gr.update(visible=evaluate_all)
    return slider_update, info_update
def get_subject_mode_param(mode):
    """
    Map a subject selection mode label to its short parameter string.

    Args:
        mode (str): Subject selection mode.

    Returns:
        str: "all" for "Evaluate All Subjects", "number" for
            "Choose Number of Subjects", and "specific" for anything else.
    """
    known_modes = (
        ("Evaluate All Subjects", "all"),
        ("Choose Number of Subjects", "number"),
    )
    for label, param in known_modes:
        if mode == label:
            return param

    # Fallback covers "Specify which Subjects to Evaluate" and any other value
    return "specific"
def get_subject_names(selected_subjects):
    """
    Strip the trailing count suffix from checkbox labels.

    Labels have the form "<subject> (n=<count>)". Only that final suffix is
    removed, so a subject whose own name contains parentheses is returned
    intact. Labels without the suffix are returned unchanged.

    Args:
        selected_subjects (list): Selected subject labels with counts. May be
            None or empty when nothing is selected.

    Returns:
        list: Clean subject names without count information, in input order.
    """
    if not selected_subjects:
        return []
    # Split from the right, once, on the exact count marker rather than on the
    # first " (" so names such as "physics (honors)" are not truncated
    return [label.rsplit(" (n=", 1)[0] for label in selected_subjects]