H2H-eval-comparator

Sleeping

App Files Files Community

H2H-eval-comparator / configs /dataset_config.py

rohansampath

Create configs/dataset_config.py

d093a4d verified 6 months ago

raw

history blame contribute delete

12.5 kB

	import gradio as gr
	import pandas as pd
	from dataset_previews import mmlupro_dataset_preview, format_preview_for_display

	def create_dataset_section():
	"""
	Creates the dataset selection section (Section A) of the UI.

	Components are created in this order: a Markdown header, a gr.Row, the
	dataset dropdown, the preview toggle button, a gr.Column created with
	visible=False that is bound to ``dataset_preview_container``, a preview
	heading, the preview DataFrame, a divider, and a gr.State initialised
	to None.

	NOTE(review): this copy of the file has lost its indentation, so which
	statements sit inside each ``with`` block cannot be read off the text.
	Presumably the dropdown and button belong to the row, and the heading,
	table and divider belong to the hidden column - confirm against the
	repository before relying on it.

	Returns:
	dict: Dictionary containing UI components and containers, keyed by
	'header', 'dropdown', 'preview_toggle', 'preview_container',
	'preview_output' and 'preview_data_state'.
	"""
	# Dataset Selection Section
	header = gr.Markdown("## (A) Select Dataset for Evaluation")

	with gr.Row():
	# The placeholder entry "(Select Dataset)" is also the initial value.
	dataset_dropdown = gr.Dropdown(
	choices=["(Select Dataset)", "MMLU-Pro"],
	value="(Select Dataset)",
	label="Dataset",
	info="Select a dataset to perform the Head-to-Head Evaluation on. Available Datasets: [MMLU-Pro](https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro)"
	)
	# Created non-interactive. update_interface_based_on_dataset returns an
	# interactive=True update labelled for this button when "MMLU-Pro" is
	# selected; the event wiring itself is not in this file.
	preview_toggle = gr.Button("Show Dataset Preview", interactive=False, variant="secondary")

	# Dataset Preview Container - Initially hidden
	with gr.Column(visible=False) as dataset_preview_container:
	gr.Markdown("## Dataset Preview", elem_id="preview_header")
	# Read-only table (interactive=False) with cell text wrapping enabled.
	preview_output = gr.DataFrame(
	interactive=False,
	wrap=True,
	elem_id="preview_table"
	)
	# Add a divider
	gr.Markdown("<div class='section-divider'></div>")

	# Preview data state to store the loaded preview data
	# Starts as None, i.e. nothing has been loaded yet.
	preview_data_state = gr.State(None)

	# Return components dictionary
	return {
	'header': header,
	'dropdown': dataset_dropdown,
	'preview_toggle': preview_toggle,
	'preview_container': dataset_preview_container,
	'preview_output': preview_output,
	'preview_data_state': preview_data_state
	}

	def create_mmlu_config_section():
	"""
	Creates the dataset configuration section (Section B) of the UI.

	The outer gr.Column is created with visible=False and is returned as
	'container'. Within the function two groups of controls are created:
	subject selection (a mode radio, a number-of-subjects slider and a
	specific-subjects checkbox group) and sample configuration (an
	"Evaluate All Questions" checkbox, an informational Markdown text and a
	questions-per-subject slider).

	NOTE(review): this copy of the file has lost its indentation, so the
	exact nesting of the ``with`` blocks cannot be read off the text. The
	inline comments suggest a two-column row (subjects on the left, sample
	configuration on the right), each wrapped in a "config-box" group -
	confirm against the repository.

	Returns:
	dict: Dictionary containing UI components and containers, keyed by
	'container', 'subject_selection_mode', 'num_subjects_container',
	'num_subjects_slider', 'specific_subjects_container',
	'specific_subjects', 'all_questions_checkbox', 'questions_info_text'
	and 'num_questions_slider'.
	"""
	with gr.Column(visible=False) as mmlu_config_container:
	gr.Markdown("## (B) Select Dataset Configuration Options")

	with gr.Row():
	# Left column for subject selection
	with gr.Column(scale=1):
	with gr.Group(elem_classes=["config-box"]):
	gr.Markdown("### Choose Subjects")

	# These labels are compared verbatim in update_subject_selection_ui
	# and get_subject_mode_param, so the strings must stay in sync.
	subject_selection_mode = gr.Radio(
	choices=["Evaluate All Subjects", "Choose Number of Subjects", "Specify which Subjects to Evaluate"],
	value="Evaluate All Subjects",
	label="Subject Selection Mode"
	)

	# Subject number slider - initially hidden
	with gr.Column(visible=False) as num_subjects_container:
	# load_dataset_preview returns an update that replaces `maximum` with
	# the number of subjects found in the preview data.
	num_subjects_slider = gr.Slider(
	minimum=1,
	maximum=14, # Will be updated dynamically based on preview data
	value=1,
	step=1,
	label="Number of Subjects",
	info="Number of subjects to evaluate. They will be loaded in alphabetical order."
	)

	# Subject checkboxes - initially hidden
	with gr.Column(visible=False) as specific_subjects_container:
	# Will be populated dynamically from the preview data
	# (load_dataset_preview returns the `choices` update for this group).
	specific_subjects = gr.CheckboxGroup(
	choices=[], # Will be populated from preview data
	label="Select Specific Subjects",
	info="Select which specific subjects to evaluate"
	)

	# Right column for sample configuration
	with gr.Column(scale=1):
	with gr.Group(elem_classes=["config-box"]):
	gr.Markdown("### Sample Configuration")

	# Unchecked by default, so the per-subject slider below starts visible.
	all_questions_checkbox = gr.Checkbox(
	label="Evaluate All Questions",
	value=False,
	info="When checked, evaluates all available questions for each subject"
	)

	# Created hidden; update_questions_interface returns a visible=True
	# update for it when the checkbox is checked.
	questions_info_text = gr.Markdown(visible=False, value="All questions across the selected subjects will be evaluated")

	# Questions per subject slider
	# Range 1-100 in steps of 1, starting at 20.
	num_questions_slider = gr.Slider(
	minimum=1,
	maximum=100,
	value=20,
	step=1,
	label="Questions per Subject",
	info="Choose a subset of questions (1-100) per subject. They will be loaded in order of question_id.",
	interactive=True
	)

	# Return components dictionary
	return {
	'container': mmlu_config_container,
	'subject_selection_mode': subject_selection_mode,
	'num_subjects_container': num_subjects_container,
	'num_subjects_slider': num_subjects_slider,
	'specific_subjects_container': specific_subjects_container,
	'specific_subjects': specific_subjects,
	'all_questions_checkbox': all_questions_checkbox,
	'questions_info_text': questions_info_text,
	'num_questions_slider': num_questions_slider
	}

	# Utility functions for dataset section
	def get_subject_choices_from_preview(preview_data):
	    """
	    Extracts subject choices from preview data.

	    Args:
	        preview_data (dict): Preview data whose 'subject_counts' entry maps
	            each subject name to its count. May be None or empty.

	    Returns:
	        tuple: (formatted_subjects, subject_count), where
	            formatted_subjects is a list of "Subject (n=count)" strings
	            sorted alphabetically by subject, and subject_count is the
	            number of subjects. ([], 0) is returned when there is no
	            preview data, no 'subject_counts' entry, or that entry is
	            empty or None.
	    """
	    if not preview_data or 'subject_counts' not in preview_data:
	        return [], 0

	    # A present-but-None entry is treated like an empty mapping instead of
	    # failing on attribute access.
	    subject_counts = preview_data['subject_counts'] or {}

	    # Iterating a mapping yields its keys, so sorted() gives the subjects
	    # in alphabetical order.
	    subjects = sorted(subject_counts)

	    # Format as "Subject (n=count)"
	    formatted_subjects = [
	        f"{subject} (n={subject_counts[subject]})" for subject in subjects
	    ]

	    return formatted_subjects, len(subjects)

	def load_dataset_preview(dataset):
	    """
	    Loads preview data for the selected dataset.

	    Args:
	        dataset (str): Selected dataset name.

	    Returns:
	        tuple: (preview_data, specific_subjects_update, num_subjects_slider_update).
	            For "MMLU-Pro" the preview is regenerated and the two updates
	            carry the subject choices and the slider maximum. For any other
	            value the result is (None, gr.update(), gr.update()), i.e. no
	            preview data and no component changes.
	    """
	    if dataset != "MMLU-Pro":
	        return None, gr.update(), gr.update()

	    # Load the preview data
	    preview_data = mmlupro_dataset_preview(regenerate_preview=True)

	    # Extract subject choices and count
	    subject_choices, subject_count = get_subject_choices_from_preview(preview_data)

	    # The number-of-subjects slider is created with minimum=1 and this
	    # update sets value=1, so the maximum must never drop below 1 even when
	    # the preview yields no subjects.
	    slider_maximum = max(subject_count, 1)

	    # Update the UI components
	    return (
	        preview_data,  # Store the preview data
	        gr.update(choices=subject_choices),  # Update checkbox choices
	        gr.update(maximum=slider_maximum, value=1),  # Update slider max
	    )

	def update_interface_based_on_dataset(dataset, current_visibility,
	                                      mmlu_config_container, model_config_container,
	                                      results_container, preview_toggle,
	                                      dataset_preview_container):
	    """
	    Updates the interface based on dataset selection.

	    Only ``dataset`` influences the result; the remaining parameters are
	    accepted but not read.

	    Args:
	        dataset (str): Selected dataset name.
	        current_visibility (bool): Current preview visibility state.
	        mmlu_config_container: MMLU config container component.
	        model_config_container: Model config container component.
	        results_container: Results container component.
	        preview_toggle: Preview toggle button.
	        dataset_preview_container: Dataset preview container.

	    Returns:
	        tuple: Seven entries, in order: updates for the MMLU config
	            container, model config container, results container, preview
	            toggle button and dataset preview container, then the preview
	            visibility flag (always False), then the button text update.
	    """
	    # Everything that depends on the selection follows this one flag.
	    has_dataset = bool(dataset == "MMLU-Pro")

	    section_updates = (
	        gr.update(visible=has_dataset),  # mmlu_config_container
	        gr.update(visible=has_dataset),  # model_config_container
	        gr.update(visible=has_dataset),  # results_container
	        gr.update(interactive=has_dataset),  # preview_toggle
	    )

	    # The preview is collapsed on every dataset change, whatever was chosen.
	    preview_reset = (
	        gr.update(visible=False),  # dataset_preview_container
	        False,  # preview_visibility
	        gr.update(value="Show Dataset Preview"),  # button text
	    )

	    return section_updates + preview_reset

	def toggle_preview(dataset, preview_visibility, preview_data):
	    """
	    Toggles the dataset preview visibility.

	    Args:
	        dataset (str): Selected dataset name.
	        preview_visibility (bool): Current preview visibility state.
	        preview_data (dict): Preview data.

	    Returns:
	        tuple: (new_visibility, preview_container_update, preview_output_update, button_text_update)
	    """
	    # Flip the state; the button label describes the next available action.
	    now_visible = not preview_visibility
	    label = "Hide Dataset Preview" if now_visible else "Show Dataset Preview"

	    # The table is only filled when the preview is being shown for MMLU-Pro;
	    # hiding it, or showing it for any other dataset, sends None.
	    table = None
	    if now_visible and dataset == "MMLU-Pro":
	        table = format_preview_for_display(preview_data)

	    return (
	        now_visible,
	        gr.update(visible=now_visible),
	        table,
	        gr.update(value=label),
	    )

	def update_subject_selection_ui(mode, num_subjects_container, specific_subjects_container):
	    """
	    Updates UI based on subject selection mode.

	    Only ``mode`` is read; the two container parameters are accepted but
	    not used.

	    Args:
	        mode (str): Selected subject selection mode.
	        num_subjects_container: Container for number of subjects slider.
	        specific_subjects_container: Container for specific subjects checkboxes.

	    Returns:
	        tuple: (num_subjects_container_update, specific_subjects_container_update)
	    """
	    wants_all = bool(mode == "Evaluate All Subjects")
	    # The "number" mode is only considered when "all" did not match.
	    wants_number = (not wants_all) and bool(mode == "Choose Number of Subjects")
	    # Any remaining mode is treated as "Specify which Subjects to Evaluate".
	    wants_specific = not (wants_all or wants_number)

	    return gr.update(visible=wants_number), gr.update(visible=wants_specific)

	def update_questions_interface(checked, num_questions_slider, questions_info_text):
	    """
	    Updates questions interface based on "Evaluate All Questions" checkbox.

	    Only ``checked`` is read; the two component parameters are accepted but
	    not used.

	    Args:
	        checked (bool): Whether "Evaluate All Questions" is checked.
	        num_questions_slider: Questions per subject slider component.
	        questions_info_text: Questions info text component.

	    Returns:
	        tuple: (num_questions_slider_update, questions_info_text_update)
	    """
	    # The slider and the info text are mutually exclusive: exactly one shows.
	    evaluate_everything = bool(checked)
	    slider_update = gr.update(visible=not evaluate_everything)
	    info_update = gr.update(visible=evaluate_everything)
	    return slider_update, info_update

	def get_subject_mode_param(mode):
	    """
	    Converts subject selection mode to parameter string.

	    Args:
	        mode (str): Subject selection mode.

	    Returns:
	        str: Parameter value for evaluation function: "all", "number", or
	            "specific" for any other mode.
	    """
	    # Compared with == (not a dict lookup) so any value, hashable or not,
	    # falls through to the default.
	    known_modes = (
	        ("Evaluate All Subjects", "all"),
	        ("Choose Number of Subjects", "number"),
	    )
	    for label, param in known_modes:
	        if mode == label:
	            return param

	    # "Specify which Subjects to Evaluate" and anything unrecognised.
	    return "specific"

	def get_subject_names(selected_subjects):
	    """
	    Extracts subject names from checkbox values.

	    Each value is expected in the "Subject (n=count)" form produced by
	    get_subject_choices_from_preview. Only the trailing " (n=...)" part is
	    removed, so a subject name that itself contains " (" is kept intact.
	    Values without that suffix are returned unchanged.

	    Args:
	        selected_subjects (list): Selected subjects with counts. None or an
	            empty selection yields an empty list.

	    Returns:
	        list: Clean subject names without count information.
	    """
	    if not selected_subjects:
	        return []

	    # Split once from the right so only the final count marker is stripped.
	    return [subject.rsplit(" (n=", 1)[0] for subject in selected_subjects]