Spaces:

mib-bench
/

leaderboard

Running

leaderboard / custom-select-columns.py

Aaron Mueller

updated filtering, add F= tab

1d8e193 6 months ago

25.2 kB

	import gradio as gr
	import pandas as pd
	from typing import List, Dict, Union, Optional

	class SmartSelectColumns(gr.SelectColumns):
	    """
	    Enhanced SelectColumns component that supports substring matching and column mapping.

	    Inherits from gr.SelectColumns and adds named substring filters (used to
	    group DataFrame columns) plus a mapping from actual column names to
	    display names.

	    NOTE(review): later drafts in this file import SelectColumns from
	    gradio_leaderboard; confirm that gr.SelectColumns resolves.
	    """

	    def __init__(
	        self,
	        *args,
	        column_filters: Optional[Dict[str, List[str]]] = None,
	        column_mapping: Optional[Dict[str, str]] = None,
	        **kwargs
	    ):
	        """
	        Initialize the SmartSelectColumns component.

	        Args:
	            column_filters: Dict mapping filter names to lists of substrings to match
	            column_mapping: Dict mapping actual column names to display names
	            *args, **kwargs: Arguments passed to parent SelectColumns
	        """
	        # Unpack both collections: passing ``args`` itself would hand the
	        # parent one tuple, and ``*kwargs`` would pass only the dict keys.
	        super().__init__(*args, **kwargs)
	        self.column_filters = column_filters or {}
	        self.column_mapping = column_mapping or {}

	    def preprocess(self, x: List[str]) -> List[str]:
	        """
	        Transform selected display names back to actual column names.

	        Args:
	            x: List of selected display names

	        Returns:
	            List of actual column names; names without a mapping entry are
	            passed through, and ``x`` itself is returned when no mapping
	            is configured.
	        """
	        if not self.column_mapping:
	            return x
	        # column_mapping is actual -> display, so invert it for the lookup.
	        display_to_actual = {
	            display: actual for actual, display in self.column_mapping.items()
	        }
	        return [display_to_actual.get(name, name) for name in x]

	    def get_filtered_columns(self, df: pd.DataFrame) -> Dict[str, List[str]]:
	        """
	        Get columns filtered by substring matches.

	        Matching is case-insensitive: a column belongs to a filter when any
	        of the filter's substrings occurs in the column name.

	        Args:
	            df: Input DataFrame

	        Returns:
	            Dict mapping filter names to lists of matching columns
	        """
	        return {
	            filter_name: [
	                col
	                for col in df.columns
	                if any(substr.lower() in col.lower() for substr in substrings)
	            ]
	            for filter_name, substrings in self.column_filters.items()
	        }

	    def update(
	        self,
	        value: Union[pd.DataFrame, Dict[str, List[str]]],
	        interactive: Optional[bool] = None
	    ) -> Dict:
	        """
	        Update the component with new values.

	        Args:
	            value: Either a DataFrame or dict of predefined column groups
	            interactive: Whether the component should be interactive

	        Returns:
	            Dict containing the update configuration. For a DataFrame the
	            dict has the keys "choices" (display names), "filtered_cols"
	            and "interactive"; any other value is delegated to the parent.
	        """
	        if not isinstance(value, pd.DataFrame):
	            return super().update(value, interactive)

	        # With an empty mapping every lookup falls back to the column itself.
	        choices = [self.column_mapping.get(col, col) for col in value.columns]
	        return {
	            "choices": choices,
	            "filtered_cols": self.get_filtered_columns(value),
	            "interactive": interactive if interactive is not None else self.interactive
	        }

	# Example usage
	if __name__ == "__main__":
	    # Filter groups: a column joins a group when its name contains one of
	    # the listed substrings.
	    column_filters = {
	        "IOI": ["ioi"],
	        "Performance Metrics": ["performance"],
	    }

	    # Actual column name -> label shown to the user.
	    column_mapping = {
	        "ioi_score_1": "IOI Score (Type 1)",
	        "ioi_score_2": "IOI Score (Type 2)",
	        "other_metric": "Other Metric",
	        "performance_1": "Performance Metric 1",
	    }

	    # Small sample table whose columns exercise both filter groups.
	    df = pd.DataFrame(
	        {
	            "ioi_score_1": [1, 2, 3],
	            "ioi_score_2": [4, 5, 6],
	            "other_metric": [7, 8, 9],
	            "performance_1": [10, 11, 12],
	        }
	    )

	    # Build the interface.
	    with gr.Blocks() as demo:
	        select_cols = SmartSelectColumns(
	            column_filters=column_filters,
	            column_mapping=column_mapping,
	            multiselect=True,
	        )

	        # Feed the DataFrame to the component (the returned dict is not used).
	        select_cols.update(df)

	    demo.launch()
































	import gradio as gr
	import pandas as pd
	from typing import List, Dict, Union, Optional, Any
	from dataclasses import fields

	class SmartSelectColumns(gr.SelectColumns):
	    """
	    Enhanced SelectColumns component for Gradio Leaderboard with smart filtering and mapping capabilities.

	    NOTE(review): later drafts in this file import SelectColumns from
	    gradio_leaderboard; confirm that gr.SelectColumns resolves.
	    """

	    def __init__(
	        self,
	        column_filters: Optional[Dict[str, List[str]]] = None,
	        column_mapping: Optional[Dict[str, str]] = None,
	        initial_selected: Optional[List[str]] = None,
	        *args,
	        **kwargs
	    ):
	        """
	        Initialize SmartSelectColumns with enhanced functionality.

	        Args:
	            column_filters: Dict mapping filter names to lists of substrings to match
	            column_mapping: Dict mapping actual column names to display names
	            initial_selected: List of column names to be initially selected.
	                ``update`` returns them unchanged next to display-name
	                choices, so they should be given as display names.
	            *args, **kwargs: Additional arguments passed to parent SelectColumns
	        """
	        # Unpack both collections: passing ``args`` itself would hand the
	        # parent one tuple, and ``*kwargs`` would pass only the dict keys.
	        super().__init__(*args, **kwargs)
	        self.column_filters = column_filters or {}
	        self.column_mapping = column_mapping or {}
	        # Inverse lookup (display -> actual) used by preprocess().
	        self.reverse_mapping = {
	            display: actual for actual, display in self.column_mapping.items()
	        }
	        self.initial_selected = initial_selected or []

	    def preprocess(self, x: List[str]) -> List[str]:
	        """
	        Transform selected display names back to actual column names.

	        Args:
	            x: List of selected display names

	        Returns:
	            List of actual column names (unmapped names pass through)
	        """
	        return [self.reverse_mapping.get(col, col) for col in x]

	    def postprocess(self, y: List[str]) -> List[str]:
	        """
	        Transform actual column names to display names.

	        Args:
	            y: List of actual column names

	        Returns:
	            List of display names (unmapped names pass through)
	        """
	        return [self.column_mapping.get(col, col) for col in y]

	    def get_filtered_columns(self, df: pd.DataFrame) -> Dict[str, List[str]]:
	        """
	        Get columns filtered by substring matches.

	        Matching is case-insensitive and is done on the actual column
	        names; the result lists the corresponding display names.

	        Args:
	            df: Input DataFrame

	        Returns:
	            Dict mapping filter names to lists of matching display names
	        """
	        return {
	            filter_name: [
	                self.column_mapping.get(col, col)
	                for col in df.columns
	                if any(substr.lower() in col.lower() for substr in substrings)
	            ]
	            for filter_name, substrings in self.column_filters.items()
	        }

	    def update(
	        self,
	        value: Union[pd.DataFrame, Dict[str, List[str]], Any],
	        interactive: Optional[bool] = None
	    ) -> Dict:
	        """
	        Update component with new values, supporting DataFrame fields.

	        Args:
	            value: DataFrame, dict of columns, or fields object
	            interactive: Whether component should be interactive

	        Returns:
	            Dict containing update configuration
	        """
	        is_interactive = interactive if interactive is not None else self.interactive

	        if isinstance(value, pd.DataFrame):
	            choices = [self.column_mapping.get(col, col) for col in value.columns]
	            return {
	                "choices": choices,
	                # Fall back to selecting everything when no initial
	                # selection was configured. A separate local is used so
	                # the ``value`` parameter is not rebound.
	                "value": self.initial_selected if self.initial_selected else choices,
	                "filtered_cols": self.get_filtered_columns(value),
	                "interactive": is_interactive
	            }

	        # Handle fields object (e.g., from dataclass)
	        if hasattr(value, '__dataclass_fields__'):
	            choices = [
	                self.column_mapping.get(field.name, field.name)
	                for field in fields(value)
	            ]
	            return {
	                "choices": choices,
	                "value": self.initial_selected if self.initial_selected else choices,
	                "interactive": is_interactive
	            }

	        return super().update(value, interactive)

	def initialize_leaderboard(df: pd.DataFrame, column_class: Any,
	                           filters: Dict[str, List[str]],
	                           mappings: Dict[str, str],
	                           initial_columns: Optional[List[str]] = None) -> gr.Leaderboard:
	    """
	    Initialize a Gradio Leaderboard with SmartSelectColumns.

	    The DataFrame's columns are renamed to their display names before being
	    handed to the leaderboard, and the column selector is configured with
	    the caller's filters, mappings and initial selection.

	    Args:
	        df: Input DataFrame
	        column_class: Class containing column definitions (e.g., AutoEvalColumn_mib_subgraph).
	            When None, no ``datatype`` argument is passed to the leaderboard.
	        filters: Column filters for substring matching
	        mappings: Column name mappings (actual -> display)
	        initial_columns: List of columns to show initially

	    Returns:
	        Configured Leaderboard instance
	    """
	    # Create renamed DataFrame with display names
	    renamed_df = df.rename(columns=mappings)

	    # Initialize SmartSelectColumns from the arguments; the earlier draft
	    # overwrote ``filters`` and ``mappings`` with hard-coded sample values.
	    smart_columns = SmartSelectColumns(
	        column_filters=filters,
	        column_mapping=mappings,
	        initial_selected=initial_columns,
	        multiselect=True
	    )

	    leaderboard_kwargs = {
	        "value": renamed_df,
	        "select_columns": smart_columns,
	        "search_columns": ["Method"],
	        "hide_columns": [],
	        "interactive": False,
	    }
	    if column_class is not None:
	        # NOTE(review): dataclasses.fields() yields Field objects whose
	        # ``type`` is the annotation -- confirm this is what the
	        # leaderboard expects for ``datatype``.
	        leaderboard_kwargs["datatype"] = [c.type for c in fields(column_class)]

	    # NOTE(review): later drafts in this file import Leaderboard from
	    # gradio_leaderboard; confirm that gr.Leaderboard resolves.
	    return gr.Leaderboard(**leaderboard_kwargs)


	# Example usage
	if __name__ == "__main__":
	    # Sample data
	    df = pd.DataFrame({
	        "ioi_score_1": [1, 2, 3],
	        "ioi_score_2": [4, 5, 6],
	        "other_metric": [7, 8, 9],
	        "performance_1": [10, 11, 12],
	        "Method": ["A", "B", "C"]
	    })

	    # Define filters and mappings
	    filters = {
	        "IOI Metrics": ["ioi"],
	        # The stray backtick in the original substring has been removed.
	        "gemma2.5": ["gemma2_5"]
	    }

	    mappings = {
	        "ioi_score_1": "IOI Score (Type 1)",
	        "ioi_score_2": "IOI Score (Type 2)",
	        "other_metric": "Other Metric",
	        "performance_1": "Performance Metric 1"
	    }

	    # Create demo interface
	    with gr.Blocks() as demo:
	        # Initialize leaderboard with smart columns
	        leaderboard = initialize_leaderboard(
	            df=df,
	            column_class=None,  # Replace with your actual column class
	            filters=filters,
	            mappings=mappings,
	            initial_columns=["Method", "IOI Score (Type 1)"]
	        )

	    demo.launch()





































	from gradio_leaderboard import SelectColumns, Leaderboard
	import pandas as pd
	from typing import List, Dict, Union, Optional, Any
	from dataclasses import fields

	class SmartSelectColumns(SelectColumns):
	    """
	    Enhanced SelectColumns component for gradio_leaderboard with explicit column grouping.
	    """

	    def __init__(
	        self,
	        column_groups: Optional[Dict[str, List[str]]] = None,
	        column_mapping: Optional[Dict[str, str]] = None,
	        initial_selected: Optional[List[str]] = None,
	        **kwargs
	    ):
	        """
	        Initialize SmartSelectColumns with enhanced functionality.

	        Args:
	            column_groups: Dict mapping group names to lists of columns in that group
	            column_mapping: Dict mapping actual column names to display names
	            initial_selected: List of columns to show initially (actual or
	                display names; actual names are translated on update)
	            **kwargs: Passed through to the parent SelectColumns
	        """
	        super().__init__(**kwargs)
	        self.column_groups = column_groups or {}
	        self.column_mapping = column_mapping or {}
	        # Inverse lookup (display -> actual) used by preprocess_value().
	        self.reverse_mapping = {
	            display: actual for actual, display in self.column_mapping.items()
	        }
	        self.initial_selected = initial_selected or []

	    def preprocess_value(self, x: List[str]) -> List[str]:
	        """Transform selected display names back to actual column names."""
	        return [self.reverse_mapping.get(col, col) for col in x]

	    def postprocess_value(self, y: List[str]) -> List[str]:
	        """Transform actual column names to display names."""
	        return [self.column_mapping.get(col, col) for col in y]

	    def _initial_selection(self, choices: List[str]) -> List[str]:
	        """Return the initial selection as display names, defaulting to all choices."""
	        if not self.initial_selected:
	            return choices
	        # ``choices`` holds display names, so translate any actual names in
	        # the configured selection; names that are already display names
	        # have no mapping entry and pass through unchanged.
	        return self.postprocess_value(self.initial_selected)

	    def update(
	        self,
	        value: Union[pd.DataFrame, Dict[str, List[str]], Any]
	    ) -> Dict:
	        """
	        Update component with new values.

	        Args:
	            value: DataFrame, dataclass-style fields object, or anything
	                the parent's ``update`` accepts

	        Returns:
	            Dict with "choices" and "value" (display names); for a
	            DataFrame it also carries "filtered_cols", the configured
	            groups restricted to columns present in the frame.
	        """
	        if isinstance(value, pd.DataFrame):
	            # Get all column names and convert to display names
	            choices = [self.column_mapping.get(col, col) for col in value.columns]

	            # Convert column groups to use display names, keeping only the
	            # columns that actually exist in this DataFrame.
	            filtered_cols = {
	                group_name: [
	                    self.column_mapping.get(col, col)
	                    for col in columns
	                    if col in value.columns
	                ]
	                for group_name, columns in self.column_groups.items()
	            }

	            return {
	                "choices": choices,
	                "value": self._initial_selection(choices),
	                "filtered_cols": filtered_cols
	            }

	        # Handle fields object
	        if hasattr(value, '__dataclass_fields__'):
	            choices = [
	                self.column_mapping.get(field.name, field.name)
	                for field in fields(value)
	            ]
	            return {
	                "choices": choices,
	                "value": self._initial_selection(choices)
	            }

	        return super().update(value)


	# Example usage
	if __name__ == "__main__":
	    # Sample DataFrame. It was commented out in the earlier draft even
	    # though the Leaderboard below is built from ``df``; restored so the
	    # example uses a frame whose columns match the groups defined here.
	    df = pd.DataFrame({
	        "eval_name": ["test1", "test2", "test3"],
	        "Method": ["method1", "method2", "method3"],
	        "ioi_llama3": [0.1, 0.2, 0.3],
	        "ioi_qwen2_5": [0.4, 0.5, 0.6],
	        "ioi_gpt2": [0.7, 0.8, 0.9],
	        "mcqa_llama3": [0.2, 0.3, 0.4],
	        "Average": [0.35, 0.45, 0.55]
	    })

	    # Complete column groups for both benchmarks and models
	    column_groups = {
	        # Benchmark groups
	        "Benchmark group for ioi": ["ioi_gpt2", "ioi_qwen2_5", "ioi_gemma2", "ioi_llama3"],
	        "Benchmark group for mcqa": ["mcqa_qwen2_5", "mcqa_gemma2", "mcqa_llama3"],
	        "Benchmark group for arithmetic_addition": ["arithmetic_addition_llama3"],
	        "Benchmark group for arithmetic_subtraction": ["arithmetic_subtraction_llama3"],
	        "Benchmark group for arc_easy": ["arc_easy_gemma2", "arc_easy_llama3"],
	        "Benchmark group for arc_challenge": ["arc_challenge_llama3"],

	        # Model groups
	        "Model group for qwen2_5": ["ioi_qwen2_5", "mcqa_qwen2_5"],
	        "Model group for gpt2": ["ioi_gpt2"],
	        "Model group for gemma2": ["ioi_gemma2", "mcqa_gemma2", "arc_easy_gemma2"],
	        "Model group for llama3": [
	            "ioi_llama3",
	            "mcqa_llama3",
	            "arithmetic_addition_llama3",
	            "arithmetic_subtraction_llama3",
	            "arc_easy_llama3",
	            "arc_challenge_llama3"
	        ]
	    }

	    # Complete mappings for more readable display names
	    mappings = {
	        # IOI benchmark mappings
	        "ioi_llama3": "IOI (LLaMA-3)",
	        "ioi_qwen2_5": "IOI (Qwen-2.5)",
	        "ioi_gpt2": "IOI (GPT-2)",
	        "ioi_gemma2": "IOI (Gemma-2)",

	        # MCQA benchmark mappings
	        "mcqa_llama3": "MCQA (LLaMA-3)",
	        "mcqa_qwen2_5": "MCQA (Qwen-2.5)",
	        "mcqa_gemma2": "MCQA (Gemma-2)",

	        # Arithmetic benchmark mappings
	        "arithmetic_addition_llama3": "Arithmetic Addition (LLaMA-3)",
	        "arithmetic_subtraction_llama3": "Arithmetic Subtraction (LLaMA-3)",

	        # ARC benchmark mappings
	        "arc_easy_llama3": "ARC Easy (LLaMA-3)",
	        "arc_easy_gemma2": "ARC Easy (Gemma-2)",
	        "arc_challenge_llama3": "ARC Challenge (LLaMA-3)",

	        # Other columns
	        "eval_name": "Evaluation Name",
	        "Method": "Method",
	        "Average": "Average Score"
	    }

	    # Create SmartSelectColumns instance
	    smart_columns = SmartSelectColumns(
	        column_groups=column_groups,
	        column_mapping=mappings,
	        initial_selected=["Method", "Average"]
	    )

	    # Create Leaderboard directly
	    # NOTE(review): AutoEvalColumn_mib_subgraph is not defined or imported
	    # in the visible part of this file -- confirm where it comes from.
	    leaderboard = Leaderboard(
	        value=df,
	        datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
	        select_columns=smart_columns,
	        search_columns=["Method"],
	        hide_columns=[],
	        interactive=False
	    )

























	from gradio_leaderboard import SelectColumns, Leaderboard
	import pandas as pd
	from typing import List, Dict, Union, Optional, Any
	from dataclasses import fields

	class SmartSelectColumns(SelectColumns):
	    """
	    Enhanced SelectColumns component for gradio_leaderboard with dynamic column filtering.
	    """

	    def __init__(
	        self,
	        benchmark_keywords: Optional[List[str]] = None,
	        model_keywords: Optional[List[str]] = None,
	        column_mapping: Optional[Dict[str, str]] = None,
	        initial_selected: Optional[List[str]] = None,
	        **kwargs
	    ):
	        """
	        Initialize SmartSelectColumns with dynamic filtering.

	        Args:
	            benchmark_keywords: List of benchmark names to filter by (e.g., ["ioi", "mcqa"])
	            model_keywords: List of model names to filter by (e.g., ["llama3", "qwen2_5"])
	            column_mapping: Dict mapping actual column names to display names
	            initial_selected: List of columns to show initially (actual or
	                display names; actual names are translated on update)
	            **kwargs: Passed through to the parent SelectColumns
	        """
	        super().__init__(**kwargs)
	        self.benchmark_keywords = benchmark_keywords or []
	        self.model_keywords = model_keywords or []
	        self.column_mapping = column_mapping or {}
	        # Inverse lookup (display -> actual) used by preprocess_value().
	        self.reverse_mapping = {
	            display: actual for actual, display in self.column_mapping.items()
	        }
	        self.initial_selected = initial_selected or []

	    def preprocess_value(self, x: List[str]) -> List[str]:
	        """Transform selected display names back to actual column names."""
	        return [self.reverse_mapping.get(col, col) for col in x]

	    def postprocess_value(self, y: List[str]) -> List[str]:
	        """Transform actual column names to display names."""
	        return [self.column_mapping.get(col, col) for col in y]

	    def get_filtered_groups(self, df: pd.DataFrame) -> Dict[str, List[str]]:
	        """
	        Dynamically create column groups based on keywords.

	        A column belongs to a keyword's group when the keyword occurs in
	        the column name, compared case-insensitively. Keywords with no
	        matching column produce no group.

	        Args:
	            df: Input DataFrame

	        Returns:
	            Dict mapping "Benchmark group for <kw>" / "Model group for <kw>"
	            to the display names of the matching columns, benchmark groups
	            first.
	        """
	        filtered_groups = {}
	        keyword_sets = (
	            ("Benchmark", self.benchmark_keywords),
	            ("Model", self.model_keywords),
	        )
	        for label, keywords in keyword_sets:
	            for keyword in keywords:
	                # Lower-case the keyword as well as the column so that a
	                # keyword such as "IOI" still matches "ioi_llama3".
	                needle = keyword.lower()
	                matching = [
	                    self.column_mapping.get(col, col)
	                    for col in df.columns
	                    if needle in str(col).lower()
	                ]
	                if matching:
	                    filtered_groups[f"{label} group for {keyword}"] = matching
	        return filtered_groups

	    def _initial_selection(self, choices: List[str]) -> List[str]:
	        """Return the initial selection as display names, defaulting to all choices."""
	        if not self.initial_selected:
	            return choices
	        # ``choices`` holds display names, so translate any actual names in
	        # the configured selection; names that are already display names
	        # have no mapping entry and pass through unchanged.
	        return self.postprocess_value(self.initial_selected)

	    def update(
	        self,
	        value: Union[pd.DataFrame, Dict[str, List[str]], Any]
	    ) -> Dict:
	        """
	        Update component with new values.

	        Args:
	            value: DataFrame, dataclass-style fields object, or anything
	                the parent's ``update`` accepts

	        Returns:
	            Dict with "choices" and "value" (display names); for a
	            DataFrame it also carries "filtered_cols" with the dynamically
	            built groups.
	        """
	        if isinstance(value, pd.DataFrame):
	            # Get all column names and convert to display names
	            choices = [self.column_mapping.get(col, col) for col in value.columns]
	            return {
	                "choices": choices,
	                "value": self._initial_selection(choices),
	                "filtered_cols": self.get_filtered_groups(value)
	            }

	        # Handle fields object
	        if hasattr(value, '__dataclass_fields__'):
	            choices = [
	                self.column_mapping.get(field.name, field.name)
	                for field in fields(value)
	            ]
	            return {
	                "choices": choices,
	                "value": self._initial_selection(choices)
	            }

	        return super().update(value)


	# Example usage
	if __name__ == "__main__":
	    # Keywords that drive the dynamic benchmark / model grouping.
	    benchmark_keywords = [
	        "ioi",
	        "mcqa",
	        "arithmetic_addition",
	        "arithmetic_subtraction",
	        "arc_easy",
	        "arc_challenge",
	    ]
	    model_keywords = ["qwen2_5", "gpt2", "gemma2", "llama3"]

	    # Optional display names, keyed by actual column name.
	    mappings = {
	        # IOI
	        "ioi_llama3": "IOI (LLaMA-3)",
	        "ioi_qwen2_5": "IOI (Qwen-2.5)",
	        "ioi_gpt2": "IOI (GPT-2)",
	        "ioi_gemma2": "IOI (Gemma-2)",
	        # MCQA
	        "mcqa_llama3": "MCQA (LLaMA-3)",
	        "mcqa_qwen2_5": "MCQA (Qwen-2.5)",
	        "mcqa_gemma2": "MCQA (Gemma-2)",
	        # Arithmetic
	        "arithmetic_addition_llama3": "Arithmetic Addition (LLaMA-3)",
	        "arithmetic_subtraction_llama3": "Arithmetic Subtraction (LLaMA-3)",
	        # ARC
	        "arc_easy_llama3": "ARC Easy (LLaMA-3)",
	        "arc_easy_gemma2": "ARC Easy (Gemma-2)",
	        "arc_challenge_llama3": "ARC Challenge (LLaMA-3)",
	        # Non-benchmark columns
	        "eval_name": "Evaluation Name",
	        "Method": "Method",
	        "Average": "Average Score",
	    }

	    # Toy results table: two identifier columns, four task scores and an average.
	    df = pd.DataFrame(
	        {
	            "eval_name": ["test1", "test2", "test3"],
	            "Method": ["method1", "method2", "method3"],
	            "ioi_llama3": [0.1, 0.2, 0.3],
	            "ioi_qwen2_5": [0.4, 0.5, 0.6],
	            "ioi_gpt2": [0.7, 0.8, 0.9],
	            "mcqa_llama3": [0.2, 0.3, 0.4],
	            "Average": [0.35, 0.45, 0.55],
	        }
	    )

	    # Column selector configured with the keywords and display names above.
	    smart_columns = SmartSelectColumns(
	        benchmark_keywords=benchmark_keywords,
	        model_keywords=model_keywords,
	        column_mapping=mappings,
	        initial_selected=["Method", "Average"],
	    )

	    # Leaderboard built on the toy table.
	    # NOTE(review): AutoEvalColumn_mib_subgraph is not defined or imported
	    # in the visible part of this file -- confirm where it comes from.
	    leaderboard = Leaderboard(
	        value=df,
	        datatype=[c.type for c in fields(AutoEvalColumn_mib_subgraph)],
	        select_columns=smart_columns,
	        search_columns=["Method"],
	        hide_columns=[],
	        interactive=False,
	    )































	Debugging DataFrame columns: ['eval_name', 'Method', 'ioi_llama3', 'ioi_qwen2_5', 'ioi_gpt2', 'ioi_gemma2', 'mcqa_llama3', 'mcqa_qwen2_5', 'mcqa_gemma2', 'arithmetic_addition_llama3', 'arithmetic_subtraction_llama3', 'arc_easy_llama3', 'arc_easy_gemma2', 'arc_challenge_llama3', 'Average']

	Benchmark group for ioi: ['ioi_gpt2', 'ioi_qwen2_5', 'ioi_gemma2', 'ioi_llama3']

	Benchmark group for mcqa: ['mcqa_qwen2_5', 'mcqa_gemma2', 'mcqa_llama3']

	Benchmark group for arithmetic_addition: ['arithmetic_addition_llama3']

	Benchmark group for arithmetic_subtraction: ['arithmetic_subtraction_llama3']

	Benchmark group for arc_easy: ['arc_easy_gemma2', 'arc_easy_llama3']

	Benchmark group for arc_challenge: ['arc_challenge_llama3']

	Model group for qwen2_5: ['ioi_qwen2_5', 'mcqa_qwen2_5']

	Model group for gpt2: ['ioi_gpt2']

	Model group for gemma2: ['ioi_gemma2', 'mcqa_gemma2', 'arc_easy_gemma2']

	Model group for llama3: ['ioi_llama3', 'mcqa_llama3', 'arithmetic_addition_llama3', 'arithmetic_subtraction_llama3', 'arc_easy_llama3', 'arc_challenge_llama3']

	All available columns: ['ioi_gpt2', 'ioi_qwen2_5', 'ioi_gemma2', 'ioi_llama3', 'mcqa_qwen2_5', 'mcqa_gemma2', 'mcqa_llama3', 'arithmetic_addition_llama3', 'arithmetic_subtraction_llama3', 'arc_easy_gemma2', 'arc_easy_llama3', 'arc_challenge_llama3', 'ioi_qwen2_5', 'mcqa_qwen2_5', 'ioi_gpt2', 'ioi_gemma2', 'mcqa_gemma2', 'arc_easy_gemma2', 'ioi_llama3', 'mcqa_llama3', 'arithmetic_addition_llama3', 'arithmetic_subtraction_llama3', 'arc_easy_llama3', 'arc_challenge_llama3']
	* Running on local URL: http://0.0.0.0:7860
	/usr/local/lib/python3.10/site-packages/gradio/blocks.py:2634: UserWarning: Setting share=True is not supported on Hugging Face Spaces
	warnings.warn(

	To create a public link, set `share=True` in `launch()`.



	model_id: llama3, gemma2, gpt2, qwen2.5,