from dataclasses import dataclass, make_dataclass
from enum import Enum

import pandas as pd

from src.about import Tasks, TasksMib_Subgraph, TasksMib_Causalgraph


def fields(raw_class):
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
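# Usage sketch (illustrative, not executed): `fields` collects the non-dunder
# class attributes in definition order, so for a hypothetical holder class:
#   class Example:
#       model = ColumnContent("Model", "markdown", True)
#   fields(Example)  # -> [ColumnContent("Model", "markdown", True)]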
# These classes define the user-facing column names in one place,
# so that renaming a column does not require changes all around the code.
@dataclass
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False
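# For example, ColumnContent("Model", "markdown", True, never_hidden=True)
# describes a markdown-rendered "Model" column that is displayed by default
# and can never be hidden by the user.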
## Leaderboard columns
auto_eval_column_dict = []
auto_eval_column_dict_multimodal = []

auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
auto_eval_column_dict.append(["hf_repo", ColumnContent, ColumnContent("HF Repo", "str", False)])
auto_eval_column_dict.append(["track", ColumnContent, ColumnContent("Track", "markdown", False)])
# Scores
# for task in Tasks:
#     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
# Model information
auto_eval_column_dict.append(["text_average", ColumnContent, ColumnContent("Text Average", "number", True)])
auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])

auto_eval_column_dict_multimodal.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
auto_eval_column_dict_multimodal.append(["hf_repo", ColumnContent, ColumnContent("HF Repo", "str", False)])
auto_eval_column_dict_multimodal.append(["track", ColumnContent, ColumnContent("Track", "markdown", False)])
# for task in TasksMultimodal:
#     auto_eval_column_dict_multimodal.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
#     if task.value.col_name in ("ewok", "EWoK"):  # make sure this appears in the right order
#         auto_eval_column_dict_multimodal.append(["text_average", ColumnContent, ColumnContent("Text Average", "number", True)])
auto_eval_column_dict_multimodal.append(["vision_average", ColumnContent, ColumnContent("Vision Average", "number", True)])
auto_eval_column_dict_multimodal.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
auto_eval_column_dict_multimodal.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])

AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
AutoEvalColumnMultimodal = make_dataclass("AutoEvalColumnMultimodal", auto_eval_column_dict_multimodal, frozen=True)
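# Illustration (comments only, derived from the triples above): make_dataclass
# turns each [field_name, type, default] entry into a frozen dataclass field
# whose default is the ColumnContent instance, so display metadata is read as
#   AutoEvalColumn.model.name         # -> "Model"
#   AutoEvalColumn.text_average.type  # -> "number"
#   AutoEvalColumn.revision.hidden    # -> False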
##############################################################################################################
# Version 3

auto_eval_column_dict_mib_subgraph = []

# Method name column (always present)
auto_eval_column_dict_mib_subgraph.append(
    ["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)]
)

# Add one column per task-model combination
for task in TasksMib_Subgraph:
    for model in task.value.models:
        field_name = f"{task.value.benchmark}_{model}"
        display_name = f"{task.value.benchmark}({model})"
        print(f"Creating column - Field name: {field_name}, Display name: {display_name}")
        column_content = ColumnContent(display_name, "number", True)
        print(f"Column content name property: {column_content.name}")
        auto_eval_column_dict_mib_subgraph.append([field_name, ColumnContent, column_content])

# Add the Average column
auto_eval_column_dict_mib_subgraph.append(
    ["average", ColumnContent, ColumnContent("Average", "number", True)]
)

print("\nFinal column configurations:")
for field in auto_eval_column_dict_mib_subgraph:
    print(f"Field name: {field[0]}, Display name: {field[2].name}")

# Create the dataclass for MIB columns
AutoEvalColumn_mib_subgraph = make_dataclass("AutoEvalColumn_mib_subgraph", auto_eval_column_dict_mib_subgraph, frozen=True)
# Column selection for display
COLS_MIB_SUBGRAPH = [c.name for c in fields(AutoEvalColumn_mib_subgraph) if not c.hidden]

BENCHMARK_COLS_MIB_SUBGRAPH = []
for task in TasksMib_Subgraph:
    for model in task.value.models:
        col_name = f"{task.value.col_name}_{model.replace('-', '_')}"
        BENCHMARK_COLS_MIB_SUBGRAPH.append(col_name)
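# NOTE: these benchmark column names are built from task.value.col_name with
# hyphens replaced by underscores, whereas the dataclass field names above use
# task.value.benchmark verbatim; the two are assumed to agree in src.about.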
# Same construction for the causal graph track
auto_eval_column_dict_mib_causalgraph = []
BENCHMARK_COLS_MIB_CAUSALGRAPH = []

# Only include the Method column as required
auto_eval_column_dict_mib_causalgraph.append(["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)])

# One column per model-task-intervention-counterfactual combination
for task in TasksMib_Causalgraph:
    for model in task.value.models:  # model names are lowercase
        col_name = f"{task.value.benchmark}_{model}"
        auto_eval_column_dict_mib_causalgraph.append([col_name, ColumnContent, ColumnContent(col_name, "number", True)])

# Add the Average column
auto_eval_column_dict_mib_causalgraph.append(
    ["average_score", ColumnContent, ColumnContent("Average", "number", True)]
)

# Create the dataclass
AutoEvalColumn_mib_causalgraph = make_dataclass(
    "AutoEvalColumn_mib_causalgraph",
    auto_eval_column_dict_mib_causalgraph,
    frozen=True,
)

# Column selection for display, mirroring COLS_MIB_SUBGRAPH above
COLS_MIB_CAUSALGRAPH = [c.name for c in fields(AutoEvalColumn_mib_causalgraph) if not c.hidden]
## For the queue columns in the submission tab
class EvalQueueColumn:  # Queue column
    track_name = ColumnContent("track", "str", True)
    method_name = ColumnContent("method_name", "str", True)
    repo_id = ColumnContent("hf_repo", "markdown", True)
    revision = ColumnContent("revision", "str", True)
    status = ColumnContent("status", "str", True)


## All the model information that we might need
@dataclass
class ModelDetails:
    name: str
    display_name: str = ""
    symbol: str = ""  # emoji


# Column selection
COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
COLS_MULTIMODAL = [c.name for c in fields(AutoEvalColumnMultimodal) if not c.hidden]
EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
BENCHMARK_COLS = [t.value.col_name for t in Tasks]
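# Resulting selections (comments only, derived from the definitions above):
#   COLS       -> ["Model", "HF Repo", "Track", "Text Average",
#                  "Available on the hub", "Model sha"]
#   EVAL_COLS  -> ["track", "method_name", "hf_repo", "revision", "status"]
#   EVAL_TYPES -> ["str", "str", "markdown", "str", "str"]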