from dataclasses import dataclass, make_dataclass

from src.about import Tasks, TasksMib_Subgraph, TasksMib_Causalgraph

def fields(raw_class):
    """Return the ColumnContent attributes of a class, skipping dunders.

    Unlike dataclasses.fields(), this also works on plain classes such as
    EvalQueueColumn below, whose columns are class attributes rather than
    annotated dataclass fields.
    """
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]


# These classes hold the user-facing column names, so that a rename
# only needs to happen here rather than throughout the codebase.
@dataclass
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

## Leaderboard columns
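# Each entry below is [attribute_name, annotation, default] -- the triple
# shape that make_dataclass() consumes; the ColumnContent default carries the
# user-facing name, dtype, and visibility flags.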
auto_eval_column_dict = []
auto_eval_column_dict_multimodal = []

auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
auto_eval_column_dict.append(["hf_repo", ColumnContent, ColumnContent("HF Repo", "str", False)])
auto_eval_column_dict.append(["track", ColumnContent, ColumnContent("Track", "markdown", False)])
# Scores
# for task in Tasks:
#     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
# Model information
auto_eval_column_dict.append(["text_average", ColumnContent, ColumnContent("Text Average", "number", True)])
auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])

auto_eval_column_dict_multimodal.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
auto_eval_column_dict_multimodal.append(["hf_repo", ColumnContent, ColumnContent("HF Repo", "str", False)])
auto_eval_column_dict_multimodal.append(["track", ColumnContent, ColumnContent("Track", "markdown", False)])
# for task in TasksMultimodal:
#     auto_eval_column_dict_multimodal.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
#     if task.value.col_name in ("ewok", "EWoK"):   # make sure this appears in the right order
#         auto_eval_column_dict_multimodal.append(["text_average", ColumnContent, ColumnContent("Text Average", "number", True)])
auto_eval_column_dict_multimodal.append(["vision_average", ColumnContent, ColumnContent("Vision Average", "number", True)])
auto_eval_column_dict_multimodal.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
auto_eval_column_dict_multimodal.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])



AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
AutoEvalColumnMultimodal = make_dataclass("AutoEvalColumnMultimodal", auto_eval_column_dict_multimodal, frozen=True)
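# Illustrative access, grounded in the entries above:
#   AutoEvalColumn.model.name        == "Model"
#   AutoEvalColumn.text_average.type == "number"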



##############################################################################################################
# Version 3
auto_eval_column_dict_mib_subgraph = []

# Method name column (always present)
auto_eval_column_dict_mib_subgraph.append(
    ["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)]
)

# Add columns for each task-model combination
for task in TasksMib_Subgraph:
    for model in task.value.models:
        field_name = f"{task.value.benchmark}_{model}"
        display_name = f"{task.value.benchmark}({model})"
        auto_eval_column_dict_mib_subgraph.append(
            [field_name, ColumnContent, ColumnContent(display_name, "number", True)]
        )
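# Example: a task with benchmark "ioi" and model "gpt2" (illustrative values;
# the real ones come from TasksMib_Subgraph in src.about) yields the field
# "ioi_gpt2" and the displayed column "ioi(gpt2)".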

# Add the Average column
auto_eval_column_dict_mib_subgraph.append(
    ["average", ColumnContent, ColumnContent("Average", "number", True)]
)

print("\nFinal column configurations:")
for field in auto_eval_column_dict_mib_subgraph:
    print(f"Field name: {field[0]}, Display name: {field[2].name}")


# Create the dataclass for MIB columns
AutoEvalColumn_mib_subgraph = make_dataclass("AutoEvalColumn_mib_subgraph", auto_eval_column_dict_mib_subgraph, frozen=True)

# Column selection for display
COLS_MIB_SUBGRAPH = [c.name for c in fields(AutoEvalColumn_mib_subgraph) if not c.hidden]


BENCHMARK_COLS_MIB_SUBGRAPH = []
for task in TasksMib_Subgraph:
    for model in task.value.models:
        col_name = f"{task.value.col_name}_{model.replace('-', '_')}"
        BENCHMARK_COLS_MIB_SUBGRAPH.append(col_name)
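# NOTE: these identifiers are built from col_name with dashes mapped to
# underscores; they are assumed to match the column headers in the results
# files, and differ from the dataclass field names built above.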

# Same structure for the causal graph track
auto_eval_column_dict_mib_causalgraph = []

# Only include Method column as required
auto_eval_column_dict_mib_causalgraph.append(["method", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)])

# One column per task-model combination
for task in TasksMib_Causalgraph:
    for model in task.value.models:  # model names are expected to be lowercase
        col_name = f"{task.value.benchmark}_{model}"
        auto_eval_column_dict_mib_causalgraph.append([
            col_name, 
            ColumnContent, 
            ColumnContent(col_name, "number", True)
        ])

# Add the Average column
auto_eval_column_dict_mib_causalgraph.append(
    ["average_score", ColumnContent, ColumnContent("Average", "number", True)]
)

# Create the dataclass
AutoEvalColumn_mib_causalgraph = make_dataclass(
    "AutoEvalColumn_mib_causalgraph",
    auto_eval_column_dict_mib_causalgraph,
    frozen=True
)

# Column selection for display, mirroring the subgraph track
COLS_MIB_CAUSALGRAPH = [c.name for c in fields(AutoEvalColumn_mib_causalgraph) if not c.hidden]

# Benchmark column identifiers; assumed to mirror the field names built above
BENCHMARK_COLS_MIB_CAUSALGRAPH = [
    f"{task.value.benchmark}_{model}"
    for task in TasksMib_Causalgraph
    for model in task.value.models
]


## For the queue columns in the submission tab
@dataclass(frozen=True)
class EvalQueueColumn:  # Queue column
    track_name = ColumnContent("track", "str", True)
    method_name = ColumnContent("method_name", "str", True)
    repo_id = ColumnContent("hf_repo", "markdown", True)
    revision = ColumnContent("revision", "str", True)
    status = ColumnContent("status", "str", True)

## All the model information that we might need
@dataclass
class ModelDetails:
    name: str
    display_name: str = ""
    symbol: str = "" # emoji

# Column selection

COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
COLS_MULTIMODAL = [c.name for c in fields(AutoEvalColumnMultimodal) if not c.hidden]

EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
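# Grounded example, given the queue columns above:
#   EVAL_COLS  == ["track", "method_name", "hf_repo", "revision", "status"]
#   EVAL_TYPES == ["str", "str", "markdown", "str", "str"]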

BENCHMARK_COLS = [t.value.col_name for t in Tasks]
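

# Minimal smoke test -- a sketch assuming this file is run from the repo root
# so that `src.about` resolves:
if __name__ == "__main__":
    print("Text columns:", COLS)
    print("Multimodal columns:", COLS_MULTIMODAL)
    print("Subgraph columns:", COLS_MIB_SUBGRAPH)
    print("Causal graph columns:", COLS_MIB_CAUSALGRAPH)
    print("Queue columns:", EVAL_COLS)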