Spaces:
Running
Running
Update helper.py
Browse files
helper.py
CHANGED
@@ -30,21 +30,26 @@ TASKS_LIST={
|
|
30 |
'pos':'Part-of-Speech Tagging',
|
31 |
}
|
32 |
CLUSTERS = {
|
33 |
-
"Text Classification": [
|
34 |
'xlni', 'lid', 'news', 'sentiment', 'topic',
|
35 |
],
|
36 |
-
"Text Generation": [
|
37 |
'mt_eng2xx', 'mt_fra2xx', 'mt_xx2xx', 'paraphrase', 'summary', 'title',
|
38 |
],
|
39 |
-
"MCCR": [
|
40 |
'mmlu', 'mgsm', 'belebele', 'squad_qa',
|
41 |
],
|
42 |
-
"Tokens": [
|
43 |
'ner', 'phrase', 'pos',
|
44 |
],
|
45 |
}
|
46 |
ALL_TASKS = [t for cluster in CLUSTERS.values() for t in cluster]
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
48 |
# ===== Authenticate and Load Data From Private HF Repo =====
|
49 |
|
50 |
def load_private_leaderboard_df():
|
@@ -89,9 +94,9 @@ LANG_ISO2NAME = {
|
|
89 |
'som': 'Somali',
|
90 |
'pcm': 'Nigerian Pidgin',
|
91 |
'teo': 'Teso',
|
92 |
-
'nyn': 'Nyankore
|
93 |
'lgg': 'Lugbara',
|
94 |
-
'bem': 'Bemba
|
95 |
'tsn': 'Tswana',
|
96 |
'bbj': 'Ghomálá',
|
97 |
'mos': 'Moore',
|
@@ -101,11 +106,11 @@ LANG_ISO2NAME = {
|
|
101 |
'nso': 'Sepedi',
|
102 |
'tso': 'Tsonga',
|
103 |
'fuv': 'Fulfude Nigeria',
|
104 |
-
'gaz': 'Oromo, West Central',
|
105 |
'kea': 'Kabuverdianu',
|
106 |
'nya': 'Nyanja',
|
107 |
'ssw': 'Swati',
|
108 |
-
'luo': 'Dholuo
|
109 |
'ven': 'Venda',
|
110 |
'kir':"Kirundi",
|
111 |
}
|
@@ -120,7 +125,100 @@ def build_langname_to_isos(iso2name):
|
|
120 |
LANGNAME2ISOS = build_langname_to_isos(LANG_ISO2NAME)
|
121 |
#show only African langs
|
122 |
LANG_NAME_LIST = sorted([lang for lang in LANGNAME2ISOS.keys() if lang not in ['eng', 'fra', 'English', 'French']])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
def get_task_metric_map(df):
|
125 |
mapping = {}
|
126 |
for _, row in df.iterrows():
|
@@ -208,60 +306,17 @@ def df_to_html(df, col_minwidth=90, col_maxwidth=140, model_col_width=400):
|
|
208 |
# Remove any column whose name contains "task"
|
209 |
drop_cols = [col for col in df.columns if "task" in col]
|
210 |
df = df.drop(columns=drop_cols, errors="ignore")
|
211 |
-
df.columns.name = None
|
212 |
-
html=
|
213 |
-
# html = f"""
|
214 |
-
# <style>
|
215 |
-
# .gradio-container-5-34-1 .prose table {{
|
216 |
-
# border-top: 2px solid #dca02a;
|
217 |
-
# border-bottom: 2px solid #dca02a;
|
218 |
-
# margin-bottom:20px;
|
219 |
-
# margin-left: auto;
|
220 |
-
# margin-right: auto;
|
221 |
-
# width: 100%;
|
222 |
-
# border-collapse: collapse;
|
223 |
-
# table-layout: fixed;
|
224 |
-
# }}
|
225 |
-
# .gradio-container-5-34-1 .prose thead tr {{
|
226 |
-
# background: #fffbe9;
|
227 |
-
# border-bottom: 2px solid #dca02a;
|
228 |
-
# }}
|
229 |
-
# .gradio-container-5-34-1 .prose th {{
|
230 |
-
# color: #7d3561;
|
231 |
-
# font-weight: bold;
|
232 |
-
# font-size: 20px;
|
233 |
-
# background: #fffbe9;
|
234 |
-
# padding: 8px 5px;
|
235 |
-
# vertical-align: middle;
|
236 |
-
# border: 0px solid #e0e0e0;
|
237 |
-
# }}
|
238 |
-
# td {{
|
239 |
-
# font-size: 18px;
|
240 |
-
# padding: 8px 5px;
|
241 |
-
# border: 0px solid #e0e0e0;
|
242 |
-
# vertical-align: middle;
|
243 |
-
# }}
|
244 |
-
# th:first-child, td:first-child {{
|
245 |
-
# min-width: {model_col_width}px !important;
|
246 |
-
# max-width: {model_col_width}px !important;
|
247 |
-
# width: {model_col_width}px !important;
|
248 |
-
# text-align: left !important;
|
249 |
-
# }}
|
250 |
-
# th:not(:first-child), td:not(:first-child) {{
|
251 |
-
# min-width: {col_minwidth}px;
|
252 |
-
# max-width: {col_maxwidth}px;
|
253 |
-
# width: auto;
|
254 |
-
# text-align: center;
|
255 |
-
# }}
|
256 |
-
# </style>
|
257 |
-
# """
|
258 |
-
html += df.to_html(index=False, escape=False)
|
259 |
return html
|
260 |
|
261 |
|
262 |
|
263 |
cluster_tabs, main_overall_tab, all_df, metric_map = load_leaderboards()
|
264 |
|
|
|
|
|
|
|
265 |
def get_lang_table(lang_name):
|
266 |
iso_codes = LANGNAME2ISOS.get(lang_name, [])
|
267 |
if not iso_codes:
|
|
|
30 |
'pos':'Part-of-Speech Tagging',
|
31 |
}
|
32 |
# Task IDs grouped under the cluster heading they are reported with.
# Dict order is the order in which clusters (and their tasks) are listed.
CLUSTERS = {
    "Text Classification Tasks": [
        'xlni', 'lid', 'news', 'sentiment', 'topic',
    ],
    "Text Generation Tasks": [
        'mt_eng2xx', 'mt_fra2xx', 'mt_xx2xx', 'paraphrase', 'summary', 'title',
    ],
    "MCCR Tasks": [
        'mmlu', 'mgsm', 'belebele', 'squad_qa',
    ],
    "Tokens Level Tasks": [
        'ner', 'phrase', 'pos',
    ],
}

# Flat list of every task ID, in cluster order.
ALL_TASKS = [t for cluster in CLUSTERS.values() for t in cluster]

# This dictionary maps each task ID to its parent cluster name.
# If a task ID were ever listed under two clusters, the later cluster wins.
TASK_TO_CLUSTER_MAP = {
    task: cluster_name
    for cluster_name, tasks in CLUSTERS.items()
    for task in tasks
}
|
53 |
# ===== Authenticate and Load Data From Private HF Repo =====
|
54 |
|
55 |
def load_private_leaderboard_df():
|
|
|
94 |
'som': 'Somali',
|
95 |
'pcm': 'Nigerian Pidgin',
|
96 |
'teo': 'Teso',
|
97 |
+
'nyn': 'Nyankore',# (Nyankole)',
|
98 |
'lgg': 'Lugbara',
|
99 |
+
'bem': 'Bemba',# (Chibemba)',
|
100 |
'tsn': 'Tswana',
|
101 |
'bbj': 'Ghomálá',
|
102 |
'mos': 'Moore',
|
|
|
106 |
'nso': 'Sepedi',
|
107 |
'tso': 'Tsonga',
|
108 |
'fuv': 'Fulfude Nigeria',
|
109 |
+
'gaz': 'Oromo', #, West Central',
|
110 |
'kea': 'Kabuverdianu',
|
111 |
'nya': 'Nyanja',
|
112 |
'ssw': 'Swati',
|
113 |
+
'luo': 'Dholuo',# (Luo)',
|
114 |
'ven': 'Venda',
|
115 |
'kir':"Kirundi",
|
116 |
}
|
|
|
125 |
# Reverse lookup built from LANG_ISO2NAME by build_langname_to_isos.
LANGNAME2ISOS = build_langname_to_isos(LANG_ISO2NAME)

# Show only African languages: English and French (by code or by name)
# are left out of the sorted list of language names.
LANG_NAME_LIST = sorted(
    lang
    for lang in LANGNAME2ISOS
    if lang not in ('eng', 'fra', 'English', 'French')
)

# Sorted task choices in the format "Task Name (id)".
TASK_NAME_LIST = sorted(f"{name} ({key})" for key, name in TASKS_LIST.items())

# Task display name -> task ID.
# NOTE(review): keys here are the bare display names, not the "Task Name (id)"
# labels in TASK_NAME_LIST -- confirm callers strip the "(id)" suffix before
# looking a choice up in this map.
TASK_NAME2KEY = {name: key for key, name in TASKS_LIST.items()}
|
132 |
+
|
133 |
+
def get_model_table(model_name):
    """Build the per-task score table for a single model.

    Only rows of the module-level ``all_df`` whose ``model`` equals
    ``model_name`` and whose ``leaderboard`` is ``'main'`` are used.

    Args:
        model_name: Value matched against the ``model`` column of ``all_df``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Cluster``, ``Task Name``,
        ``Task ID``, ``Metric`` and ``Score``, sorted by ``Cluster`` and then
        ``Task Name``. ``Score`` is text: two decimals, or ``"---"`` when the
        score is missing. When no matching rows exist, a one-row frame with a
        single ``Info`` column describing the problem is returned instead.
    """
    # Filter for the selected model and only 'main' leaderboard entries.
    is_selected = (all_df['model'] == model_name) & (all_df['leaderboard'] == 'main')
    model_df = all_df[is_selected].copy()

    if model_df.empty:
        return pd.DataFrame([{"Info": f"No 'main' leaderboard data available for the model: {model_name}"}])

    # Resolve the label with an explicit default while the column is built.
    # The previous `table['Cluster'].fillna(..., inplace=True)` was a chained
    # in-place update (a no-op under pandas Copy-on-Write) and ran only after
    # the sort, so the sort never saw the 'Uncategorized' label.
    model_df['Cluster'] = model_df['task'].map(
        lambda task: TASK_TO_CLUSTER_MAP.get(task, 'Uncategorized')
    )

    # Unknown task / metric IDs fall back to the raw ID instead of NaN.
    model_df['Task Name'] = model_df['task'].map(lambda task: TASKS_LIST.get(task, task))
    model_df['Metric'] = model_df['metric'].map(lambda metric: metrics_list.get(metric, metric))
    model_df['Score'] = model_df['score'].apply(lambda x: f"{x:.2f}" if pd.notna(x) else "---")

    table = model_df[['Cluster', 'Task Name', 'task', 'Metric', 'Score']].rename(columns={'task': 'Task ID'})

    # Sort by Cluster first, then by Task Name.
    return table.sort_values(by=['Cluster', 'Task Name']).reset_index(drop=True)
|
162 |
+
|
163 |
+
def _leaderboard_column_name(leaderboard):
    """Return the display column for a leaderboard ID, or None for 'main'."""
    if leaderboard == 'main':
        # 'main' is the aggregate row; it is shown as "Task Score" instead.
        return None
    if '-' in leaderboard:
        pair_lang = leaderboard.split('-')
        # Fall back to the raw code when an ISO code is not in LANG_ISO2NAME.
        src_lang = LANG_ISO2NAME.get(pair_lang[0], pair_lang[0])
        tgt_lang = LANG_ISO2NAME.get(pair_lang[1], pair_lang[1])
        return f"{src_lang} to {tgt_lang}"
    return LANG_ISO2NAME.get(leaderboard, leaderboard)


def _format_score_cell(value):
    """Format a numeric cell with two decimals; missing values become '---'."""
    if pd.isna(value):
        return "---"
    if isinstance(value, (int, float)):
        return f"{value:.2f}"
    return value


def get_task_leaderboard(task_key):
    """Build the leaderboard for one task: models as rows, languages as columns.

    Rows are taken from the module-level ``all_df`` where ``task`` equals
    ``task_key``. For every task except ``'lid'`` the ``leaderboard`` value of
    each row is turned into a language (or "source to target") column and the
    aggregate ``'main'`` rows are left out of the pivot; for ``'lid'`` the
    pivot is keyed on the ``task`` column itself. Duplicate
    model/column combinations are averaged.

    Args:
        task_key: Task ID matched against the ``task`` column of ``all_df``.

    Returns:
        A ``pandas.DataFrame`` whose first column is ``model``, second column
        is ``Task Score`` (the model's ``'main'`` score for this task) and
        remaining columns are per-language scores. All scores are text with
        two decimals, ``"---"`` when missing. When there is no usable data a
        one-row frame with a single ``Info`` column is returned instead.
    """
    # Filter the main DataFrame for the selected task.
    task_df = all_df[all_df['task'] == task_key].copy()

    if task_df.empty:
        return pd.DataFrame([{"Info": f"No data available for the task: {TASKS_LIST.get(task_key, task_key)}"}])

    # Keep the aggregate rows aside before they are dropped from the pivot.
    main_rows = task_df[task_df['leaderboard'] == 'main']

    if task_key not in ['lid']:
        task_df['lang_col'] = task_df['leaderboard'].map(_leaderboard_column_name)
        # Remove rows where lang_col is None (the 'main' aggregate).
        task_df = task_df.dropna(subset=['lang_col'])

        if task_df.empty:
            return pd.DataFrame([{"Info": f"No language-specific data for the task: {TASKS_LIST.get(task_key, task_key)}"}])

        # Pivot the table to have models as rows and languages as columns.
        table = task_df.pivot_table(index='model', columns='lang_col', values='score', aggfunc='mean').reset_index()
    else:
        table = task_df.pivot_table(index='model', columns='task', values='score', aggfunc='mean').reset_index()

    score_cols = [col for col in table.columns if col != 'model']
    for col in score_cols:
        # Missing model/language combinations come out of the pivot as NaN;
        # render them like a missing Task Score rather than as "nan".
        table[col] = table[col].apply(_format_score_cell)

    # One score per model: averaging keeps the lookup index unique, so the
    # map below cannot fail when a model has several 'main' rows.
    main_score_map = main_rows.groupby('model')['score'].mean()
    task_scores = table['model'].map(main_score_map).apply(lambda x: f"{x:.2f}" if pd.notna(x) else "---")
    table.insert(1, 'Task Score', task_scores)

    # Add ranking medals based on the "Task Score".
    # NOTE(review): 'Task Score' is already formatted text at this point --
    # confirm add_medals_to_models expects strings rather than floats.
    table = add_medals_to_models(table, score_col="Task Score")

    # Tasks with many language columns get their headers wrapped in a
    # 'rotate_div' element; all other tasks keep the plain column names.
    if task_key in ['belebele', 'ner', 'mgsm', 'mmlu']:
        rename_cols = {col: f"<div class='rotate_div'><br>{col}</div>" for col in score_cols}
        table = table.rename(columns=rename_cols)

    return table
|
221 |
+
|
222 |
def get_task_metric_map(df):
|
223 |
mapping = {}
|
224 |
for _, row in df.iterrows():
|
|
|
306 |
# Remove any column whose name contains "task"
|
307 |
drop_cols = [col for col in df.columns if "task" in col]
|
308 |
df = df.drop(columns=drop_cols, errors="ignore")
|
309 |
+
df.columns.name = None
|
310 |
+
html = df.to_html(index=False, escape=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
311 |
return html
|
312 |
|
313 |
|
314 |
|
315 |
# Load the leaderboard data once; all_df is the frame the table builders read.
cluster_tabs, main_overall_tab, all_df, metric_map = load_leaderboards()

# Sorted unique model names for the model dropdown (empty when no data loaded).
if all_df.empty:
    MODEL_NAME_LIST = []
else:
    MODEL_NAME_LIST = sorted(all_df['model'].unique())
|
319 |
+
|
320 |
def get_lang_table(lang_name):
|
321 |
iso_codes = LANGNAME2ISOS.get(lang_name, [])
|
322 |
if not iso_codes:
|