Spaces:
Runtime error
Runtime error
Commit
·
404b92c
1
Parent(s):
3219cef
fixed update
Browse files
- refresh.py +10 -7
refresh.py
CHANGED
|
@@ -323,11 +323,17 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
|
|
| 323 |
df['MLSUMClusteringS2S (fr)'] = df['MLSUMClusteringS2S (fr)'].fillna(df['MLSUMClusteringS2S'])
|
| 324 |
datasets.remove('MLSUMClusteringS2S')
|
| 325 |
if ('PawsXPairClassification (fr)' in datasets) and ('PawsX (fr)' in cols):
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
else:
|
| 329 |
df['PawsXPairClassification (fr)'] = df['PawsX (fr)']
|
|
|
|
|
|
|
|
|
|
| 330 |
datasets.remove('PawsX (fr)')
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
# Filter invalid columns
|
| 332 |
cols = [col for col in cols if col in base_columns + datasets]
|
| 333 |
i = 0
|
|
@@ -356,10 +362,7 @@ def get_mteb_average(task_dict: dict):
|
|
| 356 |
)
|
| 357 |
# Debugging:
|
| 358 |
# DATA_OVERALL.to_csv("overall.csv")
|
| 359 |
-
|
| 360 |
-
DATA_OVERALL.insert(1, f"Average ({len(all_tasks)} datasets)", DATA_OVERALL[all_tasks].mean(axis=1, skipna=False))
|
| 361 |
-
except Exception as e:
|
| 362 |
-
breakpoint()
|
| 363 |
for i, (task_category, task_category_list) in enumerate(task_dict.items()):
|
| 364 |
DATA_OVERALL.insert(i+2, f"{task_category} Average ({len(task_category_list)} datasets)", DATA_OVERALL[task_category_list].mean(axis=1, skipna=False))
|
| 365 |
DATA_OVERALL.sort_values(f"Average ({len(all_tasks)} datasets)", ascending=False, inplace=True)
|
|
|
|
| 323 |
df['MLSUMClusteringS2S (fr)'] = df['MLSUMClusteringS2S (fr)'].fillna(df['MLSUMClusteringS2S'])
|
| 324 |
datasets.remove('MLSUMClusteringS2S')
|
| 325 |
if ('PawsXPairClassification (fr)' in datasets) and ('PawsX (fr)' in cols):
|
| 326 |
+
# Initially no model reports 'PawsXPairClassification (fr)', so the column may not exist yet; in that case create it from 'PawsX (fr)'. This fallback can be removed in a month or so.
|
| 327 |
+
if "PawsXPairClassification (fr)" not in cols:
|
|
|
|
| 328 |
df['PawsXPairClassification (fr)'] = df['PawsX (fr)']
|
| 329 |
+
else:
|
| 330 |
+
df['PawsXPairClassification (fr)'] = df['PawsXPairClassification (fr)'].fillna(df['PawsX (fr)'])
|
| 331 |
+
# Make the columns consistent: keep only 'PawsXPairClassification (fr)' and remove 'PawsX (fr)' from datasets, cols and df
|
| 332 |
datasets.remove('PawsX (fr)')
|
| 333 |
+
cols.remove('PawsX (fr)')
|
| 334 |
+
df.drop(columns=['PawsX (fr)'], inplace=True)
|
| 335 |
+
cols.append('PawsXPairClassification (fr)')
|
| 336 |
+
|
| 337 |
# Filter invalid columns
|
| 338 |
cols = [col for col in cols if col in base_columns + datasets]
|
| 339 |
i = 0
|
|
|
|
| 362 |
)
|
| 363 |
# Debugging:
|
| 364 |
# DATA_OVERALL.to_csv("overall.csv")
|
| 365 |
+
DATA_OVERALL.insert(1, f"Average ({len(all_tasks)} datasets)", DATA_OVERALL[all_tasks].mean(axis=1, skipna=False))
|
|
|
|
|
|
|
|
|
|
| 366 |
for i, (task_category, task_category_list) in enumerate(task_dict.items()):
|
| 367 |
DATA_OVERALL.insert(i+2, f"{task_category} Average ({len(task_category_list)} datasets)", DATA_OVERALL[task_category_list].mean(axis=1, skipna=False))
|
| 368 |
DATA_OVERALL.sort_values(f"Average ({len(all_tasks)} datasets)", ascending=False, inplace=True)
|