Commit 64dd40c · Parent: 4af9e8d
Add embedding dimensions
app.py
CHANGED
@@ -1,3 +1,5 @@
+import json
+
 from datasets import load_dataset
 import gradio as gr
 from huggingface_hub import HfApi, hf_hub_download
@@ -193,6 +195,35 @@ EXTERNAL_MODEL_TO_LINK = {
     "paraphrase-multilingual-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
 }
 
+EXTERNAL_MODEL_TO_DIM = {
+    "LASER2": 1024,
+    "LaBSE": 768,
+    "all-MiniLM-L12-v2": 384,
+    "all-MiniLM-L6-v2": 384,
+    "all-mpnet-base-v2": 768,
+    "allenai-specter": 768,
+    "bert-base-uncased": 768,
+    "contriever-base-msmarco": 768,
+    "glove.6B.300d": 300,
+    "gtr-t5-base": 768,
+    "gtr-t5-large": 768,
+    "gtr-t5-xl": 768,
+    "gtr-t5-xxl": 768,
+    "komninos": 300,
+    "msmarco-bert-co-condensor": 768,
+    "paraphrase-multilingual-MiniLM-L12-v2": 384,
+    "paraphrase-multilingual-mpnet-base-v2": 768,
+    "sentence-t5-base": 768,
+    "sentence-t5-large": 768,
+    "sentence-t5-xl": 768,
+    "sentence-t5-xxl": 768,
+    "sup-simcse-bert-base-uncased": 768,
+    "text-similarity-ada-001": 1024,
+    "text-search-ada-query-001": 1024,
+    "text-search-ada-doc-001": 1024,
+    "unsup-simcse-bert-base-uncased": 768,
+}
+
 
 EXTERNAL_MODEL_RESULTS = {model: {k: {v: []} for k, v in TASK_TO_METRIC.items()} for model in EXTERNAL_MODELS}
 
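For models whose scores come from the external results file rather than the Hub, this static table is the only source of dimensions. A minimal sketch of the lookup pattern used further down in this diff, assuming the dict above is in scope (the second key is a made-up name just to show the fallback):

# Known external model: returns its recorded dimension.
EXTERNAL_MODEL_TO_DIM.get("LaBSE", "")              # 768
# Unknown model: falls back to "", which renders as a blank cell.
EXTERNAL_MODEL_TO_DIM.get("some-future-model", "")  # ""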
@@ -236,8 +267,22 @@ for model in EXTERNAL_MODELS:
         ds_dict = {k: round(v, 2) for k, v in zip(ds_dict["mteb_dataset_name_with_lang"], ds_dict["score"])}
         EXTERNAL_MODEL_RESULTS[model][task][metric].append({**base_dict, **ds_dict})
 
-
-def get_mteb_data(tasks=["Clustering"], langs=[], fillna=True, task_to_metric=TASK_TO_METRIC):
+def get_emb_dim(model):
+    filenames = [sib.rfilename for sib in model.siblings]
+    dim = ""
+    if "1_Pooling/config.json" in filenames:
+        st_config_path = hf_hub_download(model.modelId, filename="1_Pooling/config.json")
+        dim = json.load(open(st_config_path)).get("word_embedding_dimension", "")
+    elif "2_Pooling/config.json" in filenames:
+        st_config_path = hf_hub_download(model.modelId, filename="2_Pooling/config.json")
+        dim = json.load(open(st_config_path)).get("word_embedding_dimension", "")
+    elif "config.json" in filenames:
+        config_path = hf_hub_download(model.modelId, filename="config.json")
+        dim = json.load(open(config_path)).get("hidden_dim", "")
+    return dim
+
+
+def get_mteb_data(tasks=["Clustering"], langs=[], fillna=True, add_emb_dim=False, task_to_metric=TASK_TO_METRIC):
     api = HfApi()
     models = api.list_models(filter="mteb")
     # Initialize list to models that we cannot fetch metadata from
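A sketch of exercising get_emb_dim on its own; the repo id is only an example, and model_info is used because it reliably populates .siblings. Note the plain config.json branch reads "hidden_dim", while standard transformers configs expose "hidden_size", so that fallback may often return the empty-string default.

from huggingface_hub import HfApi

api = HfApi()
# ModelInfo carries .siblings (the repo file listing) and .modelId,
# which is exactly what get_emb_dim inspects.
info = api.model_info("sentence-transformers/all-MiniLM-L6-v2")
print(get_emb_dim(info))  # expected 384, read from 1_Pooling/config.json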
@@ -252,6 +297,7 @@ def get_mteb_data(tasks=["Clustering"], langs=[], fillna=True, task_to_metric=TASK_TO_METRIC):
         res = {k: v for d in results_list for k, v in d.items()}
         # Model & at least one result
         if len(res) > 1:
+            res["Embedding Dimensions"] = EXTERNAL_MODEL_TO_DIM.get(model, "")
             df_list.append(res)
 
     for model in models:
@@ -279,6 +325,8 @@ def get_mteb_data(tasks=["Clustering"], langs=[], fillna=True, task_to_metric=TASK_TO_METRIC):
         out = [{res["dataset"]["name"].replace("MTEB ", ""): [round(score["value"], 2) for score in res["metrics"] if score["type"] == task_to_metric.get(res["task"]["type"])][0]} for res in task_results]
         out = {k: v for d in out for k, v in d.items()}
         out["Model"] = make_clickable_model(model.modelId)
+        if add_emb_dim:
+            out["Embedding Dimensions"] = get_emb_dim(model)
         df_list.append(out)
     df = pd.DataFrame(df_list)
     # Put 'Model' column first
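With both branches in place, external models and Hub models populate the same "Embedding Dimensions" column. A hedged sketch of the call, assuming get_mteb_data returns the assembled DataFrame as the surrounding code suggests:

# add_emb_dim=True triggers the per-model metadata fetch shown above.
df = get_mteb_data(tasks=["Clustering"], add_emb_dim=True)
print(df[["Model", "Embedding Dimensions"]].head())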
@@ -302,7 +350,8 @@ def get_mteb_average():
             "Summarization",
         ],
         langs=["en", "en-en"],
-        fillna=False
+        fillna=False,
+        add_emb_dim=True,
     )
     # Approximation (Missing Bitext Mining & including some nans)
     NUM_SCORES = DATA_OVERALL.shape[0] * DATA_OVERALL.shape[1]
@@ -335,7 +384,7 @@ def get_mteb_average():
     DATA_STS_EN = DATA_OVERALL[["Model"] + TASK_LIST_STS]
     DATA_SUMMARIZATION = DATA_OVERALL[["Model"] + TASK_LIST_SUMMARIZATION]
 
-    DATA_OVERALL = DATA_OVERALL[["Rank", "Model", f"Average ({len(TASK_LIST_EN)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL)} datasets)", f"STS Average ({len(TASK_LIST_STS)} datasets)", f"Summarization Average ({len(TASK_LIST_SUMMARIZATION)} dataset)"]]
+    DATA_OVERALL = DATA_OVERALL[["Rank", "Model", "Embedding Dimensions", f"Average ({len(TASK_LIST_EN)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL)} datasets)", f"STS Average ({len(TASK_LIST_STS)} datasets)", f"Summarization Average ({len(TASK_LIST_SUMMARIZATION)} dataset)"]]
 
     return DATA_OVERALL
 
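Since the new column is spliced in right after "Model", a quick sanity check on the returned frame (column labels exactly as selected above):

DATA_OVERALL = get_mteb_average()
assert list(DATA_OVERALL.columns[:3]) == ["Rank", "Model", "Embedding Dimensions"]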
@@ -377,7 +426,7 @@ with block:
     **Bitext Mining Leaderboard 🎌**
     
     - **Metric:** [F1](https://huggingface.co/spaces/evaluate-metric/f1)
-    - **Languages:** 
+    - **Languages:** 117
     """)
     with gr.Row():
         data_bitext_mining = gr.components.Dataframe(