Spaces:

fair-forward
/

evals-for-every-language

Running

App Files Files Community

David Pomerenke committed on Mar 21

Commit

723f963

1 Parent(s): b4a0c57

Process data for country map

Browse files

Files changed (5) hide show

evals/countries.py +50 -0
evals/languages.py +0 -9
evals/main.py +39 -7
frontend/public/results.json +0 -0
results.json +21 -21

evals/countries.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import re
+import xml.etree.ElementTree as ET
+from collections import defaultdict
+from language_data.population_data import LANGUAGE_SPEAKING_POPULATION
+from language_data.util import data_filename
+def get_population_data():
+    filename = data_filename("supplementalData.xml")
+    root = ET.fromstring(open(filename).read())
+    territories = root.findall("./territoryInfo/territory")
+    data = {}
+    for territory in territories:
+        t_code = territory.attrib["type"]
+        t_population = float(territory.attrib["population"])
+        data[t_code] = t_population
+    return data
+def population(bcp_47):
+    items = {
+        re.sub(r"^[a-z]+-", "", lang): pop
+        for lang, pop in LANGUAGE_SPEAKING_POPULATION.items()
+        if re.match(rf"^{bcp_47}-[A-Z]{{2}}$", lang)
+    }
+    return items
+def make_country_table(language_table):
+    countries = defaultdict(list)
+    for lang in language_table.itertuples():
+        for country, pop in population(lang.bcp_47).items():
+            countries[country].append(
+                {
+                    "name": lang.language_name,
+                    "bcp_47": lang.bcp_47,
+                    "population": pop,
+                    "score": lang.average,
+                }
+            )
+    for country, languages in countries.items():
+        pop = sum(entry["population"] for entry in languages)
+        score = sum(entry["score"] * entry["population"] for entry in languages) / pop
+        countries[country] = {
+            "score": score,
+            "languages": languages,
+        }
+    return countries

evals/languages.py CHANGED Viewed

@@ -46,15 +46,6 @@ scripts = pd.read_csv("data/ScriptCodes.csv").rename(
     columns={"Code": "iso15924", "English Name": "script_name"}
 )
-def population(bcp_47):
-    items = {
-        re.sub(r"^[a-z]+-", "", lang): pop
-        for lang, pop in LANGUAGE_SPEAKING_POPULATION.items()
-        if re.match(rf"^{bcp_47}-[A-Z]{{2}}$", lang)
-    }
-    return items
 def script_name(iso15924):
     return scripts[scripts["iso15924"] == iso15924]["script_name"].values[0]

     columns={"Code": "iso15924", "English Name": "script_name"}
 )
 def script_name(iso15924):
     return scripts[scripts["iso15924"] == iso15924]["script_name"].values[0]

evals/main.py CHANGED Viewed

@@ -3,11 +3,12 @@ import json
 import numpy as np
 import pandas as pd
-from rich import print
-from tqdm.asyncio import tqdm_asyncio
 from languages import languages
 from tasks import tasks
-from models import models, model_fast
 # ===== config =====
@@ -91,7 +92,20 @@ def make_model_table(df):
     df["provider"] = df["model"].str.split("/").str[0].apply(fmt_name)
     df["model"] = df["model"].str.split("/").str[1].apply(fmt_name)
     df["rank"] = df.index + 1
-    df = df[["rank", "provider", "model", "hf_id", "creation_date", "size", "type", "license", "average", *task_metrics]]
     return df
@@ -99,15 +113,31 @@ def make_language_table(df):
     df["task_metric"] = df["task"] + "_" + df["metric"]
     df = df.drop(columns=["task", "metric"])
     task_metrics = df["task_metric"].unique()
-    df = df.pivot(index="bcp_47", columns="task_metric", values="score").fillna(0).reset_index()
     df["average"] = df[task_metrics].mean(axis=1)
     for row in [*task_metrics, "average"]:
         df[row] = df[row].round(2)
     df = pd.merge(languages, df, on="bcp_47", how="outer")
     df = df.sort_values(by="speakers", ascending=False)
-    df = df[["language_name", "autonym", "speakers", "family", "average", "in_benchmark", *task_metrics]]
     return df
 async def main():
     results = await evaluate()
     results, lang_results, model_results, task_results = aggregate(results)
@@ -121,10 +151,12 @@ async def main():
         json.dump(all_results, f, indent=2, ensure_ascii=False)
     datasets_df = pd.read_json("data/datasets.json")
     all_tables = {
         "model_table": serialize(make_model_table(model_results)),
-        "language_table": serialize(make_language_table(lang_results)),
         "dataset_table": serialize(datasets_df),
     }
     with open("frontend/public/results.json", "w") as f:
         json.dump(all_tables, f, indent=2, ensure_ascii=False)

 import numpy as np
 import pandas as pd
+from countries import make_country_table
 from languages import languages
+from models import model_fast, models
+from rich import print
 from tasks import tasks
+from tqdm.asyncio import tqdm_asyncio
 # ===== config =====
     df["provider"] = df["model"].str.split("/").str[0].apply(fmt_name)
     df["model"] = df["model"].str.split("/").str[1].apply(fmt_name)
     df["rank"] = df.index + 1
+    df = df[
+        [
+            "rank",
+            "provider",
+            "model",
+            "hf_id",
+            "creation_date",
+            "size",
+            "type",
+            "license",
+            "average",
+            *task_metrics,
+        ]
+    ]
     return df
     df["task_metric"] = df["task"] + "_" + df["metric"]
     df = df.drop(columns=["task", "metric"])
     task_metrics = df["task_metric"].unique()
+    df = (
+        df.pivot(index="bcp_47", columns="task_metric", values="score")
+        .fillna(0)
+        .reset_index()
+    )
     df["average"] = df[task_metrics].mean(axis=1)
     for row in [*task_metrics, "average"]:
         df[row] = df[row].round(2)
     df = pd.merge(languages, df, on="bcp_47", how="outer")
     df = df.sort_values(by="speakers", ascending=False)
+    df = df[
+        [
+            "bcp_47",
+            "language_name",
+            "autonym",
+            "speakers",
+            "family",
+            "average",
+            "in_benchmark",
+            *task_metrics,
+        ]
+    ]
     return df
 async def main():
     results = await evaluate()
     results, lang_results, model_results, task_results = aggregate(results)
         json.dump(all_results, f, indent=2, ensure_ascii=False)
     datasets_df = pd.read_json("data/datasets.json")
+    language_table = make_language_table(lang_results)
     all_tables = {
         "model_table": serialize(make_model_table(model_results)),
+        "language_table": serialize(language_table),
         "dataset_table": serialize(datasets_df),
+        "countries": make_country_table(language_table),
     }
     with open("frontend/public/results.json", "w") as f:
         json.dump(all_tables, f, indent=2, ensure_ascii=False)

frontend/public/results.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

results.json CHANGED Viewed

@@ -616,7 +616,7 @@
       "family": "Indo-European",
       "flores_path": null,
       "fleurs_tag": null,
-      "commonvoice_hours": 13.0,
       "commonvoice_locale": "an",
       "in_benchmark": false,
       "task": null,
@@ -1992,7 +1992,7 @@
       "family": "Indo-European",
       "flores_path": "cat_Latn",
       "fleurs_tag": "ca_es",
-      "commonvoice_hours": 2842.0,
       "commonvoice_locale": "ca",
       "in_benchmark": true,
       "task": null,
@@ -2584,7 +2584,7 @@
       "family": "Indo-European",
       "flores_path": "deu_Latn",
       "fleurs_tag": "de_de",
-      "commonvoice_hours": 1359.0,
       "commonvoice_locale": "de",
       "in_benchmark": true,
       "task": null,
@@ -2904,7 +2904,7 @@
       "family": "Indo-European",
       "flores_path": "eng_Latn",
       "fleurs_tag": "en_us",
-      "commonvoice_hours": 2651.0,
       "commonvoice_locale": "en",
       "in_benchmark": true,
       "task": "classification",
@@ -2920,7 +2920,7 @@
       "family": "Indo-European",
       "flores_path": "eng_Latn",
       "fleurs_tag": "en_us",
-      "commonvoice_hours": 2651.0,
       "commonvoice_locale": "en",
       "in_benchmark": true,
       "task": "language_modeling",
@@ -2936,7 +2936,7 @@
       "family": "Indo-European",
       "flores_path": "eng_Latn",
       "fleurs_tag": "en_us",
-      "commonvoice_hours": 2651.0,
       "commonvoice_locale": "en",
       "in_benchmark": true,
       "task": "translation",
@@ -2952,7 +2952,7 @@
       "family": "Indo-European",
       "flores_path": "eng_Latn",
       "fleurs_tag": "en_us",
-      "commonvoice_hours": 2651.0,
       "commonvoice_locale": "en",
       "in_benchmark": true,
       "task": "translation",
@@ -3320,7 +3320,7 @@
       "family": "Indo-European",
       "flores_path": "fra_Latn",
       "fleurs_tag": "fr_fr",
-      "commonvoice_hours": 1052.0,
       "commonvoice_locale": "fr",
       "in_benchmark": true,
       "task": "classification",
@@ -3336,7 +3336,7 @@
       "family": "Indo-European",
       "flores_path": "fra_Latn",
       "fleurs_tag": "fr_fr",
-      "commonvoice_hours": 1052.0,
       "commonvoice_locale": "fr",
       "in_benchmark": true,
       "task": "language_modeling",
@@ -3352,7 +3352,7 @@
       "family": "Indo-European",
       "flores_path": "fra_Latn",
       "fleurs_tag": "fr_fr",
-      "commonvoice_hours": 1052.0,
       "commonvoice_locale": "fr",
       "in_benchmark": true,
       "task": "translation",
@@ -3368,7 +3368,7 @@
       "family": "Indo-European",
       "flores_path": "fra_Latn",
       "fleurs_tag": "fr_fr",
-      "commonvoice_hours": 1052.0,
       "commonvoice_locale": "fr",
       "in_benchmark": true,
       "task": "translation",
@@ -4952,7 +4952,7 @@
       "family": "Abkhaz-Adyge",
       "flores_path": null,
       "fleurs_tag": null,
-      "commonvoice_hours": 16.0,
       "commonvoice_locale": "kbd",
       "in_benchmark": false,
       "task": null,
@@ -5752,7 +5752,7 @@
       "family": "Indo-European",
       "flores_path": null,
       "fleurs_tag": null,
-      "commonvoice_hours": 2.9,
       "commonvoice_locale": "kw",
       "in_benchmark": false,
       "task": null,
@@ -6296,7 +6296,7 @@
       "family": "Indo-European",
       "flores_path": "ltg_Latn",
       "fleurs_tag": null,
-      "commonvoice_hours": 28.0,
       "commonvoice_locale": "ltg",
       "in_benchmark": true,
       "task": null,
@@ -8360,7 +8360,7 @@
       "family": "Indo-European",
       "flores_path": null,
       "fleurs_tag": "ps_af",
-      "commonvoice_hours": 79.0,
       "commonvoice_locale": "ps",
       "in_benchmark": false,
       "task": null,
@@ -8952,7 +8952,7 @@
       "family": "Turkic",
       "flores_path": null,
       "fleurs_tag": null,
-      "commonvoice_hours": 9.5,
       "commonvoice_locale": "sah",
       "in_benchmark": false,
       "task": null,
@@ -9224,7 +9224,7 @@
       "family": null,
       "flores_path": null,
       "fleurs_tag": null,
-      "commonvoice_hours": 1.2,
       "commonvoice_locale": "sei",
       "in_benchmark": false,
       "task": null,
@@ -9352,7 +9352,7 @@
       "family": "Indo-European",
       "flores_path": "slk_Latn",
       "fleurs_tag": "sk_sk",
-      "commonvoice_hours": 46.0,
       "commonvoice_locale": "sk",
       "in_benchmark": true,
       "task": null,
@@ -10200,7 +10200,7 @@
       "family": "Afro-Asiatic",
       "flores_path": null,
       "fleurs_tag": null,
-      "commonvoice_hours": 7.3,
       "commonvoice_locale": "tig",
       "in_benchmark": false,
       "task": null,
@@ -10712,7 +10712,7 @@
       "family": "Turkic",
       "flores_path": "uig_Arab",
       "fleurs_tag": null,
-      "commonvoice_hours": 364.0,
       "commonvoice_locale": "ug",
       "in_benchmark": true,
       "task": null,
@@ -10728,7 +10728,7 @@
       "family": "Indo-European",
       "flores_path": "ukr_Cyrl",
       "fleurs_tag": "uk_ua",
-      "commonvoice_hours": 98.0,
       "commonvoice_locale": "uk",
       "in_benchmark": true,
       "task": null,

       "family": "Indo-European",
       "flores_path": null,
       "fleurs_tag": null,
+      "commonvoice_hours": 14.0,
       "commonvoice_locale": "an",
       "in_benchmark": false,
       "task": null,
       "family": "Indo-European",
       "flores_path": "cat_Latn",
       "fleurs_tag": "ca_es",
+      "commonvoice_hours": 2844.0,
       "commonvoice_locale": "ca",
       "in_benchmark": true,
       "task": null,
       "family": "Indo-European",
       "flores_path": "deu_Latn",
       "fleurs_tag": "de_de",
+      "commonvoice_hours": 1360.0,
       "commonvoice_locale": "de",
       "in_benchmark": true,
       "task": null,
       "family": "Indo-European",
       "flores_path": "eng_Latn",
       "fleurs_tag": "en_us",
+      "commonvoice_hours": 2653.0,
       "commonvoice_locale": "en",
       "in_benchmark": true,
       "task": "classification",
       "family": "Indo-European",
       "flores_path": "eng_Latn",
       "fleurs_tag": "en_us",
+      "commonvoice_hours": 2653.0,
       "commonvoice_locale": "en",
       "in_benchmark": true,
       "task": "language_modeling",
       "family": "Indo-European",
       "flores_path": "eng_Latn",
       "fleurs_tag": "en_us",
+      "commonvoice_hours": 2653.0,
       "commonvoice_locale": "en",
       "in_benchmark": true,
       "task": "translation",
       "family": "Indo-European",
       "flores_path": "eng_Latn",
       "fleurs_tag": "en_us",
+      "commonvoice_hours": 2653.0,
       "commonvoice_locale": "en",
       "in_benchmark": true,
       "task": "translation",
       "family": "Indo-European",
       "flores_path": "fra_Latn",
       "fleurs_tag": "fr_fr",
+      "commonvoice_hours": 1053.0,
       "commonvoice_locale": "fr",
       "in_benchmark": true,
       "task": "classification",
       "family": "Indo-European",
       "flores_path": "fra_Latn",
       "fleurs_tag": "fr_fr",
+      "commonvoice_hours": 1053.0,
       "commonvoice_locale": "fr",
       "in_benchmark": true,
       "task": "language_modeling",
       "family": "Indo-European",
       "flores_path": "fra_Latn",
       "fleurs_tag": "fr_fr",
+      "commonvoice_hours": 1053.0,
       "commonvoice_locale": "fr",
       "in_benchmark": true,
       "task": "translation",
       "family": "Indo-European",
       "flores_path": "fra_Latn",
       "fleurs_tag": "fr_fr",
+      "commonvoice_hours": 1053.0,
       "commonvoice_locale": "fr",
       "in_benchmark": true,
       "task": "translation",
       "family": "Abkhaz-Adyge",
       "flores_path": null,
       "fleurs_tag": null,
+      "commonvoice_hours": 18.0,
       "commonvoice_locale": "kbd",
       "in_benchmark": false,
       "task": null,
       "family": "Indo-European",
       "flores_path": null,
       "fleurs_tag": null,
+      "commonvoice_hours": 3.4,
       "commonvoice_locale": "kw",
       "in_benchmark": false,
       "task": null,
       "family": "Indo-European",
       "flores_path": "ltg_Latn",
       "fleurs_tag": null,
+      "commonvoice_hours": 29.0,
       "commonvoice_locale": "ltg",
       "in_benchmark": true,
       "task": null,
       "family": "Indo-European",
       "flores_path": null,
       "fleurs_tag": "ps_af",
+      "commonvoice_hours": 80.0,
       "commonvoice_locale": "ps",
       "in_benchmark": false,
       "task": null,
       "family": "Turkic",
       "flores_path": null,
       "fleurs_tag": null,
+      "commonvoice_hours": 11.0,
       "commonvoice_locale": "sah",
       "in_benchmark": false,
       "task": null,
       "family": null,
       "flores_path": null,
       "fleurs_tag": null,
+      "commonvoice_hours": 1.4,
       "commonvoice_locale": "sei",
       "in_benchmark": false,
       "task": null,
       "family": "Indo-European",
       "flores_path": "slk_Latn",
       "fleurs_tag": "sk_sk",
+      "commonvoice_hours": 47.0,
       "commonvoice_locale": "sk",
       "in_benchmark": true,
       "task": null,
       "family": "Afro-Asiatic",
       "flores_path": null,
       "fleurs_tag": null,
+      "commonvoice_hours": 11.0,
       "commonvoice_locale": "tig",
       "in_benchmark": false,
       "task": null,
       "family": "Turkic",
       "flores_path": "uig_Arab",
       "fleurs_tag": null,
+      "commonvoice_hours": 365.0,
       "commonvoice_locale": "ug",
       "in_benchmark": true,
       "task": null,
       "family": "Indo-European",
       "flores_path": "ukr_Cyrl",
       "fleurs_tag": "uk_ua",
+      "commonvoice_hours": 99.0,
       "commonvoice_locale": "uk",
       "in_benchmark": true,
       "task": null,