Spaces:
Sleeping
Sleeping
Commit · bffa030
1 Parent(s): 7709561
Update language summary and add dataset count for
Browse files
app.py
CHANGED
@@ -41,10 +41,12 @@ for lang in tqdm(freqs.keys()):
|
|
41 |
list_models(filter=ModelFilter(language=lang, task="token-classification"))
|
42 |
)
|
43 |
models_for_lang_any_task = list(list_models(filter=ModelFilter(language=lang)))
|
|
|
44 |
if not models_for_lang_with_task_token_classification:
|
45 |
data = {
|
46 |
"language": lang,
|
47 |
-
"
|
|
|
48 |
"token_classification_models": len(
|
49 |
models_for_lang_with_task_token_classification
|
50 |
),
|
@@ -58,8 +60,13 @@ len(no_model)
|
|
58 |
df = pd.DataFrame(no_model)
|
59 |
|
60 |
df = df.sort_values(
|
61 |
-
by=[
|
62 |
-
|
|
|
|
|
|
|
|
|
|
|
63 |
)
|
64 |
|
65 |
|
@@ -68,7 +75,7 @@ def report_summary():
|
|
68 |
for row in df.head(20).itertuples():
|
69 |
language = row[1]
|
70 |
summary += f"# Summary for language: {language}\n"
|
71 |
-
summary += f"This language has {(row[2])} token classification datasets,
|
72 |
summary += f"- [Datasets for token classification task for {language}](https://huggingface.co/datasets?task_categories=task_categories:token-classification&language=language:{language})\n"
|
73 |
summary += f"- [Token classification models for {language}](https://huggingface.co/models?task_categories=task_categories:token-classification&language=language:{language})\n"
|
74 |
summary += f"- [All models for {language}](https://huggingface.co/models?language={language}&sort=trending)\n"
|
|
|
41 |
list_models(filter=ModelFilter(language=lang, task="token-classification"))
|
42 |
)
|
43 |
models_for_lang_any_task = list(list_models(filter=ModelFilter(language=lang)))
|
44 |
+
datasets_for_lang_any_task = list(list_datasets(filter=f"language:{lang}"))
|
45 |
if not models_for_lang_with_task_token_classification:
|
46 |
data = {
|
47 |
"language": lang,
|
48 |
+
"datasets_for_token_classification": freqs[lang],
|
49 |
+
"datasets": len(datasets_for_lang_any_task),
|
50 |
"token_classification_models": len(
|
51 |
models_for_lang_with_task_token_classification
|
52 |
),
|
|
|
60 |
df = pd.DataFrame(no_model)
|
61 |
|
62 |
df = df.sort_values(
|
63 |
+
by=[
|
64 |
+
"datasets_for_token_classification",
|
65 |
+
"datasets",
|
66 |
+
"token_classification_models",
|
67 |
+
"all_models",
|
68 |
+
],
|
69 |
+
ascending=[False, False, True, True],
|
70 |
)
|
71 |
|
72 |
|
|
|
75 |
for row in df.head(20).itertuples():
|
76 |
language = row[1]
|
77 |
summary += f"# Summary for language: {language}\n"
|
78 |
+
summary += f"This language has {(row[2])} token classification datasets, {row[3]} datasets overall, {row[4]} token classification models, and {row[5]} models overall.\n"
|
79 |
summary += f"- [Datasets for token classification task for {language}](https://huggingface.co/datasets?task_categories=task_categories:token-classification&language=language:{language})\n"
|
80 |
summary += f"- [Token classification models for {language}](https://huggingface.co/models?task_categories=task_categories:token-classification&language=language:{language})\n"
|
81 |
summary += f"- [All models for {language}](https://huggingface.co/models?language={language}&sort=trending)\n"
|