davanstrien HF Staff commited on
Commit
bffa030
·
1 Parent(s): 7709561

Update language summary and add dataset count for each language

Browse files
Files changed (1) hide show
  1. app.py +11 -4
app.py CHANGED
@@ -41,10 +41,12 @@ for lang in tqdm(freqs.keys()):
41
  list_models(filter=ModelFilter(language=lang, task="token-classification"))
42
  )
43
  models_for_lang_any_task = list(list_models(filter=ModelFilter(language=lang)))
 
44
  if not models_for_lang_with_task_token_classification:
45
  data = {
46
  "language": lang,
47
- "datasets": freqs[lang],
 
48
  "token_classification_models": len(
49
  models_for_lang_with_task_token_classification
50
  ),
@@ -58,8 +60,13 @@ len(no_model)
58
  df = pd.DataFrame(no_model)
59
 
60
  df = df.sort_values(
61
- by=["datasets", "token_classification_models", "all_models"],
62
- ascending=[False, True, True],
 
 
 
 
 
63
  )
64
 
65
 
@@ -68,7 +75,7 @@ def report_summary():
68
  for row in df.head(20).itertuples():
69
  language = row[1]
70
  summary += f"# Summary for language: {language}\n"
71
- summary += f"This language has {(row[2])} token classification datasets, it has {row[3]} token classification models, and {row[4]} models overall.\n"
72
  summary += f"- [Datasets for token classification task for {language}](https://huggingface.co/datasets?task_categories=task_categories:token-classification&language=language:{language})\n"
73
  summary += f"- [Token classification models for {language}](https://huggingface.co/models?task_categories=task_categories:token-classification&language=language:{language})\n"
74
  summary += f"- [All models for {language}](https://huggingface.co/models?language={language}&sort=trending)\n"
 
41
  list_models(filter=ModelFilter(language=lang, task="token-classification"))
42
  )
43
  models_for_lang_any_task = list(list_models(filter=ModelFilter(language=lang)))
44
+ datasets_for_lang_any_task = list(list_datasets(filter=f"language:{lang}"))
45
  if not models_for_lang_with_task_token_classification:
46
  data = {
47
  "language": lang,
48
+ "datasets_for_token_classification": freqs[lang],
49
+ "datasets": len(datasets_for_lang_any_task),
50
  "token_classification_models": len(
51
  models_for_lang_with_task_token_classification
52
  ),
 
60
  df = pd.DataFrame(no_model)
61
 
62
  df = df.sort_values(
63
+ by=[
64
+ "datasets_for_token_classification",
65
+ "datasets",
66
+ "token_classification_models",
67
+ "all_models",
68
+ ],
69
+ ascending=[False, False, True, True],
70
  )
71
 
72
 
 
75
  for row in df.head(20).itertuples():
76
  language = row[1]
77
  summary += f"# Summary for language: {language}\n"
78
+ summary += f"This language has {(row[2])} token classification datasets, {row[3]} datasets overall, {row[4]} token classification models, and {row[5]} models overall.\n"
79
  summary += f"- [Datasets for token classification task for {language}](https://huggingface.co/datasets?task_categories=task_categories:token-classification&language=language:{language})\n"
80
  summary += f"- [Token classification models for {language}](https://huggingface.co/models?task_categories=task_categories:token-classification&language=language:{language})\n"
81
  summary += f"- [All models for {language}](https://huggingface.co/models?language={language}&sort=trending)\n"