|
from dataclasses import dataclass
from enum import Enum


@dataclass
class TaskDetails:
    """Display metadata for a category of tasks."""

    name: str
    display_name: str = ""
    symbol: str = ""  # emoji shown next to the category on the leaderboard


class TaskType(Enum):
    """High-level task category: natural language understanding or generation."""

    NLU = TaskDetails("nlu", "NLU", "🧠")
    NLG = TaskDetails("nlg", "NLG", "✍️")


@dataclass
class Task:
    """A single benchmark/metric pair shown as a leaderboard column."""

    benchmark: str  # benchmark identifier used in the results files
    metric: str  # metric key to read from the results files
    col_name: str  # column header displayed on the leaderboard
    task_type: TaskType
    is_primary_metric: bool = True  # False marks a secondary metric for the same benchmark


class Tasks(Enum):
    # NLU tasks
    task0 = Task("sentiment_mlt", "f1", "Sentiment Analysis (F1)", TaskType.NLU)
    task1 = Task("sib200_mlt", "f1", "SIB200 (F1)", TaskType.NLU)
    task2 = Task("taxi1500_mlt", "f1", "Taxi1500 (F1)", TaskType.NLU)
    task3 = Task("maltese_news_categories", "loglikelihood", "Maltese News Categories (F1)", TaskType.NLU)
    task4 = Task("multieurlex_mlt", "loglikelihood", "MultiEURLEX (F1)", TaskType.NLU)
    task5 = Task("belebele_mlt", "acc", "Belebele (Accuracy)", TaskType.NLU)
    # NLG tasks
    task6 = Task("opus100_eng-mlt", "bleu", "OPUS-100 EN→MT (BLEU)", TaskType.NLG, False)
    task7 = Task("opus100_eng-mlt", "chrf", "OPUS-100 EN→MT (ChrF)", TaskType.NLG)
    task8 = Task("flores200_eng-mlt", "bleu", "Flores-200 EN→MT (BLEU)", TaskType.NLG, False)
    task9 = Task("flores200_eng-mlt", "chrf", "Flores-200 EN→MT (ChrF)", TaskType.NLG)
    task10 = Task("webnlg_mlt", "chrf", "WebNLG (ChrF)", TaskType.NLG)
    task11 = Task("webnlg_mlt", "rouge", "WebNLG (Rouge-L)", TaskType.NLG, False)
    task12 = Task("eurlexsum_mlt", "chrf", "EUR-Lex-Sum (ChrF)", TaskType.NLG, False)
    task13 = Task("eurlexsum_mlt", "rouge", "EUR-Lex-Sum (Rouge-L)", TaskType.NLG)
    task14 = Task("maltese_news_headlines", "chrf", "Maltese News Headlines (ChrF)", TaskType.NLG, False)
    task15 = Task("maltese_news_headlines", "rouge", "Maltese News Headlines (Rouge-L)", TaskType.NLG)


NUM_FEWSHOT = 0  # number of few-shot examples used in evaluation
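

# Illustrative only: a minimal sketch of how the definitions above might be
# consumed downstream, e.g. to build the leaderboard's column headers. The
# helper names are hypothetical and are not part of this module's actual API.
def primary_columns() -> list[str]:
    """Column headers for primary metrics only (hypothetical helper)."""
    return [task.value.col_name for task in Tasks if task.value.is_primary_metric]


def columns_for(task_type: TaskType) -> list[str]:
    """Column headers for every task of the given type (hypothetical helper)."""
    return [task.value.col_name for task in Tasks if task.value.task_type is task_type]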


TITLE = """<h1 align="center" id="space-title">🇲🇹 MELABench Leaderboard</h1>"""

INTRODUCTION_TEXT = """
A Maltese Evaluation Language Benchmark
"""

LLM_BENCHMARKS_TEXT = """
## How it works
Models are evaluated zero-shot on the Maltese NLU and NLG benchmarks listed above; each leaderboard column reports one metric for one benchmark.

## Reproducibility
To reproduce our results, run the commands documented in our [GitHub Repository](https://github.com/MLRS/MELABench/tree/main/prompting).
"""

EVALUATION_QUEUE_TEXT = """
To include new results on this benchmark, follow the instructions in our [GitHub Repository](https://github.com/MLRS/MELABench/tree/main/prompting).
You can then upload the output files, which should include the configuration/results file and all the prediction files.
In addition, we ask for metadata about how the model was trained.
"""

CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""
"""
|
|