|
from dataclasses import dataclass |
|
from enum import Enum |
|
|
|
@dataclass(frozen=True)
class EvalDimension:
    """Describe one evaluation dimension by its metric key and column label.

    Instances serve as the member values of the ``EvalDimensions`` enum, so
    they are shared, module-level constants. The dataclass is frozen for two
    reasons:

    * a plain ``@dataclass`` generates ``__eq__`` and therefore sets
      ``__hash__`` to ``None``; freezing restores a field-based hash so the
      values can be used in sets, as dict keys, and in hashed enum lookups;
    * it prevents accidental in-place mutation of a value that every user of
      the enum would otherwise observe.
    """

    # Identifier string of the dimension, e.g. "speed".
    metric: str
    # Human-readable label, e.g. "Speed (words/sec)".
    # NOTE(review): presumably the leaderboard column header - confirm at call sites.
    col_name: str
|
|
|
|
|
|
|
|
|
class EvalDimensions(Enum):
    """Enumerate the evaluation dimensions declared by this module.

    Each member's value is an ``EvalDimension`` built from a ``metric``
    string and a ``col_name`` string. Member names (``d0`` .. ``d23``) are
    positional identifiers only; the descriptive information lives in the
    value.
    """

    d0 = EvalDimension(metric="speed", col_name="Speed (words/sec)")
    d1 = EvalDimension(metric="contamination_score", col_name="Contamination Score")
    d2 = EvalDimension(metric="paraphrasing", col_name="Paraphrasing")
    d3 = EvalDimension(metric="sentiment analysis", col_name="Sentiment Analysis")
    d4 = EvalDimension(metric="coding", col_name="Coding")
    d5 = EvalDimension(metric="function calling", col_name="Function Calling")
    d6 = EvalDimension(metric="rag qa", col_name="RAG QA")
    d7 = EvalDimension(metric="reading comprehension", col_name="Reading Comprehension")
    d8 = EvalDimension(metric="entity extraction", col_name="Entity Extraction")
    d9 = EvalDimension(metric="summarization", col_name="Summarization")
    d10 = EvalDimension(metric="long context", col_name="Long Context")
    d11 = EvalDimension(metric="mmlu", col_name="MMLU")
    d12 = EvalDimension(
        metric="arabic language & grammar",
        col_name="Arabic Language & Grammar",
    )
    d13 = EvalDimension(metric="general knowledge", col_name="General Knowledge")
    d14 = EvalDimension(
        metric="translation (incl dialects)",
        col_name="Translation (incl Dialects)",
    )
    d15 = EvalDimension(metric="trust & safety", col_name="Trust & Safety")
    d16 = EvalDimension(
        metric="writing (incl dialects)",
        col_name="Writing (incl Dialects)",
    )
    d17 = EvalDimension(metric="dialect detection", col_name="Dialect Detection")
    d18 = EvalDimension(metric="reasoning & math", col_name="Reasoning & Math")
    d19 = EvalDimension(metric="diacritization", col_name="Diacritization")
    d20 = EvalDimension(metric="instruction following", col_name="Instruction Following")
    d21 = EvalDimension(metric="transliteration", col_name="Transliteration")
    d22 = EvalDimension(metric="structuring", col_name="Structuring")
    d23 = EvalDimension(metric="hallucination", col_name="Hallucination")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# NOTE(review): presumably the number of few-shot examples used during
# evaluation; it is not referenced in this part of the file - confirm at callers.
NUM_FEWSHOT = 0

# HTML header snippet: a <div> wrapping the ABL logo image, which carries the
# 'abl_header_image' CSS class.
TITLE = (
    "<div ><img class='abl_header_image' "
    "src='https://huggingface.co/spaces/silma-ai/Arabic-LLM-Broad-Leaderboard/resolve/main/src/images/abl_logo.png' ></div>"
)
|
|
|
|
|
# Introductory HTML/Markdown blurb: a centered <h1> heading (id "space-title"),
# a link to the Arabic Broad Benchmark dataset, and a short summary of what the
# leaderboard and benchmark cover.
INTRODUCTION_TEXT = """
<h1 style='width: 100%;text-align: center;' id="space-title">Arabic Broad Leaderboard (ABL) - The first comprehensive Leaderboard for Arabic LLMs</h1>
ABL is the official Leaderboard of <a href='https://huggingface.co/datasets/silma-ai/arabic-broad-benchmark' target='_blank'>Arabic Broad Benchmark (ABB)</a>.
With advanced features and innovative visualizations, we provide the community with a comprehensive view of the capabilities of Arabic models, showcasing their speed, diverse skills while also defending against benchmarking contamination.
The benchmark consists of <b>450 high quality human-validated questions</b> sampled from <b>63 Arabic benchmarking datasets</b>, evaluating <b>22 categories and skills</b>.
Find more details in the about Tab.


"""
|
|
|
|
|
# Markdown FAQ text.
# NOTE(review): presumably rendered on the "About" tab - confirm at the call site.
#
# This is a plain string on purpose: the text has no placeholders to
# interpolate, and an f-string would raise or misbehave as soon as a literal
# "{" or "}" (e.g. a JSON or BibTeX example) is added to an answer.
#
# TODO: several sections below are headings without an answer yet, and the
# "ABB Page:" / "ABL blog post:" entries have no links.
LLM_BENCHMARKS_TEXT = """

## FAQ

### What is the difference between ABL and ABB?

ABL is the Leaderboard which uses ABB benchmarking dataset and code in the backend to produce the results you see here


### Where can I learn more about ABL and ABB?

Feel free to read the following resources
ABB Page:
ABL blog post:

### How can I reproduce the results?

You can easily run the ABB benchmarking code using the following command on Google Colab or your own infrastructure.

### What is the Benchmark Score?

### What is the Contamination Score?

### What is the Speed?

### Why am I not allowed to submit models with more than 15B parameters?


"""
|
|
|
# Currently contains only whitespace; no evaluation-queue text is defined here.
EVALUATION_QUEUE_TEXT = """

"""
|
|
|
# Caption for the citation snippet.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite the Leaderboard"

# BibTeX @misc entry for the leaderboard. It must stay a raw string: in a
# normal literal the "\u" of "\url" would be parsed as a (malformed) Unicode
# escape.
CITATION_BUTTON_TEXT = r"""

@misc{ABL,
author = {SILMA AI Team},
title = {Arabic Broad Leaderboard},
year = {2025},
publisher = {SILMA.AI},
howpublished = "{\url{https://huggingface.co/spaces/silma-ai/Arabic-LLM-Broad-Leaderboard}}"
}

"""
|
|
|
# HTML footer: a centered flex row holding a "Sponsored By" label and the SILMA
# logo, which links to silma.ai in a new tab.
FOOTER_TEXT = """<div style='display:flex;justify-content:center;align-items:center;'>
<span style='font-size:36px;font-weight:bold;margin-right:20px;'>Sponsored By</span>
<a href='https://silma.ai/?ref=abl' target='_blank'>
<img style='height:60px' src='https://huggingface.co/spaces/silma-ai/Arabic-LLM-Broad-Leaderboard/resolve/main/src/images/silma-logo-wide.png' >
</a>
</div>"""
|
|