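# NOTE: residue from the hosting site's file viewer, kept as a comment (not part of the module):
# uploader "karimouda", commit 2fe1d39 ("feedback #1 applied"), file size 4.36 kB.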
from dataclasses import dataclass
from enum import Enum
@dataclass(frozen=True)
class EvalDimension:
    """Describe one evaluation dimension of the leaderboard.

    Instances serve as the values of the ``EvalDimensions`` enum members.
    The dataclass is frozen so that these shared, module-level objects cannot
    be mutated by accident; freezing also makes instances hashable.

    Attributes:
        metric: Identifier of the dimension, e.g. ``"speed"``.
        col_name: Human-readable label, e.g. ``"Speed (words/sec)"``
            (presumably the leaderboard column header -- confirm with callers).
    """

    metric: str
    col_name: str
# Select your evaluation dimensions here
# ---------------------------------------------------
class EvalDimensions(Enum):
    """Enumerate every dimension reported by the leaderboard.

    Each member's value is an ``EvalDimension`` pairing a metric identifier
    with its display label. Member names (``d0`` .. ``d23``) are positional
    only and carry no meaning of their own.
    """

    # Model-level measurements.
    d0 = EvalDimension(metric="speed", col_name="Speed (words/sec)")
    d1 = EvalDimension(metric="contamination_score", col_name="Contamination Score")

    # Skill / category dimensions (22 entries).
    d2 = EvalDimension(metric="paraphrasing", col_name="Paraphrasing")
    d3 = EvalDimension(metric="sentiment analysis", col_name="Sentiment Analysis")
    d4 = EvalDimension(metric="coding", col_name="Coding")
    d5 = EvalDimension(metric="function calling", col_name="Function Calling")
    d6 = EvalDimension(metric="rag qa", col_name="RAG QA")
    d7 = EvalDimension(metric="reading comprehension", col_name="Reading Comprehension")
    d8 = EvalDimension(metric="entity extraction", col_name="Entity Extraction")
    d9 = EvalDimension(metric="summarization", col_name="Summarization")
    d10 = EvalDimension(metric="long context", col_name="Long Context")
    d11 = EvalDimension(metric="mmlu", col_name="MMLU")
    d12 = EvalDimension(
        metric="arabic language & grammar",
        col_name="Arabic Language & Grammar",
    )
    d13 = EvalDimension(metric="general knowledge", col_name="General Knowledge")
    d14 = EvalDimension(
        metric="translation (incl dialects)",
        col_name="Translation (incl Dialects)",
    )
    d15 = EvalDimension(metric="trust & safety", col_name="Trust & Safety")
    d16 = EvalDimension(
        metric="writing (incl dialects)",
        col_name="Writing (incl Dialects)",
    )
    d17 = EvalDimension(metric="dialect detection", col_name="Dialect Detection")
    d18 = EvalDimension(metric="reasoning & math", col_name="Reasoning & Math")
    d19 = EvalDimension(metric="diacritization", col_name="Diacritization")
    d20 = EvalDimension(metric="instruction following", col_name="Instruction Following")
    d21 = EvalDimension(metric="transliteration", col_name="Transliteration")
    d22 = EvalDimension(metric="structuring", col_name="Structuring")
    d23 = EvalDimension(metric="hallucination", col_name="Hallucination")
# Presumably the number of few-shot examples used during evaluation; it is not
# referenced in the visible part of this file -- TODO confirm where it is read.
NUM_FEWSHOT = 0 # Change this to your few-shot count
# ---------------------------------------------------
# Leaderboard title: an HTML snippet that shows the ABL logo image.
# Adjacent literals are concatenated at compile time into a single string.
TITLE = (
    "<div >"
    "<img class='abl_header_image' "
    "src='https://huggingface.co/spaces/silma-ai/Arabic-LLM-Broad-Leaderboard/resolve/main/src/images/abl_logo.png' >"
    "</div>"
)
# Introductory blurb: what the leaderboard evaluates (HTML mixed with plain text).
# Built line by line; the empty first and last items reproduce the leading and
# trailing newline of the text.
INTRODUCTION_TEXT = "\n".join(
    (
        "",
        """<h1 style='width: 100%;text-align: center;' id="space-title">Arabic Broad Leaderboard (ABL) - The first comprehensive Leaderboard for Arabic LLMs</h1>""",
        "ABL is the official Leaderboard of <a href='https://huggingface.co/datasets/silma-ai/arabic-broad-benchmark' target='_blank'>Arabic Broad Benchmark (ABB)</a>.",
        "With advanced features and innovative visualizations, we provide the community with a comprehensive view of the capabilities of Arabic models, showcasing their speed, diverse skills while also defending against benchmarking contamination.",
        "The benchmark consists of <b>450 high quality human-validated questions</b> sampled from <b>63 Arabic benchmarking datasets</b>, evaluating <b>22 categories and skills</b>.",
        "Find more details in the about Tab.",
        "",
    )
)
# Which evaluations are you running? How can people reproduce what you have?
# Markdown FAQ text. Several entries are still headings or labels without
# content. This is a plain string: the text contains no replacement fields,
# so an f-string prefix is unnecessary.
LLM_BENCHMARKS_TEXT = """
## FAQ
### What is the difference between ABL and ABB?
ABL is the Leaderboard which uses ABB benchmarking dataset and code in the backend to produce the results you see here
### Where can I learn more about ABL and ABB?
Feel free to read the following resources
ABB Page:
ABL blog post:
### How can I reproduce the results?
You can easily run the ABB benchmarking code using the following command on Google Colab or your own infrastructure.
### What is the Benchmark Score?
### What is the Contamination Score?
### What is the Speed?
### Why am I not allowed to submit models with more than 15B parameters?
"""
# Text for the evaluation queue; currently a placeholder holding a single newline.
EVALUATION_QUEUE_TEXT = "\n"
# Label for the citation snippet.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite the Leaderboard"

# BibTeX entry for the leaderboard, assembled line by line. The empty first and
# last items reproduce the leading and trailing newline; the raw literal keeps
# the backslash of ``\url`` intact.
CITATION_BUTTON_TEXT = "\n".join(
    (
        "",
        "@misc{ABL,",
        "author = {SILMA AI Team},",
        "title = {Arabic Broad Leaderboard},",
        "year = {2025},",
        "publisher = {SILMA.AI},",
        r'howpublished = "{\url{https://huggingface.co/spaces/silma-ai/Arabic-LLM-Broad-Leaderboard}}"',
        "}",
        "",
    )
)
# Footer HTML: a centered "Sponsored By" caption next to the SILMA logo, which
# links to silma.ai. One tuple item per line of markup; no leading or trailing
# newline.
FOOTER_TEXT = "\n".join(
    (
        "<div style='display:flex;justify-content:center;align-items:center;'>",
        "<span style='font-size:36px;font-weight:bold;margin-right:20px;'>Sponsored By</span>",
        "<a href='https://silma.ai/?ref=abl' target='_blank'>",
        "<img style='height:60px' src='https://huggingface.co/spaces/silma-ai/Arabic-LLM-Broad-Leaderboard/resolve/main/src/images/silma-logo-wide.png' >",
        "</a>",
        "</div>",
    )
)