# some code blocks are taken from https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/tree/main
import os

import gradio as gr
from huggingface_hub import HfApi

from src.css_html import custom_css
from src.text_content import ABOUT_TEXT, SUBMISSION_TEXT_3
from src.utils import (
    plot_leaderboard_scores,
    create_result_dataframes,
    create_result_dataframes_lite,
)

TOKEN = os.environ.get("HF_TOKEN", None)
# `token` must be passed by keyword: HfApi's first positional argument is
# `endpoint`, so HfApi(TOKEN) would silently misuse the token as a URL.
api = HfApi(token=TOKEN)
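
# Assumed contracts (defined in src/utils.py, not shown here):
# `create_result_dataframes(path, level=...)` and `create_result_dataframes_lite`
# return a pandas DataFrame indexed by model name with one numeric score column
# per category/task/dataset, and `plot_leaderboard_scores(view, columns, source)`
# returns a figure object that gr.Plot can render.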


def filter_items(df, leaderboard_table, query):
    """Filter rows by the marker in the "T" column; "all" keeps every row."""
    if query == "all":
        return df[leaderboard_table.columns]
    query = query[0]  # keep only the leading marker character
    filtered_df = df[df["T"].str.contains(query, na=False)]
    return filtered_df[leaderboard_table.columns]


def search_table(df, leaderboard_table, query):
    """Case-insensitive substring search over the "Model" column."""
    filtered_df = df[df["Model"].str.contains(query, case=False)]
    return filtered_df[leaderboard_table.columns]
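
# These two helpers are carried over from the bigcode leaderboard and are not
# wired up below. Hypothetical usage, if a search box were added (`search_bar`
# and `hidden_df` are illustrative names):
#   search_bar.submit(
#       fn=search_table,
#       inputs=[hidden_df, leaderboard_df, search_bar],
#       outputs=leaderboard_df,
#   )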

demo = gr.Blocks(css=custom_css)
with demo:
    with gr.Row():
        gr.Markdown(
            """
            <div style="text-align: center;">
                <h1>🏆 AfroBench <span style='color: #e6b800;'>Leaderboard</span></h1>
            </div>
            <p style="text-align: center; font-size: 16px;">
                This leaderboard tracks the performance of multilingual models across <b>64 African languages</b>,
                <b>15 NLP tasks</b> and <b>22 datasets</b>, spanning POS tagging, question answering,
                summarization, machine translation, and more.
            </p>
            <p style="font-size: 14px; text-align: center;">
                It is based on the <a href="https://mcgill-nlp.github.io/AfroBench/index.html" target="_blank">AfroBench benchmark</a>
                and is designed to highlight both full-scale evaluations and cost-efficient subsets (AfroBench-Lite).<br><br>
                We aim to support better transparency and tooling for evaluating models in African languages.
            </p>
            """,
            elem_classes="markdown-text",
        )

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.Column():
            with gr.Tabs(elem_classes="A100-tabs") as A100_tabs:
                with gr.TabItem("📊 Evaluation table", id=0):
                    with gr.Column():
                        view_source = gr.Radio(
                            label="📚 Select Leaderboard Source",
                            choices=["afrobench", "afrobench_lite"],
                            value="afrobench",
                            interactive=True,
                        )
                        view_selector = gr.Dropdown(
                            label="🔍 Select View",
                            choices=["category", "task", "dataset"],
                            value="category",
                            interactive=True,
                        )
                        with gr.Accordion("➡️ See All Columns", open=False):
                            shown_columns = gr.CheckboxGroup(
                                choices=[],
                                value=[],
                                label="Select columns to display",
                                elem_id="column-select",
                                interactive=True,
                            )
                        leaderboard_df = gr.Dataframe(
                            label="Leaderboard",
                            interactive=False,
                            elem_id="leaderboard-table",
                            wrap=True,
                        )
                        # Each leaderboard source supports a different set of views.
                        view_options_map = {
                            "afrobench": ["category", "task", "dataset"],
                            "afrobench_lite": ["task", "dataset", "language"],
                        }
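
                        # Assumption: the two JSON files under data/leaderboard_json/
                        # store per-model scores grouped by category, task, and dataset
                        # (plus language for the Lite subset); the exact schema is
                        # handled by the create_result_dataframes* helpers.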
                        # Hidden button: an extra entry point to (re)build the table;
                        # demo.load below performs the same initialization on launch.
                        init_trigger = gr.Button(visible=False)

                        def update_view_selector(source):
                            # Reset the view dropdown to the options valid for `source`.
                            options = view_options_map[source]
                            return gr.update(choices=options, value=options[0])

                        def refresh_table_and_columns(view_type, source):
                            path = (
                                "data/leaderboard_json/afrobench_lite.json"
                                if source == "afrobench_lite"
                                else "data/leaderboard_json/afrobench.json"
                            )
                            if source == "afrobench_lite":
                                df = create_result_dataframes_lite(path, level=view_type)
                            else:
                                df = create_result_dataframes(path, level=view_type)
                            df.reset_index(inplace=True)
                            df.rename(columns={"index": "Model"}, inplace=True)
                            # "Score" is the row-wise mean of all metric columns.
                            metric_cols = [col for col in df.columns if col != "Model"]
                            df["Score"] = df[metric_cols].mean(axis=1).round(1)
                            shown_choices = sorted(
                                [col for col in df.columns if col not in ["Model", "Score"]]
                            )
                            df = df[["Model", "Score"] + shown_choices]
                            return df, gr.update(choices=shown_choices, value=shown_choices)
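
                        # Illustrative shape of the result (names and values made up):
                        #   Model     Score  TaskA  TaskB  TaskC
                        #   model-x    61.3   58.1   70.2   55.6
                        #   model-y    47.9   44.0   52.3   47.4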

                        def refresh_table_only(view_type, selected_cols, source):
                            path = (
                                "data/leaderboard_json/afrobench_lite.json"
                                if source == "afrobench_lite"
                                else "data/leaderboard_json/afrobench.json"
                            )
                            if source == "afrobench_lite":
                                df = create_result_dataframes_lite(path, level=view_type)
                            else:
                                df = create_result_dataframes(path, level=view_type)
                            df.reset_index(inplace=True)
                            df.rename(columns={"index": "Model"}, inplace=True)
                            # Average only the selected columns so "Score" matches the
                            # note under the table; fall back to all metric columns
                            # when nothing is selected.
                            score_cols = [c for c in selected_cols if c in df.columns]
                            if not score_cols:
                                score_cols = [col for col in df.columns if col != "Model"]
                            df["Score"] = df[score_cols].mean(axis=1).round(1)
                            return df[["Model", "Score"] + [c for c in selected_cols if c in df.columns]]

                        # Build the default table once on launch.
                        def initialize():
                            return refresh_table_and_columns("category", "afrobench")

                        init_trigger.click(
                            fn=initialize,
                            inputs=None,
                            outputs=[leaderboard_df, shown_columns],
                        )
                        # Switching the source resets the view dropdown, then rebuilds
                        # the table and the column checkboxes for the new view.
                        view_source.change(
                            fn=update_view_selector,
                            inputs=[view_source],
                            outputs=[view_selector],
                        ).then(
                            fn=refresh_table_and_columns,
                            inputs=[view_selector, view_source],
                            outputs=[leaderboard_df, shown_columns],
                        )
                        view_selector.change(
                            fn=refresh_table_and_columns,
                            inputs=[view_selector, view_source],
                            outputs=[leaderboard_df, shown_columns],
                        )
                        # Column toggles only re-filter the table; the choices stay put.
                        shown_columns.change(
                            fn=refresh_table_only,
                            inputs=[view_selector, shown_columns, view_source],
                            outputs=leaderboard_df,
                        )
                        demo.load(
                            fn=initialize,
                            outputs=[leaderboard_df, shown_columns],
                        )
                        gr.Markdown(
                            """
                            **Notes:**
                            - Score is the average of the columns currently shown in the leaderboard, based on the view and filters you've selected.
                            - For more details, check the 📖 About section.
                            """,
                            elem_classes="markdown-text",
                        )
                with gr.TabItem("📈 Performance Plot", id=1):
                    with gr.Row():
                        model_score_plot = gr.Plot(label="Model Score Comparison")
                    # Redraw the plot whenever the source, view, or column selection
                    # changes, and once on initial load.
                    view_source.change(
                        fn=plot_leaderboard_scores,
                        inputs=[view_selector, shown_columns, view_source],
                        outputs=model_score_plot,
                    )
                    view_selector.change(
                        fn=plot_leaderboard_scores,
                        inputs=[view_selector, shown_columns, view_source],
                        outputs=model_score_plot,
                    )
                    shown_columns.change(
                        fn=plot_leaderboard_scores,
                        inputs=[view_selector, shown_columns, view_source],
                        outputs=model_score_plot,
                    )
                    demo.load(
                        fn=plot_leaderboard_scores,
                        inputs=[view_selector, shown_columns, view_source],
                        outputs=model_score_plot,
                    )
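                    # The three change handlers above could be collapsed into one
                    # listener (sketch, assuming Gradio >= 4 where gr.on is available):
                    #   gr.on(
                    #       triggers=[view_source.change, view_selector.change,
                    #                 shown_columns.change],
                    #       fn=plot_leaderboard_scores,
                    #       inputs=[view_selector, shown_columns, view_source],
                    #       outputs=model_score_plot,
                    #   )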
                with gr.TabItem("📖 About", id=2):
                    gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")
                with gr.TabItem("Submit results 🚀", id=3):
                    gr.Markdown(SUBMISSION_TEXT_3)

# Hugging Face Spaces expects the app to listen on port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)