import gradio as gr
from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter
from pathlib import Path
import pandas as pd
import os
import json
import requests

from envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO
from utils import LLM_BENCHMARKS_ABOUT_TEXT, LLM_BENCHMARKS_SUBMIT_TEXT, custom_css, jsonl_to_dataframe, add_average_column_to_df, apply_clickable_model

|
def fill_form(model_name, model_id, contact_email, challenge, submission_id, paper_link, architecture, license): |
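    """Map the submission fields to the "entry.<id>" keys of the Google Form.

    The form (see the formResponse URL passed to send_form in submit)
    mirrors every leaderboard submission for bookkeeping; each key below
    is one field of that form.
    """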
    value = {
        "entry.1591601824": model_name,
        "entry.1171388028": model_id,
        "entry.171528970": submission_id,
        "entry.1284338508": paper_link,
        "entry.1291571256": architecture,
        "entry.272554778": license,
        "entry.1908975677": challenge,
        "entry.964644151": contact_email,
    }
    return value


def send_form(url, data):
    """Post the submission data to the Google Form that mirrors the queue."""
    try:
        response = requests.post(url, data=data)
        response.raise_for_status()
        print("Submitted successfully!")
    except requests.RequestException as e:
        print(f"Error submitting the form: {e}")

|
def submit(model_name, model_id, contact_email, challenge, submission_id, paper_link, architecture, license): |
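    """Validate the form, mirror it to the Google Form, and queue the model.

    The entry is written to a local JSON file, posted to the Google Form,
    uploaded to the QUEUE_REPO dataset, and the local file is then removed.
    """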
    # All fields are required except submission_id/paper_link, which are
    # validated separately below (at least one of the two must be given).
    if model_name == "" or model_id == "" or challenge == "" or architecture == "" or license == "":
        raise gr.Error("Please fill in all the fields.")
    if submission_id == "" and paper_link == "":
        raise gr.Error("Provide either a link to a paper describing the method or a submission ID for the MLSB workshop.")
    try:
        # Split "username/model" ids; fall back to the raw id if no username is given.
        user_name = ""
        model_path = model_id
        if "/" in model_id:
            user_name = model_id.split("/")[0]
            model_path = model_id.split("/")[1]

        eval_entry = {
            "model_name": model_name,
            "model_id": model_id,
            "challenge": challenge,
            "submission_id": submission_id,
            "architecture": architecture,
            "license": license,
        }
        OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
        os.makedirs(OUT_DIR, exist_ok=True)
        out_path = f"{OUT_DIR}/{user_name}_{model_path}.json"

        with open(out_path, "w") as f:
            f.write(json.dumps(eval_entry))

        print("Sending form")
        form_data = fill_form(model_name, model_id, contact_email, challenge, submission_id, paper_link, architecture, license)
        send_form("https://docs.google.com/forms/d/e/1FAIpQLSf1zP7RAFC5RLlva03xm0eIAPLKXOmMvNUzirbm82kdCUFKNw/formResponse", form_data)

        print("Uploading eval file")
        API.upload_file(
            path_or_fileobj=out_path,
            # EVAL_REQUESTS_PATH is expected to contain an "eval-queue/" segment;
            # everything after it becomes the path inside the queue repo.
            path_in_repo=out_path.split("eval-queue/")[1],
            repo_id=QUEUE_REPO,
            repo_type="dataset",
            commit_message=f"Add {model_name} to eval queue",
        )

        gr.Info("Successfully submitted", duration=10)

        # Remove the local copy once it has been uploaded.
        os.remove(out_path)
    except Exception as e:
        raise gr.Error(f"Error submitting the model: {e}")

|
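# Load the leaderboard data and prepare the displayed columns.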
abs_path = Path(__file__).parent

persian_df = jsonl_to_dataframe(str(abs_path / "leaderboard_persian.jsonl"))
base_df = jsonl_to_dataframe(str(abs_path / "leaderboard_base.jsonl"))

all_columns = ["Model", "Average ⬆️", "Precision", "#Params (B)", "Part Multiple Choice", "ARC Easy", "ARC Challenging", "MMLU Pro", "GSM8k Persian", "Multiple Choice Persian"]
columns_to_average = ["Part Multiple Choice", "ARC Easy", "ARC Challenging", "MMLU Pro", "GSM8k Persian", "Multiple Choice Persian"]

base_df = add_average_column_to_df(base_df, columns_to_average, index=3)
persian_df = add_average_column_to_df(persian_df, columns_to_average, index=3)

base_df = apply_clickable_model(df=base_df, column_name="Model")
persian_df = apply_clickable_model(df=persian_df, column_name="Model")
|
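# Build the Gradio UI: two leaderboard tabs, an About tab, and a Submit tab.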
with gr.Blocks(css=custom_css) as demo: |
|
gr.Markdown(""" |
|
# Part LLM Leaderboard |
|
""") |
|
|
|
with gr.Tab("ποΈ Persian Leaderboard"): |
|
gr.Markdown("""## Persian LLM Leaderboard |
|
Evaluating Persian Fine-Tuned models |
|
""") |
|
Leaderboard( |
|
value=persian_df, |
|
select_columns=SelectColumns( |
|
default_selection=all_columns, |
|
cant_deselect=["Model"], |
|
label="Select Columns to Show", |
|
), |
|
search_columns=["model_name_for_query"], |
|
hide_columns=["model_name_for_query",], |
|
filter_columns=["Precision", "#Params (B)"], |
|
) |
|
with gr.Tab("π₯ Base Leaderboard"): |
|
gr.Markdown("""## Base LLM Leaderboard |
|
Evaluating Base Models |
|
""") |
|
Leaderboard( |
|
value=base_df, |
|
select_columns=SelectColumns( |
|
default_selection=all_columns, |
|
cant_deselect=["Model"], |
|
label="Select Columns to Show", |
|
), |
|
search_columns=["model_name_for_query"], |
|
hide_columns=["model_name_for_query",], |
|
filter_columns=["Precision", "#Params (B)"], |
|
) |
|
    with gr.Tab("📝 About"):
        gr.Markdown(LLM_BENCHMARKS_ABOUT_TEXT)

with gr.Tab("βοΈ Submit"): |
|
gr.Markdown(LLM_BENCHMARKS_SUBMIT_TEXT) |
|
model_name = gr.Textbox(label="Model name") |
|
model_id = gr.Textbox(label="username/space e.g mlsb/alphafold3") |
|
contact_email = gr.Textbox(label="Contact E-Mail") |
|
challenge = gr.Radio(choices=["Persian", "Base"],label="Challenge") |
|
gr.Markdown("Either give a submission id if you submitted to the MLSB workshop or provide a link to the preprint/paper describing the method.") |
|
with gr.Row(): |
|
submission_id = gr.Textbox(label="Submission ID on CMT") |
|
paper_link = gr.Textbox(label="Preprint or Paper link") |
|
architecture = gr.Dropdown(choices=["GNN", "CNN","Diffusion Model", "Physics-based", "Other"],label="Model architecture") |
|
license = gr.Dropdown(choices=["mit", "apache-2.0", "gplv2", "gplv3", "lgpl", "mozilla", "bsd", "other"],label="License") |
|
submit_btn = gr.Button("Submit") |
|
|
|
submit_btn.click(submit, inputs=[model_name, model_id, contact_email, challenge, submission_id, paper_link, architecture, license], outputs=[]) |
|
|
|
gr.Markdown(""" |
|
Please find more information about the challenges on [mlsb.io/#challenge](https://mlsb.io/#challenge)""") |
|
|
if __name__ == "__main__":
    demo.launch()