import gradio as gr
from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter
from pathlib import Path
import pandas as pd
import os
import json
import requests
from envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO
from utils import (
    LLM_BENCHMARKS_ABOUT_TEXT,
    LLM_BENCHMARKS_SUBMIT_TEXT,
    custom_css,
    jsonl_to_dataframe,
    add_average_column_to_df,
    apply_clickable_model,
)


def fill_form(model_name, model_id, contact_email, challenge, submission_id, paper_link, architecture, license):
    """Map the submission fields to the Google Form entry IDs."""
    value = {
        # Model name
        "entry.1591601824": model_name,
        # username/space
        "entry.1171388028": model_id,
        # Submission ID on CMT
        "entry.171528970": submission_id,
        # Preprint or paper link
        "entry.1284338508": paper_link,
        # Model architecture
        "entry.1291571256": architecture,
        # License
        # Option: any text
        "entry.272554778": license,
        # Challenge
        # Option: any text
        "entry.1908975677": challenge,
        # Email
        # Option: any text
        "entry.964644151": contact_email
    }
    return value


def sendForm(url, data):
    """POST the submission data to the Google Form and report the outcome."""
    try:
        response = requests.post(url, data=data)
        response.raise_for_status()
        print("Submitted successfully!")
    except requests.RequestException as e:
        print(f"Error submitting the form: {e}")


def submit(model_name, model_id, contact_email, challenge, submission_id, paper_link, architecture, license):
    """Validate the form, record the eval request locally, forward it to the Google Form, and upload it to the queue repo."""
    if model_name == "" or model_id == "" or challenge == "" or architecture == "" or license == "":
        raise gr.Error("Please fill in all the fields")
    if submission_id == "" and paper_link == "":
        raise gr.Error("Provide either a link to a paper describing the method or a submission ID for the MLSB workshop.")
    try:
        user_name = ""
        model_path = model_id
        if "/" in model_id:
            user_name = model_id.split("/")[0]
            model_path = model_id.split("/")[1]

        eval_entry = {
            "model_name": model_name,
            "model_id": model_id,
            "challenge": challenge,
            "submission_id": submission_id,
            "architecture": architecture,
            "license": license
        }

        OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
        os.makedirs(OUT_DIR, exist_ok=True)
        out_path = f"{OUT_DIR}/{user_name}_{model_path}.json"

        with open(out_path, "w") as f:
            f.write(json.dumps(eval_entry))

        print("Sending form")
        formData = fill_form(model_name, model_id, contact_email, challenge, submission_id, paper_link, architecture, license)
        sendForm("https://docs.google.com/forms/d/e/1FAIpQLSf1zP7RAFC5RLlva03xm0eIAPLKXOmMvNUzirbm82kdCUFKNw/formResponse", formData)

        print("Uploading eval file")
        API.upload_file(
            path_or_fileobj=out_path,
            path_in_repo=out_path.split("eval-queue/")[1],
            repo_id=QUEUE_REPO,
            repo_type="dataset",
            commit_message=f"Add {model_name} to eval queue",
        )
        gr.Info("Successfully submitted", duration=10)
        # Remove the local file
        os.remove(out_path)
    except Exception:
        raise gr.Error("Error submitting the model")


abs_path = Path(__file__).parent
# Any pandas-compatible data
persian_df = jsonl_to_dataframe(str(abs_path / "leaderboard_persian.jsonl"))
base_df = jsonl_to_dataframe(str(abs_path / "leaderboard_base.jsonl"))
all_columns = ["Model", "Average ⬆️", "Precision", "#Params (B)", "Part Multiple Choice", "ARC Easy", "ARC Challenging", "MMLU Pro", "GSM8k Persian", "Multiple Choice Persian"]
columns_to_average = ["Part Multiple Choice", "ARC Easy", "ARC Challenging", "MMLU Pro", "GSM8k Persian", "Multiple Choice Persian"]
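# Insert the computed "Average ⬆️" column (mean of the benchmark columns) as the fourth column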
base_df = add_average_column_to_df(base_df, columns_to_average, index=3)
persian_df = add_average_column_to_df(persian_df, columns_to_average, index=3)
base_df = apply_clickable_model(df=base_df, column_name="Model")
persian_df = apply_clickable_model(df=persian_df, column_name="Model")
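
# Build the Gradio app: two leaderboard tabs, an About tab, and a Submit form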
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("""
    # Part LLM Leaderboard
    """)

    with gr.Tab("🎖️ Persian Leaderboard"):
        gr.Markdown("""## Persian LLM Leaderboard
        Evaluating Persian Fine-Tuned models
        """)
        Leaderboard(
            value=persian_df,
            select_columns=SelectColumns(
                default_selection=all_columns,
                cant_deselect=["Model"],
                label="Select Columns to Show",
            ),
            search_columns=["model_name_for_query"],
            hide_columns=["model_name_for_query"],
            filter_columns=["Precision", "#Params (B)"],
        )
    with gr.Tab("🥇 Base Leaderboard"):
        gr.Markdown("""## Base LLM Leaderboard
        Evaluating Base Models
        """)
        Leaderboard(
            value=base_df,
            select_columns=SelectColumns(
                default_selection=all_columns,
                cant_deselect=["Model"],
                label="Select Columns to Show",
            ),
            search_columns=["model_name_for_query"],
            hide_columns=["model_name_for_query"],
            filter_columns=["Precision", "#Params (B)"],
        )
    with gr.TabItem("📝 About"):
        gr.Markdown(LLM_BENCHMARKS_ABOUT_TEXT)
    with gr.Tab("✉️ Submit"):
        gr.Markdown(LLM_BENCHMARKS_SUBMIT_TEXT)
        model_name = gr.Textbox(label="Model name")
        model_id = gr.Textbox(label="username/space e.g. mlsb/alphafold3")
        contact_email = gr.Textbox(label="Contact E-Mail")
        challenge = gr.Radio(choices=["Persian", "Base"], label="Challenge")
        gr.Markdown("Either give a submission ID if you submitted to the MLSB workshop or provide a link to the preprint/paper describing the method.")
        with gr.Row():
            submission_id = gr.Textbox(label="Submission ID on CMT")
            paper_link = gr.Textbox(label="Preprint or Paper link")
        architecture = gr.Dropdown(choices=["GNN", "CNN", "Diffusion Model", "Physics-based", "Other"], label="Model architecture")
        license = gr.Dropdown(choices=["mit", "apache-2.0", "gplv2", "gplv3", "lgpl", "mozilla", "bsd", "other"], label="License")
        submit_btn = gr.Button("Submit")
        submit_btn.click(submit, inputs=[model_name, model_id, contact_email, challenge, submission_id, paper_link, architecture, license], outputs=[])
        gr.Markdown("""
        Please find more information about the challenges on [mlsb.io/#challenge](https://mlsb.io/#challenge)""")


if __name__ == "__main__":
    demo.launch()