import gradio as gr
from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter
from pathlib import Path
import pandas as pd
import os
import json
import requests

from envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO
from utils import (
    LLM_BENCHMARKS_ABOUT_TEXT,
    LLM_BENCHMARKS_SUBMIT_TEXT,
    custom_css,
    jsonl_to_dataframe,
    add_average_column_to_df,
    apply_clickable_model,
)


def fill_form(model_name, model_id, contact_email, challenge, submission_id, paper_link, architecture, license):
    """Map the submission fields to the Google Form entry IDs."""
    value = {
        # Model name
        "entry.1591601824": model_name,
        # username/space
        "entry.1171388028": model_id,
        # Submission ID on CMT
        "entry.171528970": submission_id,
        # Preprint or paper link
        "entry.1284338508": paper_link,
        # Model architecture
        "entry.1291571256": architecture,
        # License
        "entry.272554778": license,
        # Challenge
        "entry.1908975677": challenge,
        # Email
        "entry.964644151": contact_email,
    }
    return value


def send_form(url, data):
    """POST the pre-filled form data to the Google Form endpoint."""
    try:
        requests.post(url, data=data)
        print("Submitted successfully!")
    except requests.exceptions.RequestException as e:
        print(f"Error submitting form: {e}")


def submit(model_name, model_id, contact_email, challenge, submission_id, paper_link, architecture, license):
    if model_name == "" or model_id == "" or challenge == "" or architecture == "" or license == "":
        raise gr.Error("Please fill in all the fields")
    if submission_id == "" and paper_link == "":
        raise gr.Error("Provide either a link to a paper describing the method or a submission ID for the MLSB workshop.")
    try:
        # Split "username/space" into its two parts; fall back to the raw id if no "/" is present.
        if "/" in model_id:
            user_name, model_path = model_id.split("/", 1)
        else:
            user_name, model_path = "", model_id

        eval_entry = {
            "model_name": model_name,
            "model_id": model_id,
            "challenge": challenge,
            "submission_id": submission_id,
            "architecture": architecture,
            "license": license,
        }

        # Write the eval request locally before uploading it to the queue repo.
        OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
        os.makedirs(OUT_DIR, exist_ok=True)
        out_path = f"{OUT_DIR}/{user_name}_{model_path}.json"

        with open(out_path, "w") as f:
            f.write(json.dumps(eval_entry))

        print("Sending form")
        form_data = fill_form(model_name, model_id, contact_email, challenge, submission_id, paper_link, architecture, license)
        send_form(
            "https://docs.google.com/forms/d/e/1FAIpQLSf1zP7RAFC5RLlva03xm0eIAPLKXOmMvNUzirbm82kdCUFKNw/formResponse",
            form_data,
        )

        print("Uploading eval file")
        API.upload_file(
            path_or_fileobj=out_path,
            path_in_repo=out_path.split("eval-queue/")[1],
            repo_id=QUEUE_REPO,
            repo_type="dataset",
            commit_message=f"Add {model_name} to eval queue",
        )
        gr.Info("Successfully submitted", duration=10)
        # Remove the local file
        os.remove(out_path)
    except Exception as e:
        raise gr.Error(f"Error submitting the model: {e}")


abs_path = Path(__file__).parent

# Any pandas-compatible data
persian_df = jsonl_to_dataframe(str(abs_path / "leaderboard_persian.jsonl"))
base_df = jsonl_to_dataframe(str(abs_path / "leaderboard_base.jsonl"))

all_columns = [
    "Model", "Average ⬆️", "Precision", "#Params (B)",
    "Part Multiple Choice", "ARC Easy", "ARC Challenging",
    "MMLU Pro", "GSM8k Persian", "Multiple Choice Persian",
]
columns_to_average = [
    "Part Multiple Choice", "ARC Easy", "ARC Challenging",
    "MMLU Pro", "GSM8k Persian", "Multiple Choice Persian",
]

base_df = add_average_column_to_df(base_df, columns_to_average, index=3)
persian_df = add_average_column_to_df(persian_df, columns_to_average, index=3)
base_df = apply_clickable_model(df=base_df, column_name="Model")
persian_df = apply_clickable_model(df=persian_df, column_name="Model")

with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("""
    # Part LLM Leaderboard
    """)
    with gr.Tab("🎖️ Persian Leaderboard"):
        gr.Markdown("""## Persian LLM Leaderboard
        Evaluating Persian fine-tuned models
        """)
        Leaderboard(
            value=persian_df,
            select_columns=SelectColumns(
                default_selection=all_columns,
                cant_deselect=["Model"],
                label="Select Columns to Show",
            ),
            search_columns=["model_name_for_query"],
            hide_columns=["model_name_for_query"],
            filter_columns=["Precision", "#Params (B)"],
        )
    with gr.Tab("🥇 Base Leaderboard"):
        gr.Markdown("""## Base LLM Leaderboard
        Evaluating base models
        """)
        Leaderboard(
            value=base_df,
            select_columns=SelectColumns(
                default_selection=all_columns,
                cant_deselect=["Model"],
                label="Select Columns to Show",
            ),
            search_columns=["model_name_for_query"],
            hide_columns=["model_name_for_query"],
            filter_columns=["Precision", "#Params (B)"],
        )
    with gr.TabItem("📝 About"):
        gr.Markdown(LLM_BENCHMARKS_ABOUT_TEXT)
    with gr.Tab("✉️ Submit"):
        gr.Markdown(LLM_BENCHMARKS_SUBMIT_TEXT)
        model_name = gr.Textbox(label="Model name")
        model_id = gr.Textbox(label="username/space, e.g. mlsb/alphafold3")
        contact_email = gr.Textbox(label="Contact E-Mail")
        challenge = gr.Radio(choices=["Persian", "Base"], label="Challenge")
        gr.Markdown("Provide either a submission ID (if you submitted to the MLSB workshop) or a link to the preprint/paper describing the method.")
        with gr.Row():
            submission_id = gr.Textbox(label="Submission ID on CMT")
            paper_link = gr.Textbox(label="Preprint or paper link")
        architecture = gr.Dropdown(choices=["GNN", "CNN", "Diffusion Model", "Physics-based", "Other"], label="Model architecture")
        license = gr.Dropdown(choices=["mit", "apache-2.0", "gplv2", "gplv3", "lgpl", "mozilla", "bsd", "other"], label="License")
        submit_btn = gr.Button("Submit")
        submit_btn.click(
            submit,
            inputs=[model_name, model_id, contact_email, challenge, submission_id, paper_link, architecture, license],
            outputs=[],
        )
        gr.Markdown("""Please find more information about the challenges on [mlsb.io/#challenge](https://mlsb.io/#challenge)""")

if __name__ == "__main__":
    demo.launch()