from pathlib import Path
import json
import pandas as pd
import gradio as gr
from gradio_leaderboard import Leaderboard
from evaluation import evaluate_problem
from utils import read_submission_from_hub, write_results
from about import ASSAY_LIST, ASSAY_RENAME, ASSAY_EMOJIS


def evaluate_boundary(filename):
    """Load a submission from the hub, evaluate it, and write the results."""
    print(filename)
    local_path = read_submission_from_hub(filename)
    with Path(local_path).open("r") as f:
        raw = f.read()
    data_dict = json.loads(raw)
    try:
        result = evaluate_problem(data_dict['problem_type'], local_path)
    except Exception as e:
        raise gr.Error(f'Evaluation failed: {e}. No results written to results dataset.')
    write_results(data_dict, result)
    return


def get_leaderboard_table(assay: str | None = None):
    """Build the leaderboard dataframe from data/metrics_all.csv, optionally filtered to one assay."""
    # ds = load_dataset(results_repo, split='train', download_mode="force_redownload")
    # full_df = pd.DataFrame(ds)
    # full_df['full results'] = full_df['result_filename'].apply(lambda x: make_boundary_clickable(x)).astype(str)
    # full_df.rename(columns={'submission_time': 'submission time', 'problem_type': 'problem type'}, inplace=True)
    # to_show = full_df.copy(deep=True)
    # to_show = to_show[to_show['user'] != 'test']
    # to_show = to_show[['submission time', 'problem type', 'user', 'score', 'full results']]
    # to_show['user'] = to_show['user'].apply(lambda x: make_user_clickable(x)).astype(str)

    # Previously hosted on the HF hub, local for now (can also pull directly from the GitHub backend).
    column_order = ["model", "property", "spearman", "spearman_abs"]  # "assay",
    df = pd.read_csv("data/metrics_all.csv").drop_duplicates(subset=["model", "assay"])
    df["property"] = df["assay"].map(ASSAY_RENAME)
    df = df.query("assay.isin(@ASSAY_RENAME.keys())")
    if assay is not None:
        df = df[df['assay'] == assay]
    df = df[column_order]
    return df.sort_values(by="spearman_abs", ascending=False)
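

# Hedged usage sketch (assumption: run from the repo root so "data/metrics_all.csv" resolves;
# "HIC" below is a hypothetical assay name, see ASSAY_LIST in about.py for the real values):
#   get_leaderboard_table()              # one row per (model, property), sorted by spearman_abs (descending)
#   get_leaderboard_table(assay="HIC")   # restrict the table to a single assay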


def get_leaderboard_object(assay: str | None = None):
    """Render a gradio_leaderboard Leaderboard for the full table or a single assay."""
    df = get_leaderboard_table(assay=assay)
    filter_columns = ["model"]
    if assay is None:
        filter_columns.append("property")
    # TODO how to sort filter columns alphabetically?
    Leaderboard(
        value=df,
        datatype=["str", "str", "str", "number"],
        select_columns=["model", "property", "spearman"],
        search_columns=["model"],
        filter_columns=filter_columns,
        every=60,
        render=True
    )
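

# Note: get_leaderboard_object() is called from inside the gr.Blocks() context below;
# with render=True the Leaderboard component attaches to that context, so nothing needs to be returned.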


def show_output_box(message):
    return gr.update(value=message, visible=True)


#
# def gradio_interface() -> gr.Blocks:
with gr.Blocks() as demo:
    gr.Markdown("""
## Welcome to the Ginkgo Antibody Developability Benchmark Leaderboard!

Participants can submit their models to the leaderboard.
""")
    with gr.Tabs(elem_classes="tab-buttons"):
        with gr.TabItem("🚀 Leaderboard", elem_id="abdev-benchmark-tab-table"):
            gr.Markdown("# Antibody Developability Benchmark Leaderboard")
            get_leaderboard_object()
            # gr.Markdown("Extra info here")

        # Procedurally make these 5 tabs
        for assay in ASSAY_LIST:
            with gr.TabItem(f"{ASSAY_EMOJIS[assay]} {ASSAY_RENAME[assay]}", elem_id="abdev-benchmark-tab-table"):
                gr.Markdown(f"# {ASSAY_RENAME[assay]} (measured by {assay})")
                get_leaderboard_object(assay=assay)
with gr.TabItem("❔About", elem_id="abdev-benchmark-tab-table"):
gr.Markdown(
"""
## About this challenge
We're inviting the ML/bio community to predict developability properties for 244 antibodies from the [GDPa1 dataset](https://huggingface.co/datasets/ginkgo-datapoints/GDPa1).
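
If you want to explore the data, a minimal sketch (assuming the standard Hugging Face `datasets` API) is:

```python
from datasets import load_dataset

# Public GDPa1 antibody developability dataset on the Hugging Face Hub
gdpa1 = load_dataset("ginkgo-datapoints/GDPa1")
print(gdpa1)
```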

**What is antibody developability?**

Antibodies need to be manufacturable and stable at high concentrations, and to have minimal off-target effects.
Properties such as these can hinder an antibody's progression to the clinic, and are collectively referred to as 'developability'.
Here we show 5 of these properties and invite the community to develop and submit better predictors, which will be evaluated on a held-out private set to assess model generalization.

**How to submit?**

TODO

**How to evaluate?**

TODO
"""
            )
        # dropdown = gr.Dropdown(choices=filenames, label="Choose a file")
        # plot_output = gr.Plot()
        # with gr.TabItem("🔍 Visualize", elem_id="boundary-benchmark-tab-table"):
        #     ds = load_dataset(results_repo, split='train', download_mode="force_redownload")
        #     full_df = pd.DataFrame(ds)
        #     filenames = full_df['result_filename'].to_list()
        #     with gr.Row():
        #         with gr.Column():
        #             dropdown = gr.Dropdown(choices=filenames, label="Choose a leaderboard entry", value=filenames[0])
        #             rld_btn = gr.Button(value="Reload")
        #         with gr.Column():
        #             plot = gr.Plot()
        #     def get_boundary_vis(selected_file):
        #         local_path = read_result_from_hub(selected_file)
        #         with Path(local_path).open("r") as f:
        #             raw = f.read()
        #         data_dict = json.loads(raw)
        #         boundary_json = data_dict['boundary_json']
        #         if data_dict['problem_type'] == 'mhd_stable':
        #             raise gr.Error("Sorry this isn't implemented for mhd_stable submissions yet!")
        #         else:
        #             boundary = load_boundary(boundary_json)
        #             vis = make_visual(boundary)
        #         return vis
        #     demo.load(get_boundary_vis, dropdown, plot)
        #     rld_btn.click(get_boundary_vis, dropdown, plot)
# with gr.TabItem("✉️ Submit", elem_id="boundary-benchmark-tab-table"):
# gr.Markdown(
# """
# # Plasma Boundary Evaluation Submission
# Upload your plasma boundary JSON and select the problem type to get your score.
# """
# )
# filename = gr.State(value=None)
# eval_state = gr.State(value=None)
# user_state = gr.State(value=None)
# # gr.LoginButton()
# with gr.Row():
# with gr.Column():
# problem_type = gr.Dropdown(PROBLEM_TYPES, label="Problem Type")
# username_input = gr.Textbox(
# label="Username",
# placeholder="Enter your Hugging Face username",
# info="This will be displayed on the leaderboard."
# )
# with gr.Column():
# boundary_file = gr.File(label="Boundary JSON File (.json)")
# username_input.change(
# fn=lambda x: x if x.strip() else None,
# inputs=username_input,
# outputs=user_state
# )
# submit_btn = gr.Button("Evaluate")
# message = gr.Textbox(label="Status", lines=1, visible=False)
# # help message
# gr.Markdown("If you have issues with submission or using the leaderboard, please start a discussion in the Community tab of this Space.")
# submit_btn.click(
# submit_boundary,
# inputs=[problem_type, boundary_file, user_state],
# outputs=[message, filename],
# ).then(
# fn=show_output_box,
# inputs=[message],
# outputs=[message],
# ).then(
# fn=evaluate_boundary,
# inputs=[filename],
# outputs=[eval_state]
# )


if __name__ == "__main__":
    demo.launch()