abdev-leaderboard

Running

File size: 12,224 Bytes

2982a51
281711d
 
5554fb7
1bcb06b
281711d
61fa714
84fdef4
61fa714
84fdef4
10e69e7
22f82e7
61fa714
15ae508
61fa714
 
 
84fdef4
3edbc93
61fa714
8f9985e
ba1131a
de75bee
1bcb06b
8f9985e
10e69e7
177a597
 
 
 
 
 
61fa714
 
 
177a597
1bcb06b
8f9985e
fe04bb9
10e69e7
1bcb06b
5b5ee28
11e5e48
10e69e7
de9585b
 
 
1bcb06b
61fa714
 
 
de9585b
8f9985e
1bcb06b
de9585b
 
2982a51
de9585b
 
8f9985e
5554fb7
 
6c94821
 
de9585b
 
 
6c94821
 
 
 
 
 
 
de9585b
6c94821
de9585b
22f82e7
 
 
 
 
 
 
8f9985e
22f82e7
8f9985e
22f82e7
 
 
 
 
 
 
3d4c9af
 
 
 
22f82e7
3d4c9af
22f82e7
 
 
21f87d6
 
3edbc93
8f9985e
813ce52
61fa714
 
 
 
 
 
 
6c94821
61fa714
 
6c94821
61fa714
 
8f9985e
61fa714
 
8f9985e
61fa714
8f9985e
de9585b
6c94821
 
 
 
 
 
61fa714
 
 
 
 
3edbc93
61fa714
22f82e7
84fdef4
 
3edbc93
 
 
 
809a553
10e69e7
22f82e7
 
 
 
 
 
a7cc355
 
 
 
8f9985e
3edbc93
a7cc355
10e69e7
a7cc355
 
 
 
471531b
 
 
 
 
3edbc93
84fdef4
7ac33bb
84fdef4
 
61fa714
84fdef4
 
1a2d1c6
84fdef4
 
 
813ce52
3edbc93
84fdef4
5d5df93
 
 
 
 
84fdef4
7ac33bb
84fdef4
d834d59
1a2d1c6
 
84fdef4
 
 
 
d834d59
84fdef4
 
 
 
 
 
 
 
 
 
 
 
3edbc93
 
 
8f9985e
 
 
 
3edbc93
 
a7cc355
 
34f59c0
a7cc355
 
 
1de74c6
471531b
a7cc355
3edbc93
 
 
 
 
 
61fa714
22f82e7
 
 
 
 
 
 
ba1131a
 
15ae508
ba1131a
8dcd98f
15ae508
ba1131a
 
8f9985e
ba1131a
281711d
 
d4cc92c

import pandas as pd

import gradio as gr
from gradio.themes.utils import colors, fonts, sizes
from gradio_leaderboard import Leaderboard

from about import ABOUT_INTRO, ABOUT_TEXT, FAQS, SUBMIT_INTRUCTIONS
from constants import (
    ASSAY_RENAME,  # keep this: used in df query
    EXAMPLE_FILE_DICT,
    LEADERBOARD_DISPLAY_COLUMNS,
    ABOUT_TAB_NAME,
    FAQ_TAB_NAME,
    TERMS_URL,
    LEADERBOARD_COLUMNS_RENAME,
    LEADERBOARD_COLUMNS_RENAME_LIST,
    SUBMIT_TAB_NAME,
)
from submit import make_submission
from utils import fetch_hf_results, show_output_box

def format_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None):
    """Turn the raw results frame into the table shown on the leaderboard.

    Rows whose ``assay`` is not a key of ``ASSAY_RENAME`` are dropped, and
    when ``assay`` is given only rows for that assay are kept. The result is
    restricted to ``LEADERBOARD_DISPLAY_COLUMNS``, ordered by ``spearman``
    from highest to lowest, and its columns are renamed through
    ``LEADERBOARD_COLUMNS_RENAME``.

    Args:
        df_results: Results frame; must contain an ``assay`` column plus
            every column listed in ``LEADERBOARD_DISPLAY_COLUMNS``.
        assay: Optional assay to restrict the table to. ``None`` keeps all
            known assays.

    Returns:
        A new DataFrame; ``df_results`` itself is not modified. The
        ``spearman`` column holds strings, not numbers.
    """
    # The query string references ASSAY_RENAME via "@", so the import of that
    # name must stay even though it looks unused to a linter.
    table = df_results.query("assay.isin(@ASSAY_RENAME.keys())").copy()
    if assay is not None:
        table = table[table["assay"] == assay]

    # Sort while the scores are still numeric; the text conversion comes after.
    table = table[LEADERBOARD_DISPLAY_COLUMNS].sort_values(by="spearman", ascending=False)

    # Note: We can also just say the following as a text box at the bottom of the leaderboard: "Note: Results for the Heldout Test Set are only evaluated at competition close"
    # The score column is stringified first so that writing an explanatory
    # text into it does not clash with a float dtype.
    table["spearman"] = table["spearman"].astype(str)
    heldout_without_score = (table["dataset"] == "Heldout Test Set") & (table["spearman"] == "nan")
    table.loc[heldout_without_score, "spearman"] = "N/A, evaluated at competition close"

    # Human-readable headers are applied last, so everything above works on
    # the raw column names.
    return table.rename(columns=LEADERBOARD_COLUMNS_RENAME)


def get_leaderboard_object(assay: str | None = None):
    """Create the ``Leaderboard`` component for one assay or for all of them.

    The table is built from the module-level ``current_dataframe`` at the
    moment this function is called.

    Args:
        assay: Assay to show. When ``None`` the table covers every assay and
            an extra "property" filter is offered next to the "dataset" one.

    Returns:
        The constructed ``Leaderboard`` component (created with
        ``render=True``).
    """
    # A per-assay board only needs the dataset filter; the overall board can
    # additionally be filtered by property.
    # TODO how to sort filter columns alphabetically?
    filter_columns = ["dataset", "property"] if assay is None else ["dataset"]

    table = format_leaderboard_table(df_results=current_dataframe, assay=assay)

    # Bug: Can't leave search_columns empty because then it says "Column None not found in headers"
    # Note(Lood): Would be nice to make it clear that the Search Column is searching on model name
    return Leaderboard(
        value=table,
        datatype=["str", "str", "str", "number"],
        select_columns=LEADERBOARD_COLUMNS_RENAME_LIST(["model", "property", "spearman", "dataset"]),
        search_columns=["Model Name"],
        filter_columns=LEADERBOARD_COLUMNS_RENAME_LIST(filter_columns),
        every=15,
        render=True,
    )


# Module-level cache of the latest results. It is fetched once at import time,
# read by get_leaderboard_object() and the refresh callback below, and rebound
# by update_current_dataframe() whenever the fetched data differs.
current_dataframe = fetch_hf_results()
# Counter bumped every time current_dataframe is replaced. It lives at module
# level (next to the dataframe it describes) so that every browser session
# observes the same version number, regardless of which session's timer tick
# happened to detect the change.
_current_data_version = 0

# Make font size bigger using gradio theme
with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
    timer = gr.Timer(3)  # Run every 3 seconds when page is focused
    data_version = gr.State(value=0)  # Track data changes

    def update_current_dataframe():
        """Re-fetch the results and return the current data version.

        When the freshly fetched frame differs from ``current_dataframe`` the
        global is rebound and the module-level version counter is incremented.
        The returned number is written into the ``data_version`` State, whose
        ``change`` listener then refreshes the leaderboard.

        The version is deliberately not derived from ``data_version.value``:
        that attribute is the value the State was constructed with (0), so
        ``data_version.value + 1`` could never exceed 1 and only the first data
        change would ever trigger a refresh.
        """
        global current_dataframe, _current_data_version
        new_dataframe = fetch_hf_results()

        # Check if data has actually changed
        if not current_dataframe.equals(new_dataframe):
            current_dataframe = new_dataframe
            _current_data_version += 1
        return _current_data_version

    timer.tick(fn=update_current_dataframe, outputs=data_version)

    ## Header

    with gr.Row():
        with gr.Column(scale=6):  # bigger text area
            gr.Markdown(
                f"""
                ## Welcome to the Ginkgo Antibody Developability Benchmark!

                **Beta version, not publicly launched yet**

                Participants can submit their model to the leaderboards by simply uploading a CSV file (see the "✉️ Submit" tab).
                
                You can **predict any or all of the 5 properties**, and each property has its own leaderboard.
                See more details in the "{ABOUT_TAB_NAME}" tab.
                """
            )
        with gr.Column(scale=2):  # smaller side column for logo
            gr.Image(
                value="./assets/competition_logo.jpg",
                show_label=False,
                show_download_button=False,
                width="25vw",  # Take up the width of the column (2/8 = 1/4)
            )

    with gr.Tabs(elem_classes="tab-buttons"):
        with gr.TabItem(ABOUT_TAB_NAME, elem_id="abdev-benchmark-tab-table"):
            gr.Markdown(ABOUT_INTRO)
            gr.Image(value="./assets/prediction_explainer.png", show_label=False, show_download_button=False, width="50vw")
            gr.Markdown(ABOUT_TEXT)

        # Procedurally make these 5 tabs
        # for i, assay in enumerate(ASSAY_LIST):
        #     with gr.TabItem(
        #         f"{ASSAY_EMOJIS[assay]} {ASSAY_RENAME[assay]}",
        #         elem_id="abdev-benchmark-tab-table",
        #     ) as tab_item:
        #         gr.Markdown(f"# {ASSAY_DESCRIPTION[assay]}")
        #         lb = get_leaderboard_object(assay=assay)

        #         def refresh_leaderboard(assay=assay):
        #             return format_leaderboard_table(df_results=current_dataframe, assay=assay)

        #         # Refresh when data version changes
        #         data_version.change(fn=refresh_leaderboard, outputs=lb)

        # Note(Lood): Trying out just one leaderboard. We could also have a dropdown here that shows different leaderboards for each property, but that's just the same as the filters
        with gr.TabItem("🏆 Leaderboard", elem_id="abdev-benchmark-tab-table") as leaderboard_tab:
            gr.Markdown(
                "# Overall Leaderboard (filter below by property)"  # TODO add details about the 6 prizes here
            )
            lb = get_leaderboard_object()

            def refresh_overall_leaderboard():
                """Return the all-assay table built from the current global dataframe."""
                return format_leaderboard_table(df_results=current_dataframe)

            # Refresh when data version changes
            data_version.change(fn=refresh_overall_leaderboard, outputs=lb)

            # At the bottom of the leaderboard, we can keep as NaN and explain missing test set results
            # gr.Markdown(
            #     "_ℹ️ Results for the private test set will not be shown here and will be used for final judging at the close of the competition._"
            # )

        with gr.TabItem(SUBMIT_TAB_NAME, elem_id="boundary-benchmark-tab-table"):
            gr.Markdown(SUBMIT_INTRUCTIONS)
            # Mirrors of the dropdown selection; submission_type_state is what
            # gets passed to make_submission when "Evaluate" is clicked.
            submission_type_state = gr.State(value="GDPa1")
            download_file_state = gr.State(value=EXAMPLE_FILE_DICT["GDPa1"])

            with gr.Row():
                with gr.Column():
                    username_input = gr.Textbox(
                        label="Username",
                        placeholder="Enter your Hugging Face username",
                        info="This will be used to identify valid submissions, and to update your results if you submit again.",
                    )
                    anonymous_checkbox = gr.Checkbox(
                        label="Anonymous",
                        value=False,
                        info="If checked, your username will be replaced with an anonymous hash on the leaderboard.",
                    )
                    model_name_input = gr.Textbox(
                        label="Model Name",
                        placeholder="Enter your model name (e.g., 'MyProteinLM-v1')",
                        info="This will be displayed on the leaderboard.",
                    )
                    model_description_input = gr.Textbox(
                        label="Model Description (optional)",
                        placeholder="Brief description of your model and approach",
                        info="Describe your model, training data, or methodology.",
                        lines=3,
                    )
                    registration_code = gr.Textbox(
                        label="Registration Code",
                        placeholder="Enter your registration code",
                        info="If you did not receive a registration code, please sign up on the <a href='https://datapoints.ginkgo.bio/ai-competitions/2025-abdev-competition'>Competition Registration page</a> or email <a href='mailto:[email protected]'>[email protected]</a>.",
                    )
                with gr.Column():
                    submission_type_dropdown = gr.Dropdown(
                        choices=["GDPa1", "GDPa1_cross_validation", "Heldout Test Set"],
                        value="GDPa1",
                        label="Submission Type",
                        info=f"Choose the dataset corresponding to the track you're participating in. See the '{ABOUT_TAB_NAME}' tab for details.",
                    )
                    download_button = gr.DownloadButton(
                        label="📥 Download example submission CSV for GDPa1",
                        value=EXAMPLE_FILE_DICT["GDPa1"],
                        variant="secondary",
                    )
                    submission_file = gr.File(label="Submission CSV")

            def update_submission_type_and_file(submission_type):
                """React to a new dropdown selection.

                Args:
                    submission_type: The value currently selected in the
                        "Submission Type" dropdown.

                Returns:
                    A 3-tuple matching the handler's outputs: the new
                    submission type, the example file for it, and a
                    replacement download button pointing at that file.
                """
                # Fall back to the default GDPa1 example when no example file
                # is registered for this type. The previous default,
                # EXAMPLE_FILE_DICT[submission_type], was evaluated eagerly and
                # raised KeyError for exactly the keys .get() was meant to
                # cover, which aborted the handler and left
                # submission_type_state out of sync with the dropdown.
                download_file = EXAMPLE_FILE_DICT.get(
                    submission_type, EXAMPLE_FILE_DICT["GDPa1"]
                )
                download_label = (
                    f"📥 Download example submission CSV for {submission_type}"
                )
                return (
                    submission_type,
                    download_file,
                    gr.DownloadButton(
                        label=download_label,
                        value=download_file,
                        variant="secondary",
                    ),
                )

            # Update submission type state and download button when dropdown changes
            submission_type_dropdown.change(
                fn=update_submission_type_and_file,
                inputs=submission_type_dropdown,
                outputs=[submission_type_state, download_file_state, download_button],
            )

            submit_btn = gr.Button("Evaluate")
            # Hidden until there is something to report; show_output_box runs
            # after make_submission with the message as input and output.
            message = gr.Textbox(label="Status", lines=1, visible=False)
            # help message
            gr.Markdown(
                "If you have issues with submission or using the leaderboard, please start a discussion in the Community tab of this Space."
            )

            submit_btn.click(
                make_submission,
                inputs=[
                    submission_file,
                    username_input,
                    submission_type_state,
                    model_name_input,
                    model_description_input,
                    anonymous_checkbox,
                    registration_code,
                ],
                outputs=[message],
            ).then(
                fn=show_output_box,
                inputs=[message],
                outputs=[message],
            )
        with gr.Tab(FAQ_TAB_NAME):
            gr.Markdown("# Frequently Asked Questions")
            for number, (question, answer) in enumerate(FAQS.items(), start=1):
                # Would love to make questions bold but accordion doesn't support it
                with gr.Accordion(f"{number}. {question}", open=False):
                    gr.Markdown(f"*{answer}*")  # Italics for answers

    # Footnote
    gr.Markdown(
        f"""
        <div style="text-align: center; font-size: 14px; color: gray; margin-top: 2em;">
        📬 For questions or feedback, contact <a href="mailto:[email protected]">[email protected]</a> or visit the Community tab at the top of this page.<br>
        Visit the <a href="https://datapoints.ginkgo.bio/ai-competitions/2025-abdev-competition">Competition Registration page</a> to sign up for updates and to register a team, and see Terms <a href="{TERMS_URL}">here</a>.
        </div>
        """,
        elem_id="contact-footer",
    )

# Start the app only when this file is run as a script; importing the module
# builds `demo` but does not launch it.
# NOTE(review): ssr_mode=False presumably disables Gradio's server-side
# rendering — confirm against the Gradio version this Space pins.
if __name__ == "__main__":
    demo.launch(ssr_mode=False)