import json
import tempfile
from datetime import datetime, timezone
from pathlib import Path
from typing import BinaryIO

import gradio as gr
import pandas as pd
from datasets import load_dataset
from gradio_leaderboard import Leaderboard

from about import API, ASSAY_EMOJIS, ASSAY_LIST, ASSAY_RENAME, results_repo, submissions_repo
def make_submission(submitted_file: BinaryIO, user_state: str | None):
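    """Read the uploaded predictions file and store it, with metadata, as a JSON record in the submissions dataset repo."""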
if user_state is None:
raise gr.Error("You must submit your username to submit a file.")
file_path = submitted_file.name
if not file_path:
raise gr.Error("Uploaded file object does not have a valid file path.")
path_obj = Path(file_path)
    timestamp = datetime.now(timezone.utc).isoformat()
    file_content = path_obj.read_text(encoding="utf-8")
# write to dataset
filename = f"{user_state}/{timestamp.replace(':', '-')}_{user_state}.json"
record = {
"submission_filename": filename,
"submission_time": timestamp,
"csv_content": file_content,
"evaluated": False,
"user": user_state,
}
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as tmp:
json.dump(record, tmp, indent=2)
tmp.flush()
tmp_name = tmp.name
    try:
        API.upload_file(
            path_or_fileobj=tmp_name,
            path_in_repo=filename,
            repo_id=submissions_repo,
            repo_type="dataset",
            commit_message=f"Add submission for {user_state} at {timestamp}",
        )
    finally:
        # Clean up the local temp file whether or not the upload succeeded.
        Path(tmp_name).unlink()
return "✅ Your submission has been received! Sit tight and your scores will appear on the leaderboard shortly."
def get_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None):
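    """Build the leaderboard table (model, property, Spearman scores), optionally filtered to a single assay and sorted by Spearman."""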
# ds = load_dataset(results_repo, split='train', download_mode="force_redownload")
# full_df = pd.DataFrame(ds)
# full_df['full results'] = full_df['result_filename'].apply(lambda x: make_boundary_clickable(x)).astype(str)
# full_df.rename(columns={'submission_time': 'submission time', 'problem_type': 'problem type'}, inplace=True)
# to_show = full_df.copy(deep=True)
# to_show = to_show[to_show['user'] != 'test']
# to_show = to_show[['submission time', 'problem type', 'user', 'score', 'full results']]
# to_show['user'] = to_show['user'].apply(lambda x: make_user_clickable(x)).astype(str)
# Previously hosted on HF hub, local for now (Can also pull directly from github backend)
column_order = ["model", "property", "spearman", "spearman_cross_val"]
    df = df_results[df_results["assay"].isin(ASSAY_RENAME.keys())].copy()
if assay is not None:
df = df[df['assay'] == assay]
df = df[column_order]
return df.sort_values(by="spearman", ascending=False)
def get_leaderboard_object(df_results: pd.DataFrame, assay: str | None = None):
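    """Render a gradio_leaderboard Leaderboard component for the results, optionally restricted to a single assay."""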
df = get_leaderboard_table(df_results=df_results, assay=assay)
filter_columns = ["model"]
if assay is None:
filter_columns.append("property")
# TODO how to sort filter columns alphabetically?
    return Leaderboard(
value=df,
datatype=["str", "str", "str", "number"],
select_columns=["model", "property", "spearman", "spearman_cross_val"],
search_columns=["model"],
filter_columns=filter_columns,
every=60,
render=True
)
def show_output_box(message):
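    """Reveal the status textbox and fill it with the given message."""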
return gr.update(value=message, visible=True)
def fetch_hf_results():
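    """Load the results dataset from the Hub, drop duplicate (model, assay) rows, and map each assay to its display name."""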
ds = load_dataset(results_repo, split='no_low_spearman', download_mode="force_redownload")
df = pd.DataFrame(ds).drop_duplicates(subset=["model", "assay"])
df["property"] = df["assay"].map(ASSAY_RENAME)
print(df.head())
return df
with gr.Blocks() as demo:
gr.Markdown("""
## Welcome to the Ginkgo Antibody Developability Benchmark!
Participants can submit their model to the leaderboard by uploading a CSV file (see the "✉️ Submit" tab).
""")
df = fetch_hf_results()
with gr.Tabs(elem_classes="tab-buttons"):
# Procedurally make these 5 tabs
for assay in ASSAY_LIST:
with gr.TabItem(f"{ASSAY_EMOJIS[assay]} {ASSAY_RENAME[assay]}", elem_id=f"abdev-benchmark-tab-table"):
gr.Markdown(f"# {ASSAY_RENAME[assay]} (measured by {assay})")
get_leaderboard_object(df_results=df, assay=assay)
with gr.TabItem("🚀 Overall", elem_id="abdev-benchmark-tab-table"):
gr.Markdown("# Antibody Developability Benchmark Leaderboard over all properties")
get_leaderboard_object(df_results=df)
# TODO: this is not going to update well, need to fix
with gr.TabItem("❔About", elem_id="abdev-benchmark-tab-table"):
gr.Image(value="./assets/competition_logo.jpg")
gr.Markdown(
"""
## About this challenge
We're inviting the ML/bio community to predict developability properties for 244 antibodies from the [GDPa1 dataset](https://huggingface.co/datasets/ginkgo-datapoints/GDPa1).
**What is antibody developability?**
Antibodies need to be manufacturable, remain stable at high concentrations, and have low off-target effects.
Shortcomings in properties like these often hinder an antibody's progression to the clinic, and they are collectively referred to as 'developability'.
Here we report 5 of these properties and invite the community to develop and submit better predictors, which will be evaluated on a held-out private set to assess model generalization.
**How to submit?**
TODO
**How to evaluate?**
TODO
"""
)
with gr.TabItem("✉️ Submit", elem_id="boundary-benchmark-tab-table"):
gr.Markdown(
"""
# Antibody Developability Submission
Upload a CSV to get a score!
"""
)
filename = gr.State(value=None)
eval_state = gr.State(value=None)
user_state = gr.State(value=None)
# gr.LoginButton()
with gr.Row():
with gr.Column():
username_input = gr.Textbox(
label="Username",
placeholder="Enter your Hugging Face username",
info="This will be displayed on the leaderboard."
)
with gr.Column():
                    submission_file = gr.File(label="Submission CSV")
username_input.change(
                fn=lambda x: (x or "").strip() or None,
inputs=username_input,
outputs=user_state
)
submit_btn = gr.Button("Evaluate")
message = gr.Textbox(label="Status", lines=1, visible=False)
# help message
gr.Markdown("If you have issues with submission or using the leaderboard, please start a discussion in the Community tab of this Space.")
submit_btn.click(
make_submission,
                inputs=[submission_file, user_state],
outputs=[message],
).then(
fn=show_output_box,
inputs=[message],
outputs=[message],
)
if __name__ == "__main__":
demo.launch()