"""Gradio app for the Ginkgo Antibody Developability Benchmark leaderboard."""

import json
import tempfile
from datetime import datetime, timezone
from pathlib import Path
from typing import BinaryIO

import gradio as gr
import pandas as pd
from datasets import load_dataset
from gradio_leaderboard import Leaderboard

from about import API, ASSAY_EMOJIS, ASSAY_LIST, ASSAY_RENAME, results_repo, submissions_repo

def make_submission(submitted_file: BinaryIO, user_state):
    """Upload a user's prediction CSV to the submissions dataset repo."""
    if user_state is None:
        raise gr.Error("You must enter your username before submitting a file.")

    if submitted_file is None:
        raise gr.Error("You must upload a file to submit.")

    file_path = submitted_file.name
    if not file_path:
        raise gr.Error("Uploaded file object does not have a valid file path.")

    path_obj = Path(file_path)
    timestamp = datetime.now(timezone.utc).isoformat()

    # Read as text: the CSV content is embedded in a JSON record below, and
    # json.dump cannot serialize raw bytes.
    with path_obj.open("r", encoding="utf-8") as f_in:
        file_content = f_in.read()

    filename = f"{user_state}/{timestamp.replace(':', '-')}_{user_state}.json"
    record = {
        "submission_filename": filename,
        "submission_time": timestamp,
        "csv_content": file_content,
        "evaluated": False,
        "user": user_state,
    }
    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as tmp:
        json.dump(record, tmp, indent=2)
        tmp.flush()
        tmp_name = tmp.name

    API.upload_file(
        path_or_fileobj=tmp_name,
        path_in_repo=filename,
        repo_id=submissions_repo,
        repo_type="dataset",
        commit_message=f"Add submission for {user_state} at {timestamp}",
    )
    Path(tmp_name).unlink()

    # Return both values expected by the click handler's outputs=[message, filename].
    return (
        "✅ Your submission has been received! Sit tight and your scores will appear on the leaderboard shortly.",
        filename,
    )


def get_leaderboard_table(assay: str | None = None):
    """Fetch the latest results and return a ranked leaderboard DataFrame."""
    column_order = ["model", "property", "spearman", "spearman_abs"]
    ds = load_dataset(results_repo, split="no_low_spearman", download_mode="force_redownload")
    df = ds.to_pandas().drop_duplicates(subset=["model", "assay"])
    # Keep only assays we know how to display, then give them readable names.
    df = df[df["assay"].isin(ASSAY_RENAME.keys())]
    df["property"] = df["assay"].map(ASSAY_RENAME)
    if assay is not None:
        df = df[df["assay"] == assay]
    df = df[column_order]
    return df.sort_values(by="spearman_abs", ascending=False)
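
# Example usage (assay keys come from about.py; ASSAY_LIST[0] stands in for
# any real assay key here):
#     get_leaderboard_table()                      # all properties, ranked
#     get_leaderboard_table(assay=ASSAY_LIST[0])   # a single assay's rows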


def get_leaderboard_object(assay: str | None = None):
    """Render a Leaderboard component, optionally restricted to one assay."""
    df = get_leaderboard_table(assay=assay)
    filter_columns = ["model"]
    if assay is None:
        filter_columns.append("property")

    Leaderboard(
        value=df,
        # "spearman" and "spearman_abs" are numeric columns.
        datatype=["str", "str", "number", "number"],
        select_columns=["model", "property", "spearman"],
        search_columns=["model"],
        filter_columns=filter_columns,
        every=60,
        render=True,
    )


def show_output_box(message):
    """Make the hidden status textbox visible and fill it with `message`."""
    return gr.update(value=message, visible=True)


with gr.Blocks() as demo:
    gr.Markdown("""
    ## Welcome to the Ginkgo Antibody Developability Benchmark Leaderboard!

    Participants can submit their model's predictions by uploading a CSV in the ✉️ Submit tab; scores appear on the leaderboard once evaluated.
    """)
    with gr.Tabs(elem_classes="tab-buttons"):
        with gr.TabItem("🚀 Leaderboard", elem_id="abdev-benchmark-tab-table"):
            gr.Markdown("# Antibody Developability Benchmark Leaderboard")

            get_leaderboard_object()

        for assay in ASSAY_LIST:
            with gr.TabItem(f"{ASSAY_EMOJIS[assay]} {ASSAY_RENAME[assay]}", elem_id=f"abdev-benchmark-tab-{assay}"):
                gr.Markdown(f"# {ASSAY_RENAME[assay]} (measured by {assay})")
                get_leaderboard_object(assay=assay)

        with gr.TabItem("❔ About", elem_id="abdev-benchmark-tab-about"):
            gr.Markdown(
                """
                ## About this challenge

                We're inviting the ML/bio community to predict developability properties for 244 antibodies from the [GDPa1 dataset](https://huggingface.co/datasets/ginkgo-datapoints/GDPa1).

                **What is antibody developability?**

                Antibodies have to be manufacturable, stable at high concentrations, and have minimal off-target effects.
                Shortfalls in properties like these often stall an antibody's progression to the clinic, and the properties are collectively referred to as 'developability'.
                Here we show 5 of these properties and invite the community to develop and submit better predictors, which will be evaluated on a held-out private set to assess model generalization.

                **How to submit?**

                TODO

                **How to evaluate?**

                TODO
                """
            )

        with gr.TabItem("✉️ Submit", elem_id="abdev-benchmark-tab-submit"):
            gr.Markdown(
                """
                # Antibody Developability Submission

                Upload a CSV to get a score!
                """
            )
            filename = gr.State(value=None)
            user_state = gr.State(value=None)

            with gr.Row():
                with gr.Column():
                    username_input = gr.Textbox(
                        label="Username",
                        placeholder="Enter your Hugging Face username",
                        info="This will be displayed on the leaderboard.",
                    )
                with gr.Column():
                    submission_file = gr.File(label="Submission CSV")

            # Mirror the textbox into user_state, treating empty or
            # whitespace-only input (and the None sent on clear) as no username.
            username_input.change(
                fn=lambda x: x.strip() if x and x.strip() else None,
                inputs=username_input,
                outputs=user_state,
            )

            submit_btn = gr.Button("Evaluate")
            message = gr.Textbox(label="Status", lines=1, visible=False)

            gr.Markdown("If you have issues with submission or using the leaderboard, please start a discussion in the Community tab of this Space.")

            # make_submission returns (status message, in-repo filename); the
            # follow-up step reveals the status box.
            submit_btn.click(
                make_submission,
                inputs=[submission_file, user_state],
                outputs=[message, filename],
            ).then(
                fn=show_output_box,
                inputs=[message],
                outputs=[message],
            )


if __name__ == "__main__":
    demo.launch()