import json
import tempfile
from datetime import datetime, timezone
from pathlib import Path
from typing import BinaryIO

import gradio as gr
import pandas as pd
from datasets import load_dataset
from gradio_leaderboard import Leaderboard

from about import API, ASSAY_EMOJIS, ASSAY_LIST, ASSAY_RENAME, results_repo, submissions_repo
def make_submission(submitted_file: BinaryIO, user_state: str | None):
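    """Read the uploaded predictions file and store it, with metadata, as a JSON record in the submissions dataset repo."""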
if user_state is None:
raise gr.Error("You must submit your username to submit a file.")
file_path = submitted_file.name
if not file_path:
raise gr.Error("Uploaded file object does not have a valid file path.")
path_obj = Path(file_path)
    timestamp = datetime.now(timezone.utc).isoformat()
    file_content = path_obj.read_text(encoding="utf-8")
# write to dataset
filename = f"{user_state}/{timestamp.replace(':', '-')}_{user_state}.json"
record = {
"submission_filename": filename,
"submission_time": timestamp,
"csv_content": file_content,
"evaluated": False,
"user": user_state,
}
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as tmp:
json.dump(record, tmp, indent=2)
tmp.flush()
tmp_name = tmp.name
    try:
        API.upload_file(
            path_or_fileobj=tmp_name,
            path_in_repo=filename,
            repo_id=submissions_repo,
            repo_type="dataset",
            commit_message=f"Add submission for {user_state} at {timestamp}",
        )
    finally:
        # Clean up the local temp file whether or not the upload succeeded.
        Path(tmp_name).unlink()
return "✅ Your submission has been received! Sit tight and your scores will appear on the leaderboard shortly."
def get_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None):
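    """Build the leaderboard table (model, property, Spearman scores), optionally filtered to a single assay and sorted by Spearman."""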
# ds = load_dataset(results_repo, split='train', download_mode="force_redownload")
# full_df = pd.DataFrame(ds)
# full_df['full results'] = full_df['result_filename'].apply(lambda x: make_boundary_clickable(x)).astype(str)
# full_df.rename(columns={'submission_time': 'submission time', 'problem_type': 'problem type'}, inplace=True)
# to_show = full_df.copy(deep=True)
# to_show = to_show[to_show['user'] != 'test']
# to_show = to_show[['submission time', 'problem type', 'user', 'score', 'full results']]
# to_show['user'] = to_show['user'].apply(lambda x: make_user_clickable(x)).astype(str)
# Previously hosted on HF hub, local for now (Can also pull directly from github backend)
column_order = ["model", "property", "spearman", "spearman_cross_val"]
    df = df_results[df_results["assay"].isin(ASSAY_RENAME.keys())].copy()
if assay is not None:
df = df[df['assay'] == assay]
df = df[column_order]
return df.sort_values(by="spearman", ascending=False)
def get_leaderboard_object(df_results: pd.DataFrame, assay: str | None = None):
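    """Render a gradio_leaderboard Leaderboard component for the results, optionally restricted to a single assay."""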
df = get_leaderboard_table(df_results=df_results, assay=assay)
filter_columns = ["model"]
if assay is None:
filter_columns.append("property")
# TODO how to sort filter columns alphabetically?
    return Leaderboard(
value=df,
datatype=["str", "str", "str", "number"],
select_columns=["model", "property", "spearman", "spearman_cross_val"],
search_columns=["model"],
filter_columns=filter_columns,
every=60,
render=True
)
def show_output_box(message):
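    """Reveal the status textbox and fill it with the given message."""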
return gr.update(value=message, visible=True)
def fetch_hf_results():
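    """Load the results dataset from the Hub, drop duplicate (model, assay) rows, and map each assay to its display name."""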
ds = load_dataset(results_repo, split='no_low_spearman', download_mode="force_redownload")
df = pd.DataFrame(ds).drop_duplicates(subset=["model", "assay"])
df["property"] = df["assay"].map(ASSAY_RENAME)
print(df.head())
return df
with gr.Blocks() as demo:
gr.Markdown("""
## Welcome to the Ginkgo Antibody Developability Benchmark!
Participants can submit their model to the leaderboard by uploading a CSV file (see the "✉️ Submit" tab).
""")
df = fetch_hf_results()
with gr.Tabs(elem_classes="tab-buttons"):
# Procedurally make these 5 tabs
for assay in ASSAY_LIST:
with gr.TabItem(f"{ASSAY_EMOJIS[assay]} {ASSAY_RENAME[assay]}", elem_id=f"abdev-benchmark-tab-table"):
gr.Markdown(f"# {ASSAY_RENAME[assay]} (measured by {assay})")
get_leaderboard_object(df_results=df, assay=assay)
with gr.TabItem("🚀 Overall", elem_id="abdev-benchmark-tab-table"):
gr.Markdown("# Antibody Developability Benchmark Leaderboard over all properties")
get_leaderboard_object(df_results=df)
# TODO: this is not going to update well, need to fix
with gr.TabItem("❔About", elem_id="abdev-benchmark-tab-table"):
gr.Image(value="./assets/competition_logo.jpg")
gr.Markdown(
"""
## About this challenge
We're inviting the ML/bio community to predict developability properties for 244 antibodies from the [GDPa1 dataset](https://huggingface.co/datasets/ginkgo-datapoints/GDPa1).
**What is antibody developability?**
Antibodies need to be manufacturable, remain stable at high concentrations, and have low off-target effects.
Shortcomings in properties like these often hinder an antibody's progression to the clinic, and they are collectively referred to as 'developability'.
Here we report 5 of these properties and invite the community to develop and submit better predictors, which will be evaluated on a held-out private set to assess model generalization.
**How to submit?**
TODO
**How to evaluate?**
TODO
"""
)
with gr.TabItem("✉️ Submit", elem_id="boundary-benchmark-tab-table"):
gr.Markdown(
"""
# Antibody Developability Submission
Upload a CSV to get a score!
"""
)
filename = gr.State(value=None)
eval_state = gr.State(value=None)
user_state = gr.State(value=None)
# gr.LoginButton()
with gr.Row():
with gr.Column():
username_input = gr.Textbox(
label="Username",
placeholder="Enter your Hugging Face username",
info="This will be displayed on the leaderboard."
)
with gr.Column():
                    submission_file = gr.File(label="Submission CSV")
username_input.change(
                fn=lambda x: (x or "").strip() or None,
inputs=username_input,
outputs=user_state
)
submit_btn = gr.Button("Evaluate")
message = gr.Textbox(label="Status", lines=1, visible=False)
# help message
gr.Markdown("If you have issues with submission or using the leaderboard, please start a discussion in the Community tab of this Space.")
submit_btn.click(
make_submission,
                inputs=[submission_file, user_state],
outputs=[message],
).then(
fn=show_output_box,
inputs=[message],
outputs=[message],
)
if __name__ == "__main__":
demo.launch()