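"""Gradio app for the Ginkgo Antibody Developability Benchmark leaderboard.

Renders per-assay leaderboards from the results dataset and accepts submission
CSVs, which are stored as JSON records in the submissions dataset repo.
"""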
from pathlib import Path
from datetime import datetime, timezone
from typing import BinaryIO
import json
import tempfile

import pandas as pd
import gradio as gr
from datasets import load_dataset
from gradio_leaderboard import Leaderboard

from about import ASSAY_LIST, ASSAY_RENAME, ASSAY_EMOJIS, submissions_repo, API, results_repo

def make_submission(
    submitted_file: BinaryIO,
    user_state,
):
    """Store an uploaded submission file as a JSON record in the submissions dataset repo."""
    if user_state is None:
        raise gr.Error("You must submit your username to submit a file.")
    file_path = submitted_file.name
    if not file_path:
        raise gr.Error("Uploaded file object does not have a valid file path.")
    path_obj = Path(file_path)
    timestamp = datetime.now(timezone.utc).isoformat()
    # Read the upload as text: the content is embedded in a JSON record below,
    # and raw bytes are not JSON-serializable.
    with path_obj.open("r", encoding="utf-8") as f_in:
        file_content = f_in.read()
    # write to dataset
    filename = f"{user_state}/{timestamp.replace(':', '-')}_{user_state}.json"
    record = {
        "submission_filename": filename,
        "submission_time": timestamp,
        "csv_content": file_content,
        "evaluated": False,
        "user": user_state,
    }
    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as tmp:
        json.dump(record, tmp, indent=2)
        tmp.flush()
        tmp_name = tmp.name
    API.upload_file(
        path_or_fileobj=tmp_name,
        path_in_repo=filename,
        repo_id=submissions_repo,
        repo_type="dataset",
        commit_message=f"Add submission for {user_state} at {timestamp}",
    )
    Path(tmp_name).unlink()
    # Return two values to match the click handler's outputs: the status message
    # and the stored filename (kept in the `filename` State).
    return (
        "✅ Your submission has been received! Sit tight and your scores will appear on the leaderboard shortly.",
        filename,
    )

def get_leaderboard_table(assay: str | None = None):
    """Build the leaderboard DataFrame, optionally filtered to a single assay."""
    # Previously hosted on the HF hub, local for now (can also pull directly from the GitHub backend)
    column_order = ["model", "property", "spearman", "spearman_abs"]  # "assay",
    ds = load_dataset(results_repo, split="no_low_spearman", download_mode="force_redownload")
    df = pd.DataFrame(ds).drop_duplicates(subset=["model", "assay"])
    df["property"] = df["assay"].map(ASSAY_RENAME)
    # Plain boolean indexing instead of df.query(): method calls inside query
    # strings need the python engine and are easy to break.
    df = df[df["assay"].isin(ASSAY_RENAME)]
    if assay is not None:
        df = df[df["assay"] == assay]
    df = df[column_order]
    return df.sort_values(by="spearman_abs", ascending=False)
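# Sketch of expected usage (assumes "HIC" is one of the assay keys in
# ASSAY_RENAME; substitute a real key from about.py):
#   get_leaderboard_table("HIC")
#   -> columns ["model", "property", "spearman", "spearman_abs"], sorted by spearman_abs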

def get_leaderboard_object(assay: str | None = None):
    """Render a Leaderboard component for the full table or a single assay."""
    df = get_leaderboard_table(assay=assay)
    filter_columns = ["model"]
    if assay is None:
        filter_columns.append("property")
    # TODO how to sort filter columns alphabetically?
    Leaderboard(
        value=df,
        datatype=["str", "str", "number", "number"],  # both spearman columns are numeric
        select_columns=["model", "property", "spearman"],
        search_columns=["model"],
        filter_columns=filter_columns,
        every=60,
        render=True,
    )

def show_output_box(message):
    """Reveal the status textbox once a message is available."""
    return gr.update(value=message, visible=True)

with gr.Blocks() as demo:
    gr.Markdown("""
    ## Welcome to the Ginkgo Antibody Developability Benchmark Leaderboard!

    Participants can submit predictions from their model by uploading a CSV on the ✉️ Submit tab below.
    """)
    with gr.Tabs(elem_classes="tab-buttons"):
        with gr.TabItem("🚀 Leaderboard", elem_id="abdev-benchmark-tab-table"):
            gr.Markdown("# Antibody Developability Benchmark Leaderboard")
            get_leaderboard_object()
            # TODO: this is not going to update well, need to fix
            # gr.Markdown("Extra info here")
        # Procedurally make these 5 tabs; per-assay elem_ids keep the tab anchors unique
        for assay in ASSAY_LIST:
            with gr.TabItem(f"{ASSAY_EMOJIS[assay]} {ASSAY_RENAME[assay]}", elem_id=f"abdev-benchmark-tab-{assay}"):
                gr.Markdown(f"# {ASSAY_RENAME[assay]} (measured by {assay})")
                get_leaderboard_object(assay=assay)
with gr.TabItem("❔About", elem_id="abdev-benchmark-tab-table"):
gr.Markdown(
"""
## About this challenge
We're inviting the ML/bio community to predict developability properties for 244 antibodies from the [GDPa1 dataset](https://huggingface.co/datasets/ginkgo-datapoints/GDPa1).
**What is antibody developability?**
Antibodies have to be manufacturable, stable in high concentrations, and have low off-target effects.
Properties such as these can often hinder the progression of an antibody to the clinic, and are collectively referred to as 'developability'.
Here we show 5 of these properties and invite the community to submit and develop better predictors, which will be tested out on a heldout private set to assess model generalization.
**How to submit?**
TODO
**How to evaluate?**
TODO
"""
)
with gr.TabItem("✉️ Submit", elem_id="boundary-benchmark-tab-table"):
gr.Markdown(
"""
# Antibody Developability Submission
Upload a CSV to get a score!
"""
)
filename = gr.State(value=None)
eval_state = gr.State(value=None)
user_state = gr.State(value=None)
# gr.LoginButton()
with gr.Row():
with gr.Column():
username_input = gr.Textbox(
label="Username",
placeholder="Enter your Hugging Face username",
info="This will be displayed on the leaderboard."
)
with gr.Column():
boundary_file = gr.File(label="Submission CSV")
username_input.change(
fn=lambda x: x if x.strip() else None,
inputs=username_input,
outputs=user_state
)
submit_btn = gr.Button("Evaluate")
message = gr.Textbox(label="Status", lines=1, visible=False)
# help message
gr.Markdown("If you have issues with submission or using the leaderboard, please start a discussion in the Community tab of this Space.")
            submit_btn.click(
                make_submission,
                inputs=[submission_file, user_state],
                outputs=[message, filename],
            ).then(
                fn=show_output_box,
                inputs=[message],
                outputs=[message],
            )

if __name__ == "__main__":
    demo.launch()