from pathlib import Path
import json
import pandas as pd
import gradio as gr
from gradio_leaderboard import Leaderboard
from evaluation import evaluate_problem
from utils import read_submission_from_hub, write_results
from about import ASSAY_LIST, ASSAY_RENAME, ASSAY_EMOJIS


def evaluate_boundary(filename):
    """Load a submission from the hub, evaluate it, and write the results."""
    print(filename)
    local_path = read_submission_from_hub(filename)
    with Path(local_path).open("r") as f:
        raw = f.read()
    data_dict = json.loads(raw)
    try:
        result = evaluate_problem(data_dict['problem_type'], local_path)
    except Exception as e:
        raise gr.Error(f'Evaluation failed: {e}. No results written to results dataset.')
    write_results(data_dict, result)
    return


def get_leaderboard_table(assay: str | None = None):
    """Build the leaderboard dataframe from data/metrics_all.csv, optionally filtered to one assay."""
    # ds = load_dataset(results_repo, split='train', download_mode="force_redownload")
    # full_df = pd.DataFrame(ds)
    # full_df['full results'] = full_df['result_filename'].apply(lambda x: make_boundary_clickable(x)).astype(str)
    # full_df.rename(columns={'submission_time': 'submission time', 'problem_type': 'problem type'}, inplace=True)
    # to_show = full_df.copy(deep=True)
    # to_show = to_show[to_show['user'] != 'test']
    # to_show = to_show[['submission time', 'problem type', 'user', 'score', 'full results']]
    # to_show['user'] = to_show['user'].apply(lambda x: make_user_clickable(x)).astype(str)

    # Previously hosted on the HF hub, local for now (can also pull directly from the GitHub backend).
    column_order = ["model", "property", "spearman", "spearman_abs"]  # "assay",
    df = pd.read_csv("data/metrics_all.csv").drop_duplicates(subset=["model", "assay"])
    df["property"] = df["assay"].map(ASSAY_RENAME)
    df = df.query("assay.isin(@ASSAY_RENAME.keys())")
    if assay is not None:
        df = df[df['assay'] == assay]
    df = df[column_order]
    return df.sort_values(by="spearman_abs", ascending=False)
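

# Hedged usage sketch (assumption: run from the repo root so "data/metrics_all.csv" resolves;
# "HIC" below is a hypothetical assay name, see ASSAY_LIST in about.py for the real values):
#   get_leaderboard_table()              # one row per (model, property), sorted by spearman_abs (descending)
#   get_leaderboard_table(assay="HIC")   # restrict the table to a single assay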


def get_leaderboard_object(assay: str | None = None):
    """Render a gradio_leaderboard Leaderboard for the full table or a single assay."""
    df = get_leaderboard_table(assay=assay)
    filter_columns = ["model"]
    if assay is None:
        filter_columns.append("property")
    # TODO how to sort filter columns alphabetically?
    Leaderboard(
        value=df,
        datatype=["str", "str", "str", "number"],
        select_columns=["model", "property", "spearman"],
        search_columns=["model"],
        filter_columns=filter_columns,
        every=60,
        render=True
    )
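

# Note: get_leaderboard_object() is called from inside the gr.Blocks() context below;
# with render=True the Leaderboard component attaches to that context, so nothing needs to be returned.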


def show_output_box(message):
    return gr.update(value=message, visible=True)


#
# def gradio_interface() -> gr.Blocks:
with gr.Blocks() as demo:
    gr.Markdown("""
## Welcome to the Ginkgo Antibody Developability Benchmark Leaderboard!

Participants can submit their models to the leaderboard.
""")
    with gr.Tabs(elem_classes="tab-buttons"):
        with gr.TabItem("🚀 Leaderboard", elem_id="abdev-benchmark-tab-table"):
            gr.Markdown("# Antibody Developability Benchmark Leaderboard")
            get_leaderboard_object()
            # gr.Markdown("Extra info here")

        # Procedurally make these 5 tabs
        for assay in ASSAY_LIST:
            with gr.TabItem(f"{ASSAY_EMOJIS[assay]} {ASSAY_RENAME[assay]}", elem_id="abdev-benchmark-tab-table"):
                gr.Markdown(f"# {ASSAY_RENAME[assay]} (measured by {assay})")
                get_leaderboard_object(assay=assay)
with gr.TabItem("❔About", elem_id="abdev-benchmark-tab-table"):
gr.Markdown(
"""
## About this challenge
We're inviting the ML/bio community to predict developability properties for 244 antibodies from the [GDPa1 dataset](https://huggingface.co/datasets/ginkgo-datapoints/GDPa1).
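
If you want to explore the data, a minimal sketch (assuming the standard Hugging Face `datasets` API) is:

```python
from datasets import load_dataset

# Public GDPa1 antibody developability dataset on the Hugging Face Hub
gdpa1 = load_dataset("ginkgo-datapoints/GDPa1")
print(gdpa1)
```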

**What is antibody developability?**

Antibodies need to be manufacturable and stable at high concentrations, and to have minimal off-target effects.
Properties such as these can hinder an antibody's progression to the clinic, and are collectively referred to as 'developability'.
Here we show 5 of these properties and invite the community to develop and submit better predictors, which will be evaluated on a held-out private set to assess model generalization.

**How to submit?**

TODO

**How to evaluate?**

TODO
"""
            )
        # dropdown = gr.Dropdown(choices=filenames, label="Choose a file")
        # plot_output = gr.Plot()
        # with gr.TabItem("🔍 Visualize", elem_id="boundary-benchmark-tab-table"):
        #     ds = load_dataset(results_repo, split='train', download_mode="force_redownload")
        #     full_df = pd.DataFrame(ds)
        #     filenames = full_df['result_filename'].to_list()
        #     with gr.Row():
        #         with gr.Column():
        #             dropdown = gr.Dropdown(choices=filenames, label="Choose a leaderboard entry", value=filenames[0])
        #             rld_btn = gr.Button(value="Reload")
        #         with gr.Column():
        #             plot = gr.Plot()
        #     def get_boundary_vis(selected_file):
        #         local_path = read_result_from_hub(selected_file)
        #         with Path(local_path).open("r") as f:
        #             raw = f.read()
        #         data_dict = json.loads(raw)
        #         boundary_json = data_dict['boundary_json']
        #         if data_dict['problem_type'] == 'mhd_stable':
        #             raise gr.Error("Sorry this isn't implemented for mhd_stable submissions yet!")
        #         else:
        #             boundary = load_boundary(boundary_json)
        #             vis = make_visual(boundary)
        #         return vis
        #     demo.load(get_boundary_vis, dropdown, plot)
        #     rld_btn.click(get_boundary_vis, dropdown, plot)
# with gr.TabItem("✉️ Submit", elem_id="boundary-benchmark-tab-table"):
# gr.Markdown(
# """
# # Plasma Boundary Evaluation Submission
# Upload your plasma boundary JSON and select the problem type to get your score.
# """
# )
# filename = gr.State(value=None)
# eval_state = gr.State(value=None)
# user_state = gr.State(value=None)
# # gr.LoginButton()
# with gr.Row():
# with gr.Column():
# problem_type = gr.Dropdown(PROBLEM_TYPES, label="Problem Type")
# username_input = gr.Textbox(
# label="Username",
# placeholder="Enter your Hugging Face username",
# info="This will be displayed on the leaderboard."
# )
# with gr.Column():
# boundary_file = gr.File(label="Boundary JSON File (.json)")
# username_input.change(
# fn=lambda x: x if x.strip() else None,
# inputs=username_input,
# outputs=user_state
# )
# submit_btn = gr.Button("Evaluate")
# message = gr.Textbox(label="Status", lines=1, visible=False)
# # help message
# gr.Markdown("If you have issues with submission or using the leaderboard, please start a discussion in the Community tab of this Space.")
# submit_btn.click(
# submit_boundary,
# inputs=[problem_type, boundary_file, user_state],
# outputs=[message, filename],
# ).then(
# fn=show_output_box,
# inputs=[message],
# outputs=[message],
# ).then(
# fn=evaluate_boundary,
# inputs=[filename],
# outputs=[eval_state]
# )


if __name__ == "__main__":
    demo.launch()