from pathlib import Path
import json
import pandas as pd

import gradio as gr
from gradio_leaderboard import Leaderboard
from evaluation import evaluate_problem

from utils import read_submission_from_hub, write_results
from about import ASSAY_LIST, ASSAY_RENAME, ASSAY_EMOJIS

def evaluate_boundary(filename):
    """Fetch a submission from the hub, evaluate it, and write the results."""
    print(f"Evaluating submission: {filename}")
    local_path = read_submission_from_hub(filename)
    with Path(local_path).open("r") as f:
        data_dict = json.load(f)

    try:
        result = evaluate_problem(data_dict["problem_type"], local_path)
    except Exception as e:
        raise gr.Error(f"Evaluation failed: {e}. No results written to results dataset.")

    write_results(data_dict, result)
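
# Minimal sketch of the submission JSON that evaluate_boundary assumes; only
# 'problem_type' is read here, while other fields (e.g. 'boundary_json', per
# the commented-out visualization code below) are consumed downstream:
#
#   {
#       "problem_type": "mhd_stable",
#       "boundary_json": "..."
#   }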

def get_leaderboard_table(assay: str | None = None):
    """Build the leaderboard dataframe, optionally restricted to a single assay."""
    # ds = load_dataset(results_repo, split='train', download_mode="force_redownload")
    # full_df = pd.DataFrame(ds)
    # full_df['full results'] = full_df['result_filename'].apply(lambda x: make_boundary_clickable(x)).astype(str)

    # full_df.rename(columns={'submission_time': 'submission time', 'problem_type': 'problem type'}, inplace=True)
    # to_show = full_df.copy(deep=True)
    # to_show = to_show[to_show['user'] != 'test']
    # to_show = to_show[['submission time', 'problem type', 'user', 'score', 'full results']]
    # to_show['user'] = to_show['user'].apply(lambda x: make_user_clickable(x)).astype(str)

    # Previously hosted on the HF hub, local for now (can also pull directly from the GitHub backend)
    column_order = ["model", "property", "spearman", "spearman_abs"]
    df = pd.read_csv("data/metrics_all.csv").drop_duplicates(subset=["model", "assay"])
    df = df[df["assay"].isin(ASSAY_RENAME)]
    df["property"] = df["assay"].map(ASSAY_RENAME)
    if assay is not None:
        df = df[df["assay"] == assay]
    df = df[column_order]
    return df.sort_values(by="spearman_abs", ascending=False)
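
# Hypothetical sketch of how the per-assay 'spearman' / 'spearman_abs' columns
# in data/metrics_all.csv could be produced upstream; the function name, input
# format, and use of scipy here are assumptions, not part of this app.
def score_assay(predictions, measurements):
    from scipy.stats import spearmanr  # local import: illustrative only

    rho, _ = spearmanr(predictions, measurements, nan_policy="omit")
    return {"spearman": float(rho), "spearman_abs": abs(float(rho))}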

def get_leaderboard_object(assay: str | None = None):
    """Render a gradio_leaderboard Leaderboard for the full table or a single assay."""
    df = get_leaderboard_table(assay=assay)
    filter_columns = ["model"]
    if assay is None:
        filter_columns.append("property")
    # TODO how to sort filter columns alphabetically?
    return Leaderboard(
        value=df,
        datatype=["str", "str", "number", "number"],
        select_columns=["model", "property", "spearman"],
        search_columns=["model"],
        filter_columns=filter_columns,
        every=60,
        render=True
    )

def show_output_box(message):
    """Make the hidden status textbox visible and set its value."""
    return gr.update(value=message, visible=True)

# def gradio_interface() -> gr.Blocks:
with gr.Blocks() as demo:
    gr.Markdown("""
        ## Welcome to the Ginkgo Antibody Developability Benchmark Leaderboard!

        Participants can submit their models to the leaderboard; see the ❔ About tab for details.
        """)
    with gr.Tabs(elem_classes="tab-buttons"):
        with gr.TabItem("🚀 Leaderboard", elem_id="abdev-benchmark-tab-table"):
            gr.Markdown("# Antibody Developability Benchmark Leaderboard")

            get_leaderboard_object()

            # gr.Markdown("Extra info here")

        # Procedurally make these 5 tabs
        for assay in ASSAY_LIST:
            with gr.TabItem(f"{ASSAY_EMOJIS[assay]} {ASSAY_RENAME[assay]}", elem_id="abdev-benchmark-tab-table"):
                gr.Markdown(f"# {ASSAY_RENAME[assay]} (measured by {assay})")
                get_leaderboard_object(assay=assay)
        
        with gr.TabItem("❔ About", elem_id="abdev-benchmark-tab-table"):
            gr.Markdown(
                """
                ## About this challenge
                
                We're inviting the ML/bio community to predict developability properties for 244 antibodies from the [GDPa1 dataset](https://huggingface.co/datasets/ginkgo-datapoints/GDPa1).
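
                For example, the public antibodies and measurements can be loaded with the 🤗 `datasets` library (a minimal sketch; the default configuration is an assumption):

                ```python
                from datasets import load_dataset

                gdpa1 = load_dataset("ginkgo-datapoints/GDPa1")
                ```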
                
                **What is antibody developability?**
                
                Antibodies must be manufacturable, stable at high concentrations, and have minimal off-target binding.
                Shortcomings in properties such as these often hinder an antibody's progression to the clinic; collectively, they are referred to as 'developability'.
                Here we show five of these properties and invite the community to develop and submit better predictors, which will be evaluated on a held-out private set to assess model generalization.
                
                **How to submit?**
                
                TODO
                
                **How to evaluate?**
                
                TODO
            """
            )

            # dropdown = gr.Dropdown(choices=filenames, label="Choose a file")
            # plot_output = gr.Plot()

        # with gr.TabItem("🔍 Visualize", elem_id="boundary-benchmark-tab-table"):
        #     ds = load_dataset(results_repo, split='train', download_mode="force_redownload")
        #     full_df = pd.DataFrame(ds)
        #     filenames = full_df['result_filename'].to_list()
        #     with gr.Row():
        #         with gr.Column():
        #             dropdown = gr.Dropdown(choices=filenames, label="Choose a leaderboard entry", value=filenames[0])
        #             rld_btn = gr.Button(value="Reload")

        #         with gr.Column():
        #             plot = gr.Plot()

        #     def get_boundary_vis(selected_file):
        #         local_path = read_result_from_hub(selected_file)
        #         with Path(local_path).open("r") as f:
        #             raw = f.read()
        #             data_dict = json.loads(raw)
        #             boundary_json = data_dict['boundary_json']

        #         if data_dict['problem_type'] == 'mhd_stable':
        #             raise gr.Error("Sorry this isn't implemented for mhd_stable submissions yet!")
        #         else:
        #             boundary = load_boundary(boundary_json)

        #         vis = make_visual(boundary)
        #         return vis

        #     demo.load(get_boundary_vis, dropdown, plot)
        #     rld_btn.click(get_boundary_vis, dropdown, plot)

        # with gr.TabItem("✉️ Submit", elem_id="boundary-benchmark-tab-table"):
        #     gr.Markdown(
        #         """
        #     # Plasma Boundary Evaluation Submission
        #     Upload your plasma boundary JSON and select the problem type to get your score.
        #     """
        #     )
        #     filename = gr.State(value=None) 
        #     eval_state = gr.State(value=None) 
        #     user_state = gr.State(value=None)

        #     # gr.LoginButton()

        #     with gr.Row():
        #         with gr.Column():
        #             problem_type = gr.Dropdown(PROBLEM_TYPES, label="Problem Type")
        #             username_input = gr.Textbox(
        #                 label="Username", 
        #                 placeholder="Enter your Hugging Face username",
        #                 info="This will be displayed on the leaderboard."
        #             )
        #         with gr.Column():
        #             boundary_file = gr.File(label="Boundary JSON File (.json)")

        #     username_input.change(
        #         fn=lambda x: x if x.strip() else None,
        #         inputs=username_input,
        #         outputs=user_state
        #     )                       

        #     submit_btn = gr.Button("Evaluate")
        #     message = gr.Textbox(label="Status", lines=1, visible=False)
        #     # help message
        #     gr.Markdown("If you have issues with submission or using the leaderboard, please start a discussion in the Community tab of this Space.")
            
        #     submit_btn.click(
        #         submit_boundary,
        #         inputs=[problem_type, boundary_file, user_state],
        #         outputs=[message, filename],
        #     ).then(
        #         fn=show_output_box,
        #         inputs=[message],
        #         outputs=[message],
        #     ).then(
        #         fn=evaluate_boundary,
        #         inputs=[filename],
        #         outputs=[eval_state]
        #     )


if __name__ == "__main__":
    demo.launch()