Commit 61fa714 · Parent(s): 22f82e7

Text changes, only one leaderboard

Files changed:
- about.py (+8 -8)
- app.py (+35 -24)
- constants.py (+14 -1)
about.py CHANGED

```diff
@@ -1,4 +1,4 @@
-from constants import ABOUT_TAB_NAME, ASSAY_LIST, TERMS_URL
+from constants import ABOUT_TAB_NAME, ASSAY_LIST, SUBMIT_TAB_NAME, TERMS_URL, FAQ_TAB_NAME
 
 ABOUT_INTRO = f"""
 ## About this challenge
@@ -17,7 +17,7 @@ For each of the 5 properties in the competition, there is a prize for the model
 There is also an 'open-source' prize for the best model trained on the GDPa1 dataset (reporting cross-validation results) and assessed on the private test set where authors provide all training code and data.
 For each of these 6 prizes, participants have the choice between **$10k in data generation credits** with [Ginkgo Datapoints](https://datapoints.ginkgo.bio/) or a **cash prize** with a value of $2000.
 
-See the
+See the "{FAQ_TAB_NAME}" tab above (you are currently on the "{ABOUT_TAB_NAME}" tab) or the [competition terms]({TERMS_URL}) for more details.
 """
 
 ABOUT_TEXT = f"""
@@ -27,10 +27,10 @@ ABOUT_TEXT = f"""
 1. **Create a Hugging Face account** [here](https://huggingface.co/join) if you don't have one yet (this is used to track unique submissions and to access the GDPa1 dataset).
 2. **Register your team** on the [Competition Registration](https://datapoints.ginkgo.bio/ai-competitions/2025-abdev-competition) page.
 3. **Build a model** or validate it on the [GDPa1](https://huggingface.co/datasets/ginkgo-datapoints/GDPa1) dataset.
-4. **
-    -
-    -
-5. **Submit
+4. **Complete the "Qualifying Exam"**. Before you can submit to the final test set, you must first get a score on the public leaderboard. Choose one of the two tracks:
+    - Track 1 (Benchmark an existing model): Submit predictions for the `GDPa1` dataset.
+    - Track 2 (Train from scratch): Train a model using cross-validation on the `GDPa1` dataset and submit cross-validation predictions by selecting `GDPa1_cross_validation`.
+5. **Submit to the "Final Exam"**. Once you have submitted predictions on the validation set, download the private test set sequences from the {SUBMIT_TAB_NAME} tab and submit your final predictions. Your performance on this private set will determine the winners.
 
 #### How to contribute?
 
@@ -126,11 +126,11 @@ You do **not** need to predict all 5 properties — each property has its own le
 1. **Submit your predictions** as a CSV with `antibody_name` + one column per property you are predicting (e.g. `"antibody_name,Titer,PR_CHO"` if your model predicts Titer and Polyreactivity).
 2. **Final test submission**: Download test sequences from the example files below and upload predictions.
 
-The validation set results should appear on the leaderboard
+The validation set results should appear on the leaderboard within a minute. The **private test set results will not appear on the leaderboards**, and will be used to determine the winners at the close of the competition.
 We may release private test set results at intermediate points during the competition.
 
 ## Cross-validation
 
 For the cross-validation metrics (if training only on the GDPa1 dataset), use the `"hierarchical_cluster_IgG_isotype_stratified_fold"` column to split the dataset into folds and make predictions for each of the folds.
 Submit a CSV file in the same format but also containing the `"hierarchical_cluster_IgG_isotype_stratified_fold"` column.
-"""
+"""
```
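The submission format and fold scheme described in the new about.py text lend themselves to a short illustration. The sketch below is not competition code: the local path "GDPa1.csv" and the assumption that the dataset exposes a `Titer` measurement column are placeholders, and the predict-the-training-mean baseline merely stands in for a real model. It builds a Track 2 (`GDPa1_cross_validation`) submission with `antibody_name`, one property column, and the required fold column.

```python
import pandas as pd

# Hypothetical sketch: build a cross-validation submission for one property.
# "GDPa1.csv" is a placeholder for a local copy of the GDPa1 dataset.
df = pd.read_csv("GDPa1.csv")

fold_col = "hierarchical_cluster_IgG_isotype_stratified_fold"
rows = []
for fold in sorted(df[fold_col].unique()):
    train = df[df[fold_col] != fold]
    test = df[df[fold_col] == fold]
    preds = test[["antibody_name", fold_col]].copy()
    # Stand-in for a real model: predict the training-set mean for every antibody
    preds["Titer"] = train["Titer"].mean()
    rows.append(preds)

# Same columns as a regular submission (antibody_name + predicted properties),
# plus the fold column required for the cross-validation track
pd.concat(rows).to_csv("submission.csv", index=False)
```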
app.py CHANGED

```diff
@@ -3,19 +3,20 @@ import pandas as pd
 import gradio as gr
 from gradio_leaderboard import Leaderboard
 
-from
+from about import ABOUT_INTRO, ABOUT_TEXT, FAQS, SUBMIT_INTRUCTIONS
 from constants import (
-
-    ASSAY_RENAME,
-    ASSAY_EMOJIS,
-    ASSAY_DESCRIPTION,
+    ASSAY_RENAME,  # keep this: used in df query
     EXAMPLE_FILE_DICT,
     LEADERBOARD_DISPLAY_COLUMNS,
     ABOUT_TAB_NAME,
+    FAQ_TAB_NAME,
     TERMS_URL,
+    LEADERBOARD_COLUMNS_RENAME,
+    LEADERBOARD_COLUMNS_RENAME_LIST,
+    SUBMIT_TAB_NAME,
 )
-from about import ABOUT_INTRO, ABOUT_TEXT, FAQS, SUBMIT_INTRUCTIONS
 from submit import make_submission
+from utils import fetch_hf_results, show_output_box
 
 def format_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None):
     df = df_results.query("assay.isin(@ASSAY_RENAME.keys())").copy()
@@ -28,6 +29,9 @@ def format_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None)
     # Convert spearman column to string to avoid dtype incompatibility when assigning text
     df["spearman"] = df["spearman"].astype(str)
     df.loc[(df["dataset"] == "Heldout Test Set") & (df["spearman"] == "nan"), "spearman"] = "N/A, evaluated at competition close"
+
+    # Finally, rename columns for readability
+    df = df.rename(columns=LEADERBOARD_COLUMNS_RENAME)
     return df
 
 
@@ -41,9 +45,9 @@ def get_leaderboard_object(assay: str | None = None):
     lb = Leaderboard(
         value=format_leaderboard_table(df_results=current_dataframe, assay=assay),
         datatype=["str", "str", "str", "number"],
-        select_columns=["model", "property", "spearman", "dataset"],
-        search_columns=["
-        filter_columns=filter_columns,
+        select_columns=LEADERBOARD_COLUMNS_RENAME_LIST(["model", "property", "spearman", "dataset"]),
+        search_columns=["Model Name"],
+        filter_columns=LEADERBOARD_COLUMNS_RENAME_LIST(filter_columns),
         every=15,
         render=True,
     )
@@ -100,23 +104,24 @@ with gr.Blocks() as demo:
         gr.Markdown(ABOUT_TEXT)
 
     # Procedurally make these 5 tabs
-    for i, assay in enumerate(ASSAY_LIST):
-        with gr.TabItem(
-            f"{ASSAY_EMOJIS[assay]} {ASSAY_RENAME[assay]}",
-            elem_id="abdev-benchmark-tab-table",
-        ) as tab_item:
-            gr.Markdown(f"# {ASSAY_DESCRIPTION[assay]}")
-            lb = get_leaderboard_object(assay=assay)
+    # for i, assay in enumerate(ASSAY_LIST):
+    #     with gr.TabItem(
+    #         f"{ASSAY_EMOJIS[assay]} {ASSAY_RENAME[assay]}",
+    #         elem_id="abdev-benchmark-tab-table",
+    #     ) as tab_item:
+    #         gr.Markdown(f"# {ASSAY_DESCRIPTION[assay]}")
+    #         lb = get_leaderboard_object(assay=assay)
 
-            def refresh_leaderboard(assay=assay):
-                return format_leaderboard_table(df_results=current_dataframe, assay=assay)
+    #         def refresh_leaderboard(assay=assay):
+    #             return format_leaderboard_table(df_results=current_dataframe, assay=assay)
 
-            # Refresh when data version changes
-            data_version.change(fn=refresh_leaderboard, outputs=lb)
+    #         # Refresh when data version changes
+    #         data_version.change(fn=refresh_leaderboard, outputs=lb)
 
-    with gr.TabItem(
+    # Note(Lood): Trying out just one leaderboard. We could also have a dropdown here that shows different leaderboards for each property, but that's just the same as the filters
+    with gr.TabItem("🏆 Leaderboard", elem_id="abdev-benchmark-tab-table") as leaderboard_tab:
         gr.Markdown(
-            "#
+            "# Overall Leaderboard (filter below by property)"  # TODO add details about the 6 prizes here
         )
         lb = get_leaderboard_object()
 
@@ -125,8 +130,13 @@ with gr.Blocks() as demo:
 
         # Refresh when data version changes
         data_version.change(fn=refresh_overall_leaderboard, outputs=lb)
+
+        # At the bottom of the leaderboard, we can keep as NaN and explain missing test set results
+        # gr.Markdown(
+        #     "_ℹ️ Results for the private test set will not be shown here and will be used for final judging at the close of the competition._"
+        # )
 
-    with gr.TabItem(
+    with gr.TabItem(SUBMIT_TAB_NAME, elem_id="boundary-benchmark-tab-table"):
         gr.Markdown(SUBMIT_INTRUCTIONS)
         submission_type_state = gr.State(value="GDPa1")
         download_file_state = gr.State(value=EXAMPLE_FILE_DICT["GDPa1"])
@@ -164,6 +174,7 @@ with gr.Blocks() as demo:
             choices=["GDPa1", "GDPa1_cross_validation", "Heldout Test Set"],
             value="GDPa1",
             label="Submission Type",
+            info=f"Choose the dataset corresponding to the track you're participating in. See the '{ABOUT_TAB_NAME}' tab for details.",
         )
         download_button = gr.DownloadButton(
             label="📥 Download example submission CSV for GDPa1",
@@ -225,7 +236,7 @@ with gr.Blocks() as demo:
             inputs=[message],
             outputs=[message],
         )
-    with gr.Tab(
+    with gr.Tab(FAQ_TAB_NAME):
         gr.Markdown("# Frequently Asked Questions")
         for i, (question, answer) in enumerate(FAQS.items()):
             # Would love to make questions bold but accordion doesn't support it
```
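The reworked `format_leaderboard_table` chain (cast `spearman` to string, fill in the held-out placeholder, then rename for display) can be checked in isolation. The following is a self-contained sketch on an invented two-row frame; only the rename mapping mirrors the new constants.py, the data is made up.

```python
import pandas as pd

# Mirrors LEADERBOARD_COLUMNS_RENAME from constants.py (subset used here)
LEADERBOARD_COLUMNS_RENAME = {
    "model": "Model Name",
    "property": "Property",
    "spearman": "Spearman Correlation",
    "dataset": "Dataset",
}

# Invented toy results, shaped like the leaderboard display columns
df = pd.DataFrame({
    "model": ["model-a", "model-b"],
    "property": ["Titer", "Titer"],
    "spearman": [0.42, float("nan")],
    "dataset": ["GDPa1", "Heldout Test Set"],
})

# Same three steps as format_leaderboard_table:
df["spearman"] = df["spearman"].astype(str)  # str dtype so text can be assigned
df.loc[(df["dataset"] == "Heldout Test Set") & (df["spearman"] == "nan"),
       "spearman"] = "N/A, evaluated at competition close"
df = df.rename(columns=LEADERBOARD_COLUMNS_RENAME)  # pretty names for display
print(df)
```

Casting before the `.loc` assignment is what avoids the dtype incompatibility the in-code comment mentions: a float column cannot hold the placeholder string.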
constants.py CHANGED

```diff
@@ -28,7 +28,10 @@ ASSAY_EMOJIS = {
     "Tm2": "🌡️",
     "Titer": "🧪",
 }
-
+# Tabs with emojis
+ABOUT_TAB_NAME = "📖 About / Rules"
+FAQ_TAB_NAME = "❓ FAQs"
+SUBMIT_TAB_NAME = "✉️ Submit"
 
 REGISTRATION_CODE = "GINKGO-ABDEV-2025"
 TERMS_URL = "https://euphsfcyogalqiqsawbo.supabase.co/storage/v1/object/public/gdpweb/pdfs/2025%20Ginkgo%20Antibody%20Developability%20Prediction%20Competition%202025-08-28-v2.pdf"
@@ -68,3 +71,13 @@ RESULTS_REPO = f"{ORGANIZATION}/abdev-bench-results"
 # Leaderboard dataframes
 LEADERBOARD_RESULTS_COLUMNS = ["model", "assay", "spearman", "dataset", "user", "submission_time"]  # The columns expected from the results dataset
 LEADERBOARD_DISPLAY_COLUMNS = ["model", "property", "spearman", "dataset", "user", "submission_time"]  # After changing assay to property (pretty formatting)
+LEADERBOARD_COLUMNS_RENAME = {
+    "spearman": "Spearman Correlation",
+    "dataset": "Dataset",
+    "user": "User",
+    "submission_time": "Submission Time",
+    "model": "Model Name",
+    "property": "Property",
+}
+def LEADERBOARD_COLUMNS_RENAME_LIST(columns: list[str]) -> list[str]:
+    return list(map(lambda x: LEADERBOARD_COLUMNS_RENAME.get(x, x), columns))
```