loodvanniekerkginkgo committed
Commit 61fa714 · 1 Parent(s): 22f82e7

Text changes, only one leaderboard

Files changed (3):
  1. about.py +8 -8
  2. app.py +35 -24
  3. constants.py +14 -1
about.py CHANGED
@@ -1,4 +1,4 @@
-from constants import ABOUT_TAB_NAME, ASSAY_LIST, TERMS_URL
+from constants import ABOUT_TAB_NAME, ASSAY_LIST, SUBMIT_TAB_NAME, TERMS_URL, FAQ_TAB_NAME
 
 ABOUT_INTRO = f"""
 ## About this challenge
@@ -17,7 +17,7 @@ For each of the 5 properties in the competition, there is a prize for the model
 There is also an 'open-source' prize for the best model trained on the GDPa1 dataset (reporting cross-validation results) and assessed on the private test set where authors provide all training code and data.
 For each of these 6 prizes, participants have the choice between **$10k in data generation credits** with [Ginkgo Datapoints](https://datapoints.ginkgo.bio/) or a **cash prize** with a value of $2000.
 
-See the FAQs below or the [competition terms]({TERMS_URL}) for more details.
+See the "{FAQ_TAB_NAME}" tab above (you are currently on the "{ABOUT_TAB_NAME}" tab) or the [competition terms]({TERMS_URL}) for more details.
 """
 
 ABOUT_TEXT = f"""
@@ -27,10 +27,10 @@ ABOUT_TEXT = f"""
 1. **Create a Hugging Face account** [here](https://huggingface.co/join) if you don't have one yet (this is used to track unique submissions and to access the GDPa1 dataset).
 2. **Register your team** on the [Competition Registration](https://datapoints.ginkgo.bio/ai-competitions/2025-abdev-competition) page.
 3. **Build a model** or validate it on the [GDPa1](https://huggingface.co/datasets/ginkgo-datapoints/GDPa1) dataset.
-4. **Choose a validation track**. You must first upload predictions on the validation set before submitting predictions on the private test set:
-    - **Track 1**: If you already have a developability model, you can submit your predictions for the GDPa1 public dataset.
-    - **Track 2**: If you don't have a model, train one using cross-validation on the GDPa1 dataset and submit your predictions under the "Cross-validation" option.
-5. **Submit your predictions** as a CSV on the "✉️ Submit" tab on this page (you are currently on the "{ABOUT_TAB_NAME}" tab).
+4. **Complete the "Qualifying Exam"**. Before you can submit to the final test set, you must first get a score on the public leaderboard. Choose one of the two tracks:
+    - Track 1 (Benchmark an existing model): Submit predictions for the `GDPa1` dataset.
+    - Track 2 (Train from scratch): Train a model using cross-validation on the `GDPa1` dataset and submit cross-validation predictions by selecting `GDPa1_cross_validation`.
+5. **Submit to the "Final Exam"**. Once you have submitted predictions on the validation set, download the private test set sequences from the {SUBMIT_TAB_NAME} tab and submit your final predictions. Your performance on this private set will determine the winners.
 
 #### How to contribute?
 
@@ -126,11 +126,11 @@ You do **not** need to predict all 5 properties — each property has its own le
 1. **Submit your predictions** as a CSV with `antibody_name` + one column per property you are predicting (e.g. `"antibody_name,Titer,PR_CHO"` if your model predicts Titer and Polyreactivity).
 2. **Final test submission**: Download test sequences from the example files below and upload predictions.
 
-The validation set results should appear on the leaderboard after a few seconds. The **private test set results will not appear on the leaderboards**, and will be used to determine the winners at the close of the competition.
+The validation set results should appear on the leaderboard within a minute. The **private test set results will not appear on the leaderboards**, and will be used to determine the winners at the close of the competition.
 We may release private test set results at intermediate points during the competition.
 
 ## Cross-validation
 
 For the cross-validation metrics (if training only on the GDPa1 dataset), use the `"hierarchical_cluster_IgG_isotype_stratified_fold"` column to split the dataset into folds and make predictions for each of the folds.
 Submit a CSV file in the same format but also containing the `"hierarchical_cluster_IgG_isotype_stratified_fold"` column.
-"""
+"""
app.py CHANGED
@@ -3,19 +3,20 @@ import pandas as pd
 import gradio as gr
 from gradio_leaderboard import Leaderboard
 
-from utils import fetch_hf_results, show_output_box
+from about import ABOUT_INTRO, ABOUT_TEXT, FAQS, SUBMIT_INTRUCTIONS
 from constants import (
-    ASSAY_LIST,
-    ASSAY_RENAME,
-    ASSAY_EMOJIS,
-    ASSAY_DESCRIPTION,
+    ASSAY_RENAME,  # keep this: used in df query
     EXAMPLE_FILE_DICT,
     LEADERBOARD_DISPLAY_COLUMNS,
    ABOUT_TAB_NAME,
+    FAQ_TAB_NAME,
     TERMS_URL,
+    LEADERBOARD_COLUMNS_RENAME,
+    LEADERBOARD_COLUMNS_RENAME_LIST,
+    SUBMIT_TAB_NAME,
 )
-from about import ABOUT_INTRO, ABOUT_TEXT, FAQS, SUBMIT_INTRUCTIONS
 from submit import make_submission
+from utils import fetch_hf_results, show_output_box
 
 def format_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None):
     df = df_results.query("assay.isin(@ASSAY_RENAME.keys())").copy()
@@ -28,6 +29,9 @@ def format_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None)
     # Convert spearman column to string to avoid dtype incompatibility when assigning text
     df["spearman"] = df["spearman"].astype(str)
     df.loc[(df["dataset"] == "Heldout Test Set") & (df["spearman"] == "nan"), "spearman"] = "N/A, evaluated at competition close"
+
+    # Finally, rename columns for readability
+    df = df.rename(columns=LEADERBOARD_COLUMNS_RENAME)
     return df
 
 
@@ -41,9 +45,9 @@
     lb = Leaderboard(
         value=format_leaderboard_table(df_results=current_dataframe, assay=assay),
         datatype=["str", "str", "str", "number"],
-        select_columns=["model", "property", "spearman", "dataset"],
-        search_columns=["model"],
-        filter_columns=filter_columns,
+        select_columns=LEADERBOARD_COLUMNS_RENAME_LIST(["model", "property", "spearman", "dataset"]),
+        search_columns=["Model Name"],
+        filter_columns=LEADERBOARD_COLUMNS_RENAME_LIST(filter_columns),
         every=15,
         render=True,
     )
@@ -100,23 +104,24 @@ with gr.Blocks() as demo:
         gr.Markdown(ABOUT_TEXT)
 
     # Procedurally make these 5 tabs
-    for i, assay in enumerate(ASSAY_LIST):
-        with gr.TabItem(
-            f"{ASSAY_EMOJIS[assay]} {ASSAY_RENAME[assay]}",
-            elem_id="abdev-benchmark-tab-table",
-        ) as tab_item:
-            gr.Markdown(f"# {ASSAY_DESCRIPTION[assay]}")
-            lb = get_leaderboard_object(assay=assay)
+    # for i, assay in enumerate(ASSAY_LIST):
+    #     with gr.TabItem(
+    #         f"{ASSAY_EMOJIS[assay]} {ASSAY_RENAME[assay]}",
+    #         elem_id="abdev-benchmark-tab-table",
+    #     ) as tab_item:
+    #         gr.Markdown(f"# {ASSAY_DESCRIPTION[assay]}")
+    #         lb = get_leaderboard_object(assay=assay)
 
-            def refresh_leaderboard(assay=assay):
-                return format_leaderboard_table(df_results=current_dataframe, assay=assay)
+    #         def refresh_leaderboard(assay=assay):
+    #             return format_leaderboard_table(df_results=current_dataframe, assay=assay)
 
-            # Refresh when data version changes
-            data_version.change(fn=refresh_leaderboard, outputs=lb)
+    #         # Refresh when data version changes
+    #         data_version.change(fn=refresh_leaderboard, outputs=lb)
 
-    with gr.TabItem("🚀 Overall", elem_id="abdev-benchmark-tab-table") as overall_tab:
+    # Note(Lood): Trying out just one leaderboard. We could also have a dropdown here that shows different leaderboards for each property, but that's just the same as the filters
+    with gr.TabItem("🏆 Leaderboard", elem_id="abdev-benchmark-tab-table") as leaderboard_tab:
         gr.Markdown(
-            "# Antibody Developability Benchmark Leaderboard over all properties"
+            "# Overall Leaderboard (filter below by property)"  # TODO add details about the 6 prizes here
         )
         lb = get_leaderboard_object()
 
@@ -125,8 +130,13 @@ with gr.Blocks() as demo:
 
         # Refresh when data version changes
        data_version.change(fn=refresh_overall_leaderboard, outputs=lb)
+
+        # At the bottom of the leaderboard, we can keep as NaN and explain missing test set results
+        # gr.Markdown(
+        #     "_ℹ️ Results for the private test set will not be shown here and will be used for final judging at the close of the competition._"
+        # )
 
-    with gr.TabItem("✉️ Submit", elem_id="boundary-benchmark-tab-table"):
+    with gr.TabItem(SUBMIT_TAB_NAME, elem_id="boundary-benchmark-tab-table"):
         gr.Markdown(SUBMIT_INTRUCTIONS)
         submission_type_state = gr.State(value="GDPa1")
         download_file_state = gr.State(value=EXAMPLE_FILE_DICT["GDPa1"])
@@ -164,6 +174,7 @@ with gr.Blocks() as demo:
             choices=["GDPa1", "GDPa1_cross_validation", "Heldout Test Set"],
             value="GDPa1",
             label="Submission Type",
+            info=f"Choose the dataset corresponding to the track you're participating in. See the '{ABOUT_TAB_NAME}' tab for details.",
         )
         download_button = gr.DownloadButton(
             label="📥 Download example submission CSV for GDPa1",
@@ -225,7 +236,7 @@ with gr.Blocks() as demo:
             inputs=[message],
            outputs=[message],
         )
-    with gr.Tab("❔ FAQs"):
+    with gr.Tab(FAQ_TAB_NAME):
         gr.Markdown("# Frequently Asked Questions")
         for i, (question, answer) in enumerate(FAQS.items()):
             # Would love to make questions bold but accordion doesn't support it
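
One subtlety in the app.py hunks above: `format_leaderboard_table` still queries and patches rows under the raw result-dataset column names and applies `LEADERBOARD_COLUMNS_RENAME` only as its final step, so every `Leaderboard` kwarg now has to use display names (hence `LEADERBOARD_COLUMNS_RENAME_LIST` and `search_columns=["Model Name"]`). A standalone toy illustration of that ordering constraint, using an abbreviated stand-in for the rename map:

```python
import pandas as pd

# Abbreviated stand-in for LEADERBOARD_COLUMNS_RENAME in constants.py
RENAME = {"model": "Model Name", "spearman": "Spearman Correlation"}

df = pd.DataFrame({"model": ["my-model"], "spearman": ["nan"]})

# Raw names are still valid at this point, e.g. for the "nan" fix-up
df.loc[df["spearman"] == "nan", "spearman"] = "N/A, evaluated at competition close"

df = df.rename(columns=RENAME)  # rename last, as format_leaderboard_table now does

# After the rename only display names exist, so widget kwargs must match them
assert list(df.columns) == ["Model Name", "Spearman Correlation"]
```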
constants.py CHANGED
@@ -28,7 +28,10 @@ ASSAY_EMOJIS = {
     "Tm2": "🌡️",
     "Titer": "🧪",
 }
-ABOUT_TAB_NAME = "ℹ️ About"
+# Tabs with emojis
+ABOUT_TAB_NAME = "📖 About / Rules"
+FAQ_TAB_NAME = "❓ FAQs"
+SUBMIT_TAB_NAME = "✉️ Submit"
 
 REGISTRATION_CODE = "GINKGO-ABDEV-2025"
 TERMS_URL = "https://euphsfcyogalqiqsawbo.supabase.co/storage/v1/object/public/gdpweb/pdfs/2025%20Ginkgo%20Antibody%20Developability%20Prediction%20Competition%202025-08-28-v2.pdf"
@@ -68,3 +71,13 @@ RESULTS_REPO = f"{ORGANIZATION}/abdev-bench-results"
 # Leaderboard dataframes
 LEADERBOARD_RESULTS_COLUMNS = ["model", "assay", "spearman", "dataset", "user", "submission_time"]  # The columns expected from the results dataset
 LEADERBOARD_DISPLAY_COLUMNS = ["model", "property", "spearman", "dataset", "user", "submission_time"]  # After changing assay to property (pretty formatting)
+LEADERBOARD_COLUMNS_RENAME = {
+    "spearman": "Spearman Correlation",
+    "dataset": "Dataset",
+    "user": "User",
+    "submission_time": "Submission Time",
+    "model": "Model Name",
+    "property": "Property",
+}
+def LEADERBOARD_COLUMNS_RENAME_LIST(columns: list[str]) -> list[str]:
+    return list(map(lambda x: LEADERBOARD_COLUMNS_RENAME.get(x, x), columns))
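
For reference, the helper added at the bottom of constants.py maps raw column names to display names and passes unrecognised names through unchanged via the `.get(x, x)` fallback. A quick usage sketch, assuming constants.py is on the import path:

```python
from constants import LEADERBOARD_COLUMNS_RENAME_LIST

# Known keys are prettified; unknown keys fall through untouched
assert LEADERBOARD_COLUMNS_RENAME_LIST(["model", "spearman"]) == ["Model Name", "Spearman Correlation"]
assert LEADERBOARD_COLUMNS_RENAME_LIST(["some_other_col"]) == ["some_other_col"]
```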