""" Constants for the Antibody Developability Benchmark """ import os from huggingface_hub import HfApi import pandas as pd ASSAY_LIST = ["AC-SINS_pH7.4", "PR_CHO", "HIC", "Tm2", "Titer"] ASSAY_RENAME = { "AC-SINS_pH7.4": "Self-association", "PR_CHO": "Polyreactivity", "HIC": "Hydrophobicity", "Tm2": "Thermostability", "Titer": "Titer", } ASSAY_DESCRIPTION = { "AC-SINS_pH7.4": "Self association by AC-SINS at pH 7.4", "PR_CHO": "Polyreactivity by bead-based method against CHO SMP", "HIC": "Hydrophobicity by HIC", "Tm2": "Thermostability by nanoDSF", "Titer": "Titer by Valita", } ASSAY_EMOJIS = { "AC-SINS_pH7.4": "๐Ÿงฒ", "PR_CHO": "๐ŸŽฏ", "HIC": "๐Ÿ’ง", "Tm2": "๐ŸŒก๏ธ", "Titer": "๐Ÿงช", } # Tabs with emojis ABOUT_TAB_NAME = "๐Ÿ“– About / Rules" FAQ_TAB_NAME = "โ“ FAQs" SUBMIT_TAB_NAME = "โœ‰๏ธ Submit" REGISTRATION_CODE = "GINKGO-ABDEV-2025" TERMS_URL = "https://euphsfcyogalqiqsawbo.supabase.co/storage/v1/object/public/gdpweb/pdfs/2025%20Ginkgo%20Antibody%20Developability%20Prediction%20Competition%202025-08-28-v2.pdf" # Input CSV file requirements REQUIRED_COLUMNS: list[str] = [ "antibody_name", "vh_protein_sequence", "vl_protein_sequence", ] # Cross validation CV_COLUMN = "hierarchical_cluster_IgG_isotype_stratified_fold" # Example files EXAMPLE_FILE_DICT = { "GDPa1": "data/example-predictions.csv", "GDPa1_cross_validation": "data/example-predictions-cv.csv", "Heldout Test Set": "data/example-predictions-heldout.csv", } ANTIBODY_NAMES_DICT = { "GDPa1": pd.read_csv(EXAMPLE_FILE_DICT["GDPa1"])["antibody_name"].tolist(), "GDPa1_cross_validation": pd.read_csv(EXAMPLE_FILE_DICT["GDPa1_cross_validation"])[ "antibody_name" ].tolist(), "Heldout Test Set": pd.read_csv(EXAMPLE_FILE_DICT["Heldout Test Set"])["antibody_name"].tolist(), } # Huggingface API TOKEN = os.environ.get("HF_TOKEN") CACHE_PATH = os.getenv("HF_HOME", ".") API = HfApi(token=TOKEN) # Huggingface repos ORGANIZATION = "ginkgo-datapoints" SUBMISSIONS_REPO = f"{ORGANIZATION}/abdev-bench-submissions" 
RESULTS_REPO = f"{ORGANIZATION}/abdev-bench-results"

# Leaderboard dataframes

# The columns expected from the results dataset
LEADERBOARD_RESULTS_COLUMNS = [
    "model",
    "assay",
    "spearman",
    "dataset",
    "user",
    "submission_time",
]

# After changing assay to property (pretty formatting)
LEADERBOARD_DISPLAY_COLUMNS = [
    "model",
    "property",
    "spearman",
    "dataset",
    "user",
    "submission_time",
]

# Internal column name -> header text.
LEADERBOARD_COLUMNS_RENAME = {
    "spearman": "Spearman Correlation",
    "dataset": "Dataset",
    "user": "User",
    "submission_time": "Submission Time",
    "model": "Model Name",
    "property": "Property",
}


def LEADERBOARD_COLUMNS_RENAME_LIST(columns: list[str]) -> list[str]:
    """Translate column names through ``LEADERBOARD_COLUMNS_RENAME``.

    Args:
        columns: Column names to translate.

    Returns:
        A new list of the same length and order. Each name that is a key of
        ``LEADERBOARD_COLUMNS_RENAME`` is replaced by its mapped value; any
        other name is passed through unchanged.
    """
    return [LEADERBOARD_COLUMNS_RENAME.get(name, name) for name in columns]