""" Constants for the Antibody Developability Benchmark """ import os from huggingface_hub import HfApi import pandas as pd ASSAY_LIST = ["AC-SINS_pH7.4", "PR_CHO", "HIC", "Tm2", "Titer"] ASSAY_RENAME = { "AC-SINS_pH7.4": "Self-association", "PR_CHO": "Polyreactivity", "HIC": "Hydrophobicity", "Tm2": "Thermostability", "Titer": "Titer", } ASSAY_DESCRIPTION = { "AC-SINS_pH7.4": "Self association by AC-SINS at pH 7.4", "PR_CHO": "Polyreactivity by bead-based method against CHO SMP and ovalbumin", "HIC": "Hydrophobicity by HIC", "Tm2": "Thermostability by nanoDSF", "Titer": "Titer by Valita", } ASSAY_EMOJIS = { "AC-SINS_pH7.4": "🧲", "PR_CHO": "🎯", "HIC": "💧", "Tm2": "🌡️", "Titer": "🧪", } # Input CSV file requirements REQUIRED_COLUMNS: list[str] = [ "antibody_name", "vh_protein_sequence", "vl_protein_sequence", ] # Cross validation CV_COLUMN = "hierarchical_cluster_IgG_isotype_stratified_fold" # Example files EXAMPLE_FILE_DICT = { "GDPa1": "data/example-predictions.csv", "GDPa1_cross_validation": "data/example-predictions-cv.csv", } ANTIBODY_NAMES_DICT = { "GDPa1": pd.read_csv(EXAMPLE_FILE_DICT["GDPa1"])["antibody_name"].tolist(), "GDPa1_cross_validation": pd.read_csv(EXAMPLE_FILE_DICT["GDPa1_cross_validation"])[ "antibody_name" ].tolist(), } # Huggingface API TOKEN = os.environ.get("HF_TOKEN") CACHE_PATH = os.getenv("HF_HOME", ".") API = HfApi(token=TOKEN) # Huggingface repos ORGANIZATION = "ginkgo-datapoints" SUBMISSIONS_REPO = f"{ORGANIZATION}/abdev-bench-submissions" RESULTS_REPO = f"{ORGANIZATION}/abdev-bench-results" ANONYMOUS_SUBMISSION_USERNAME = "anonymoussubmissions"