File size: 2,940 Bytes
7a1c35b 4d9df8e 7a1c35b 62b6599 7a1c35b 61fa714 7a1c35b 6a48cdf 15ae508 471531b 7a1c35b 0f3e1b5 b2a1e67 bff3b9b 7ac33bb b2a1e67 bff3b9b 2dafeb1 b2a1e67 7a1c35b 8f9985e 7a1c35b 8f9985e 5d5df93 10e69e7 2dafeb1 61fa714 2dafeb1 61fa714 2dafeb1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
"""
Constants for the Antibody Developability Benchmark
"""
import os
from huggingface_hub import HfApi
import pandas as pd
# Single source of truth for the benchmark assays. Each row is
# (assay key, display name, description, emoji); the public constants below
# are derived from it so the four views always share the same keys and order.
_ASSAY_TABLE = (
    ("AC-SINS_pH7.4", "Self-association", "Self association by AC-SINS at pH 7.4", "🧲"),
    ("PR_CHO", "Polyreactivity", "Polyreactivity by bead-based method against CHO SMP", "🎯"),
    ("HIC", "Hydrophobicity", "Hydrophobicity by HIC", "💧"),
    ("Tm2", "Thermostability", "Thermostability by nanoDSF", "🌡️"),
    ("Titer", "Titer", "Titer by Valita", "🧪"),
)

# Assay keys, in table order.
ASSAY_LIST = [row[0] for row in _ASSAY_TABLE]

# Assay key -> short display name.
ASSAY_RENAME = {key: label for key, label, _, _ in _ASSAY_TABLE}

# Assay key -> one-line description of the measurement.
ASSAY_DESCRIPTION = {key: text for key, _, text, _ in _ASSAY_TABLE}

# Assay key -> emoji shown next to the assay.
ASSAY_EMOJIS = {key: emoji for key, _, _, emoji in _ASSAY_TABLE}
# Tab labels (each one starts with an emoji)
ABOUT_TAB_NAME = "📖 About / Rules"
FAQ_TAB_NAME = "❓ FAQs"
SUBMIT_TAB_NAME = "✉️ Submit"

# Read from the environment; None when REGISTRATION_CODE is not set.
REGISTRATION_CODE = os.getenv("REGISTRATION_CODE")

# Link to the competition terms PDF (split across lines only for readability).
TERMS_URL = (
    "https://euphsfcyogalqiqsawbo.supabase.co/storage/v1/object/public/gdpweb/pdfs/"
    "2025%20Ginkgo%20Antibody%20Developability%20Prediction%20Competition%202025-08-28-v2.pdf"
)
# Columns required in an input CSV file.
REQUIRED_COLUMNS: list[str] = ["antibody_name", "vh_protein_sequence", "vl_protein_sequence"]

# Name of the cross-validation column.
CV_COLUMN = "hierarchical_cluster_IgG_isotype_stratified_fold"
# Example prediction files, keyed by dataset name (insertion order preserved).
EXAMPLE_FILE_DICT = dict(
    [
        ("GDPa1", "data/example-predictions.csv"),
        ("GDPa1_cross_validation", "data/example-predictions-cv.csv"),
        ("Heldout Test Set", "data/example-predictions-heldout.csv"),
    ]
)
# Dataset name -> list of values from the "antibody_name" column of that
# dataset's example file. The CSVs are read when this module is imported.
ANTIBODY_NAMES_DICT = {
    dataset_name: pd.read_csv(csv_path)["antibody_name"].tolist()
    for dataset_name, csv_path in EXAMPLE_FILE_DICT.items()
}
# Hugging Face Hub access
# TOKEN is None when HF_TOKEN is not set in the environment.
TOKEN = os.getenv("HF_TOKEN")
# Falls back to the current directory when HF_HOME is not set.
CACHE_PATH = os.getenv("HF_HOME", ".")
# Shared client, constructed once at import time with the token above.
API = HfApi(token=TOKEN)
# Hugging Face repositories; both repo ids live under the same organization.
ORGANIZATION = "ginkgo-datapoints"
_REPO_PREFIX = f"{ORGANIZATION}/abdev-bench"
SUBMISSIONS_REPO = _REPO_PREFIX + "-submissions"
RESULTS_REPO = _REPO_PREFIX + "-results"
# Leaderboard dataframes

# The columns expected from the results dataset.
LEADERBOARD_RESULTS_COLUMNS = ["model", "assay", "spearman", "dataset", "user", "submission_time"]

# Same columns after changing "assay" to "property" (pretty formatting).
LEADERBOARD_DISPLAY_COLUMNS = [
    "property" if column == "assay" else column
    for column in LEADERBOARD_RESULTS_COLUMNS
]
# Internal column name -> header text used when renaming leaderboard columns.
LEADERBOARD_COLUMNS_RENAME = {
    "spearman": "Spearman Correlation",
    "dataset": "Dataset",
    "user": "User",
    "submission_time": "Submission Time",
    "model": "Model Name",
    "property": "Property",
}


# NOTE: the upper-case function name is kept as-is because callers outside
# this block may reference it by this name.
def LEADERBOARD_COLUMNS_RENAME_LIST(columns: list[str]) -> list[str]:
    """Return the renamed headers for ``columns``.

    Each name that is a key of ``LEADERBOARD_COLUMNS_RENAME`` is replaced by
    its mapped header; any other name is passed through unchanged. The result
    is a new list with the same length and order as the input.

    Args:
        columns: Column names to translate.

    Returns:
        A new list of header strings, one per input column.
    """
    return [LEADERBOARD_COLUMNS_RENAME.get(column, column) for column in columns]
|