File size: 2,853 Bytes
7a1c35b
 
 
 
 
 
4d9df8e
7a1c35b
 
 
 
 
 
 
 
 
 
 
62b6599
7a1c35b
 
 
 
 
 
 
 
 
 
 
61fa714
 
 
 
7a1c35b
471531b
15ae508
471531b
7a1c35b
 
 
 
 
0f3e1b5
b2a1e67
 
 
 
 
bff3b9b
7ac33bb
b2a1e67
 
 
bff3b9b
 
 
7ac33bb
b2a1e67
7a1c35b
 
 
8f9985e
7a1c35b
 
 
8f9985e
 
 
5d5df93
10e69e7
89d69bf
 
61fa714
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
"""
Constants for the Antibody Developability Benchmark
"""

import os
from huggingface_hub import HfApi
import pandas as pd

# One row per assay: (assay key, display name, description, emoji).
# Every public constant below is derived from this table, so the row order
# fixes both the order of ASSAY_LIST and the key order of each dict.
_ASSAY_TABLE = (
    ("AC-SINS_pH7.4", "Self-association", "Self association by AC-SINS at pH 7.4", "🧲"),
    ("PR_CHO", "Polyreactivity", "Polyreactivity by bead-based method against CHO SMP", "🎯"),
    ("HIC", "Hydrophobicity", "Hydrophobicity by HIC", "💧"),
    ("Tm2", "Thermostability", "Thermostability by nanoDSF", "🌡️"),
    ("Titer", "Titer", "Titer by Valita", "🧪"),
)
# Assay keys, in table order
ASSAY_LIST = [key for key, _, _, _ in _ASSAY_TABLE]
# Assay key -> short display name
ASSAY_RENAME = {key: label for key, label, _, _ in _ASSAY_TABLE}
# Assay key -> one-line description of the measurement
ASSAY_DESCRIPTION = {key: blurb for key, _, blurb, _ in _ASSAY_TABLE}
# Assay key -> emoji
ASSAY_EMOJIS = {key: icon for key, _, _, icon in _ASSAY_TABLE}
# Tab labels (each one starts with an emoji)
ABOUT_TAB_NAME: str = "📖 About / Rules"
FAQ_TAB_NAME: str = "❓ FAQs"
SUBMIT_TAB_NAME: str = "✉️ Submit"

REGISTRATION_CODE: str = "GINKGO-ABDEV-2025"
# Link to a PDF; the adjacent literals are joined into one string at compile time.
TERMS_URL: str = (
    "https://euphsfcyogalqiqsawbo.supabase.co/storage/v1/object/public/gdpweb/pdfs/"
    "2025%20Ginkgo%20Antibody%20Developability%20Prediction%20Competition%202025-08-28-v2.pdf"
)

# Columns required in an input CSV file
REQUIRED_COLUMNS: list[str] = ["antibody_name", "vh_protein_sequence", "vl_protein_sequence"]
# Column name used for cross validation
CV_COLUMN: str = "hierarchical_cluster_IgG_isotype_stratified_fold"
# Dataset name -> path of its example predictions file
EXAMPLE_FILE_DICT: dict[str, str] = {
    "GDPa1": "data/example-predictions.csv",
    "GDPa1_cross_validation": "data/example-predictions-cv.csv",
    "Heldout Test Set": "data/example-predictions-heldout.csv",
}
# Dataset name -> list of values in the "antibody_name" column of that
# dataset's example file. Each CSV in EXAMPLE_FILE_DICT is read once, at
# import time, in the dict's own key order.
ANTIBODY_NAMES_DICT = {
    dataset: pd.read_csv(csv_path)["antibody_name"].tolist()
    for dataset, csv_path in EXAMPLE_FILE_DICT.items()
}

# Hugging Face API
# Both settings are read from the environment: HF_TOKEN is None when unset,
# HF_HOME falls back to ".".
TOKEN = os.getenv("HF_TOKEN")
CACHE_PATH = os.getenv("HF_HOME", ".")
API = HfApi(token=TOKEN)

# Hugging Face repos, both under the same organization
ORGANIZATION = "ginkgo-datapoints"
SUBMISSIONS_REPO = f"{ORGANIZATION}/abdev-bench-submissions"
RESULTS_REPO = f"{ORGANIZATION}/abdev-bench-results"

# Leaderboard dataframes
# Columns expected from the results dataset
LEADERBOARD_RESULTS_COLUMNS = ["model", "assay", "spearman", "dataset", "user", "submission_time"]
# Same columns after changing "assay" to "property" (pretty formatting)
LEADERBOARD_DISPLAY_COLUMNS = ["model", "property", "spearman", "dataset", "user", "submission_time"]
# Internal column name -> header text
LEADERBOARD_COLUMNS_RENAME = {
    "spearman": "Spearman Correlation",
    "dataset": "Dataset",
    "user": "User",
    "submission_time": "Submission Time",
    "model": "Model Name",
    "property": "Property",
}


def LEADERBOARD_COLUMNS_RENAME_LIST(columns: list[str]) -> list[str]:
    """Return *columns* with each name replaced by its display header.

    Names that have no entry in ``LEADERBOARD_COLUMNS_RENAME`` are passed
    through unchanged. Order and length of the input are preserved, and the
    input list itself is not modified.

    Args:
        columns: Column names to translate.

    Returns:
        A new list holding one header per input column.
    """
    # The constant-style upper-case name is kept so existing references to
    # this function keep working.
    return [LEADERBOARD_COLUMNS_RENAME.get(name, name) for name in columns]