validate antibody names
Browse files- constants.py +2 -2
- data/antibody_names.csv +138 -0
- test/conftest.py +2 -6
- test/test_validation.py +20 -17
- validation.py +16 -6
constants.py
CHANGED
@@ -4,7 +4,7 @@ Constants for the Antibody Developability Benchmark
|
|
4 |
|
5 |
import os
|
6 |
from huggingface_hub import HfApi
|
7 |
-
|
8 |
|
9 |
ASSAY_LIST = ["AC-SINS_pH7.4", "PR_CHO", "HIC", "Tm2", "Titer"]
|
10 |
ASSAY_RENAME = {
|
@@ -32,11 +32,11 @@ ASSAY_EMOJIS = {
|
|
32 |
# Input CSV file requirements
|
33 |
MINIMAL_NUMBER_OF_ROWS: int = 50
|
34 |
REQUIRED_COLUMNS: list[str] = [
|
35 |
-
"antibody_id",
|
36 |
"antibody_name",
|
37 |
"vh_protein_sequence",
|
38 |
"vl_protein_sequence",
|
39 |
] + ASSAY_LIST
|
|
|
40 |
|
41 |
# Huggingface API
|
42 |
TOKEN = os.environ.get("HF_TOKEN")
|
|
|
4 |
|
5 |
import os
|
6 |
from huggingface_hub import HfApi
|
7 |
+
import pandas as pd
|
8 |
|
9 |
ASSAY_LIST = ["AC-SINS_pH7.4", "PR_CHO", "HIC", "Tm2", "Titer"]
|
10 |
ASSAY_RENAME = {
|
|
|
32 |
# Input CSV file requirements
|
33 |
MINIMAL_NUMBER_OF_ROWS: int = 50
|
34 |
REQUIRED_COLUMNS: list[str] = [
|
|
|
35 |
"antibody_name",
|
36 |
"vh_protein_sequence",
|
37 |
"vl_protein_sequence",
|
38 |
] + ASSAY_LIST
|
39 |
+
ANTIBODY_NAMES = pd.read_csv("data/antibody_names.csv")["antibody_name"].tolist()
|
40 |
|
41 |
# Huggingface API
|
42 |
TOKEN = os.environ.get("HF_TOKEN")
|
data/antibody_names.csv
ADDED
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
antibody_name
|
2 |
+
abituzumab
|
3 |
+
abrilumab
|
4 |
+
adalimumab
|
5 |
+
alemtuzumab
|
6 |
+
alirocumab
|
7 |
+
anifrolumab
|
8 |
+
atezolizumab
|
9 |
+
bapineuzumab
|
10 |
+
basiliximab
|
11 |
+
bavituximab
|
12 |
+
belimumab
|
13 |
+
benralizumab
|
14 |
+
bevacizumab
|
15 |
+
bimagrumab
|
16 |
+
blosozumab
|
17 |
+
bococizumab
|
18 |
+
brentuximab
|
19 |
+
briakinumab
|
20 |
+
brodalumab
|
21 |
+
canakinumab
|
22 |
+
carlumab
|
23 |
+
certolizumab
|
24 |
+
cetuximab
|
25 |
+
cixutumumab
|
26 |
+
clazakizumab
|
27 |
+
codrituzumab
|
28 |
+
crenezumab
|
29 |
+
dacetuzumab
|
30 |
+
daclizumab
|
31 |
+
dalotuzumab
|
32 |
+
daratumumab
|
33 |
+
denosumab
|
34 |
+
dinutuximab
|
35 |
+
drozitumab
|
36 |
+
duligotuzumab
|
37 |
+
dupilumab
|
38 |
+
eculizumab
|
39 |
+
efalizumab
|
40 |
+
eldelumab
|
41 |
+
elotuzumab
|
42 |
+
emibetuzumab
|
43 |
+
enokizumab
|
44 |
+
epratuzumab
|
45 |
+
etrolizumab
|
46 |
+
evolocumab
|
47 |
+
farletuzumab
|
48 |
+
fasinumab
|
49 |
+
fezakinumab
|
50 |
+
ficlatuzumab
|
51 |
+
figitumumab
|
52 |
+
fletikumab
|
53 |
+
foralumab
|
54 |
+
fresolimumab
|
55 |
+
fulranumab
|
56 |
+
galiximab
|
57 |
+
ganitumab
|
58 |
+
gantenerumab
|
59 |
+
gemtuzumab
|
60 |
+
gevokizumab
|
61 |
+
girentuximab
|
62 |
+
glembatumumab
|
63 |
+
golimumab
|
64 |
+
guselkumab
|
65 |
+
ibalizumab
|
66 |
+
imgatuzumab
|
67 |
+
infliximab
|
68 |
+
inotuzumab
|
69 |
+
ipilimumab
|
70 |
+
ixekizumab
|
71 |
+
lampalizumab
|
72 |
+
lebrikizumab
|
73 |
+
lenzilumab
|
74 |
+
lintuzumab
|
75 |
+
lirilumab
|
76 |
+
lumiliximab
|
77 |
+
matuzumab
|
78 |
+
mavrilimumab
|
79 |
+
mepolizumab
|
80 |
+
mogamulizumab
|
81 |
+
motavizumab
|
82 |
+
muromonab
|
83 |
+
natalizumab
|
84 |
+
necitumumab
|
85 |
+
nimotuzumab
|
86 |
+
nivolumab
|
87 |
+
obinutuzumab
|
88 |
+
ocrelizumab
|
89 |
+
ofatumumab
|
90 |
+
olaratumab
|
91 |
+
olokizumab
|
92 |
+
omalizumab
|
93 |
+
onartuzumab
|
94 |
+
otelixizumab
|
95 |
+
otlertuzumab
|
96 |
+
ozanezumab
|
97 |
+
palivizumab
|
98 |
+
panitumumab
|
99 |
+
panobacumab
|
100 |
+
parsatuzumab
|
101 |
+
patritumab
|
102 |
+
pembrolizumab
|
103 |
+
pertuzumab
|
104 |
+
pinatuzumab
|
105 |
+
polatuzumab
|
106 |
+
ponezumab
|
107 |
+
radretumab
|
108 |
+
ramucirumab
|
109 |
+
ranibizumab
|
110 |
+
reslizumab
|
111 |
+
rilotumumab
|
112 |
+
rituximab
|
113 |
+
robatumumab
|
114 |
+
romosozumab
|
115 |
+
sarilumab
|
116 |
+
secukinumab
|
117 |
+
seribantumab
|
118 |
+
sifalimumab
|
119 |
+
siltuximab
|
120 |
+
simtuzumab
|
121 |
+
sirukumab
|
122 |
+
tabalumab
|
123 |
+
tanezumab
|
124 |
+
teplizumab
|
125 |
+
tigatuzumab
|
126 |
+
tildrakizumab
|
127 |
+
tocilizumab
|
128 |
+
tovetumab
|
129 |
+
tralokinumab
|
130 |
+
trastuzumab
|
131 |
+
tremelimumab
|
132 |
+
urelumab
|
133 |
+
ustekinumab
|
134 |
+
vedolizumab
|
135 |
+
veltuzumab
|
136 |
+
visilizumab
|
137 |
+
zalutumumab
|
138 |
+
zanolimumab
|
test/conftest.py
CHANGED
@@ -1,14 +1,12 @@
|
|
1 |
import pytest
|
2 |
import pandas as pd
|
3 |
-
from constants import MINIMAL_NUMBER_OF_ROWS, ASSAY_LIST
|
4 |
|
5 |
|
6 |
@pytest.fixture
|
7 |
def valid_csv_data():
|
8 |
-
"""Fixture providing valid CSV data with all required columns"""
|
9 |
return {
|
10 |
-
"
|
11 |
-
"antibody_name": ["AB001"] * MINIMAL_NUMBER_OF_ROWS,
|
12 |
"vh_protein_sequence": [
|
13 |
"EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYAMHWVRQAPGKGLEWVSAISGSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARDYGDGYYFDYWGQGTLVTVSS"
|
14 |
]
|
@@ -23,11 +21,9 @@ def valid_csv_data():
|
|
23 |
|
24 |
@pytest.fixture
|
25 |
def valid_input_dataframe(valid_csv_data):
|
26 |
-
"""Fixture providing a valid input dataframe"""
|
27 |
return pd.DataFrame(valid_csv_data)
|
28 |
|
29 |
|
30 |
@pytest.fixture
|
31 |
def valid_csv_content(valid_input_dataframe):
|
32 |
-
"""Fixture providing valid CSV content as string"""
|
33 |
return valid_input_dataframe.to_csv(index=False)
|
|
|
1 |
import pytest
|
2 |
import pandas as pd
|
3 |
+
from constants import MINIMAL_NUMBER_OF_ROWS, ASSAY_LIST, ANTIBODY_NAMES
|
4 |
|
5 |
|
6 |
@pytest.fixture
|
7 |
def valid_csv_data():
|
|
|
8 |
return {
|
9 |
+
"antibody_name": ANTIBODY_NAMES[:MINIMAL_NUMBER_OF_ROWS],
|
|
|
10 |
"vh_protein_sequence": [
|
11 |
"EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYAMHWVRQAPGKGLEWVSAISGSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARDYGDGYYFDYWGQGTLVTVSS"
|
12 |
]
|
|
|
21 |
|
22 |
@pytest.fixture
|
23 |
def valid_input_dataframe(valid_csv_data):
|
|
|
24 |
return pd.DataFrame(valid_csv_data)
|
25 |
|
26 |
|
27 |
@pytest.fixture
|
28 |
def valid_csv_content(valid_input_dataframe):
|
|
|
29 |
return valid_input_dataframe.to_csv(index=False)
|
test/test_validation.py
CHANGED
@@ -9,14 +9,12 @@ class TestValidateCsvCanBeRead:
|
|
9 |
"""Test cases for validate_csv_can_be_read function"""
|
10 |
|
11 |
def test_valid_csv_can_be_read(self, valid_csv_content):
|
12 |
-
"""Test that valid CSV content can be read"""
|
13 |
df = validate_csv_can_be_read(valid_csv_content)
|
14 |
assert isinstance(df, pd.DataFrame)
|
15 |
assert len(df) == MINIMAL_NUMBER_OF_ROWS
|
16 |
assert list(df.columns) == list(REQUIRED_COLUMNS)
|
17 |
|
18 |
def test_empty_csv_raises_error(self):
|
19 |
-
"""Test that empty CSV raises an error"""
|
20 |
empty_csv = ""
|
21 |
|
22 |
with pytest.raises(gr.Error) as exc_info:
|
@@ -25,7 +23,6 @@ class TestValidateCsvCanBeRead:
|
|
25 |
assert "empty or contains no valid data" in str(exc_info.value)
|
26 |
|
27 |
def test_invalid_csv_format_raises_error(self):
|
28 |
-
"""Test that invalid CSV format raises an error"""
|
29 |
# Create a CSV with malformed structure that pandas cannot parse
|
30 |
malformed_csv = 'column1,column2\nvalue1,"unclosed quote\nvalue4,value5'
|
31 |
|
@@ -35,10 +32,9 @@ class TestValidateCsvCanBeRead:
|
|
35 |
assert "Invalid CSV format" in str(exc_info.value)
|
36 |
|
37 |
def test_csv_with_quoted_fields_can_be_read(self):
|
38 |
-
"""Test that CSV with quoted fields can be read"""
|
39 |
# Create CSV with quoted fields and enough rows
|
40 |
-
base_row = '
|
41 |
-
csv_content = "
|
42 |
csv_content += "\n".join([base_row] * MINIMAL_NUMBER_OF_ROWS)
|
43 |
|
44 |
df = validate_csv_can_be_read(csv_content)
|
@@ -47,14 +43,10 @@ class TestValidateCsvCanBeRead:
|
|
47 |
|
48 |
|
49 |
class TestValidateDataframe:
|
50 |
-
"""Test cases for validate_dataframe function"""
|
51 |
-
|
52 |
def test_valid_dataframe_passes(self, valid_input_dataframe):
|
53 |
-
"""Test that valid DataFrame passes validation"""
|
54 |
validate_dataframe(valid_input_dataframe)
|
55 |
|
56 |
def test_missing_columns_raises_error(self, valid_input_dataframe):
|
57 |
-
"""Test that DataFrame with missing columns raises an error"""
|
58 |
missing_column = REQUIRED_COLUMNS[0]
|
59 |
df = valid_input_dataframe.copy()
|
60 |
df.drop(columns=[missing_column], inplace=True)
|
@@ -65,7 +57,6 @@ class TestValidateDataframe:
|
|
65 |
assert f"Missing required columns: {missing_column}" in str(exc_info.value)
|
66 |
|
67 |
def test_empty_dataframe_raises_error(self, valid_input_dataframe):
|
68 |
-
"""Test that empty DataFrame raises an error"""
|
69 |
empty_df = valid_input_dataframe.head(0)
|
70 |
|
71 |
with pytest.raises(gr.Error) as exc_info:
|
@@ -74,7 +65,6 @@ class TestValidateDataframe:
|
|
74 |
assert "CSV file is empty" in str(exc_info.value)
|
75 |
|
76 |
def test_insufficient_rows_raises_error(self, valid_input_dataframe):
|
77 |
-
"""Test that DataFrame with insufficient rows raises an error"""
|
78 |
df = valid_input_dataframe.head(MINIMAL_NUMBER_OF_ROWS - 1)
|
79 |
with pytest.raises(gr.Error) as exc_info:
|
80 |
validate_dataframe(df)
|
@@ -84,7 +74,6 @@ class TestValidateDataframe:
|
|
84 |
)
|
85 |
|
86 |
def test_missing_values_raises_error(self, valid_input_dataframe):
|
87 |
-
"""Test that DataFrame with missing values raises an error"""
|
88 |
bad_column = REQUIRED_COLUMNS[0]
|
89 |
df = valid_input_dataframe.copy()
|
90 |
df[bad_column] = [None] * len(df)
|
@@ -94,17 +83,31 @@ class TestValidateDataframe:
|
|
94 |
assert f"contains {len(df)} missing values" in str(exc_info.value)
|
95 |
|
96 |
def test_csv_with_extra_columns_passes(self, valid_input_dataframe):
|
97 |
-
"""Test that DataFrame with extra columns passes validation"""
|
98 |
extra_column = "extra_column_1"
|
99 |
df = valid_input_dataframe.copy()
|
100 |
df[extra_column] = ["extra1"] * len(df)
|
101 |
df[extra_column] = ["extra2"] * len(df)
|
102 |
validate_dataframe(df)
|
103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
|
105 |
-
class TestValidateCsvFile:
|
106 |
-
"""Test cases for the combined validate_csv_file function"""
|
107 |
|
|
|
108 |
def test_valid_csv_passes(self, valid_csv_content):
|
109 |
-
"""Test that a valid CSV with all required columns passes validation"""
|
110 |
validate_csv_file(valid_csv_content)
|
|
|
9 |
"""Test cases for validate_csv_can_be_read function"""
|
10 |
|
11 |
def test_valid_csv_can_be_read(self, valid_csv_content):
|
|
|
12 |
df = validate_csv_can_be_read(valid_csv_content)
|
13 |
assert isinstance(df, pd.DataFrame)
|
14 |
assert len(df) == MINIMAL_NUMBER_OF_ROWS
|
15 |
assert list(df.columns) == list(REQUIRED_COLUMNS)
|
16 |
|
17 |
def test_empty_csv_raises_error(self):
|
|
|
18 |
empty_csv = ""
|
19 |
|
20 |
with pytest.raises(gr.Error) as exc_info:
|
|
|
23 |
assert "empty or contains no valid data" in str(exc_info.value)
|
24 |
|
25 |
def test_invalid_csv_format_raises_error(self):
|
|
|
26 |
# Create a CSV with malformed structure that pandas cannot parse
|
27 |
malformed_csv = 'column1,column2\nvalue1,"unclosed quote\nvalue4,value5'
|
28 |
|
|
|
32 |
assert "Invalid CSV format" in str(exc_info.value)
|
33 |
|
34 |
def test_csv_with_quoted_fields_can_be_read(self):
|
|
|
35 |
# Create CSV with quoted fields and enough rows
|
36 |
+
base_row = 'test_antibody,"EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYAMHWVRQAPGKGLEWVSAISGSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARDYGDGYYFDYWGQGTLVTVSS","DIQMTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIYAASTLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTPFTFGQGTKVEIK",95.2,0.85,0.92,0.78,0.81,72.5'
|
37 |
+
csv_content = "antibody_name,vh_protein_sequence,vl_protein_sequence,SEC %Monomer,HIC,PR_CHO,AC-SINS_pH6.0,AC-SINS_pH7.4,Tm\n"
|
38 |
csv_content += "\n".join([base_row] * MINIMAL_NUMBER_OF_ROWS)
|
39 |
|
40 |
df = validate_csv_can_be_read(csv_content)
|
|
|
43 |
|
44 |
|
45 |
class TestValidateDataframe:
|
|
|
|
|
46 |
def test_valid_dataframe_passes(self, valid_input_dataframe):
|
|
|
47 |
validate_dataframe(valid_input_dataframe)
|
48 |
|
49 |
def test_missing_columns_raises_error(self, valid_input_dataframe):
|
|
|
50 |
missing_column = REQUIRED_COLUMNS[0]
|
51 |
df = valid_input_dataframe.copy()
|
52 |
df.drop(columns=[missing_column], inplace=True)
|
|
|
57 |
assert f"Missing required columns: {missing_column}" in str(exc_info.value)
|
58 |
|
59 |
def test_empty_dataframe_raises_error(self, valid_input_dataframe):
|
|
|
60 |
empty_df = valid_input_dataframe.head(0)
|
61 |
|
62 |
with pytest.raises(gr.Error) as exc_info:
|
|
|
65 |
assert "CSV file is empty" in str(exc_info.value)
|
66 |
|
67 |
def test_insufficient_rows_raises_error(self, valid_input_dataframe):
|
|
|
68 |
df = valid_input_dataframe.head(MINIMAL_NUMBER_OF_ROWS - 1)
|
69 |
with pytest.raises(gr.Error) as exc_info:
|
70 |
validate_dataframe(df)
|
|
|
74 |
)
|
75 |
|
76 |
def test_missing_values_raises_error(self, valid_input_dataframe):
|
|
|
77 |
bad_column = REQUIRED_COLUMNS[0]
|
78 |
df = valid_input_dataframe.copy()
|
79 |
df[bad_column] = [None] * len(df)
|
|
|
83 |
assert f"contains {len(df)} missing values" in str(exc_info.value)
|
84 |
|
85 |
def test_csv_with_extra_columns_passes(self, valid_input_dataframe):
|
|
|
86 |
extra_column = "extra_column_1"
|
87 |
df = valid_input_dataframe.copy()
|
88 |
df[extra_column] = ["extra1"] * len(df)
|
89 |
df[extra_column] = ["extra2"] * len(df)
|
90 |
validate_dataframe(df)
|
91 |
|
92 |
+
def test_duplicate_antibody_names_raises_error(self, valid_input_dataframe):
|
93 |
+
df = valid_input_dataframe.copy()
|
94 |
+
df = pd.concat([df, df.head(1)], ignore_index=True)
|
95 |
+
with pytest.raises(gr.Error) as exc_info:
|
96 |
+
validate_dataframe(df)
|
97 |
+
assert "CSV should have only one row per antibody. Found 1 duplicates." in str(
|
98 |
+
exc_info.value
|
99 |
+
)
|
100 |
+
|
101 |
+
def test_unrecognized_antibody_names_raises_error(self, valid_input_dataframe):
|
102 |
+
df = valid_input_dataframe.copy()
|
103 |
+
df.loc[0, "antibody_name"] = "unrecognized_antibody"
|
104 |
+
with pytest.raises(gr.Error) as exc_info:
|
105 |
+
validate_dataframe(df)
|
106 |
+
assert f"Found unrecognized antibody names: {'unrecognized_antibody'}" in str(
|
107 |
+
exc_info.value
|
108 |
+
)
|
109 |
|
|
|
|
|
110 |
|
111 |
+
class TestValidateCsvFile:
|
112 |
def test_valid_csv_passes(self, valid_csv_content):
|
|
|
113 |
validate_csv_file(valid_csv_content)
|
validation.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import pandas as pd
|
2 |
import io
|
3 |
import gradio as gr
|
4 |
-
from constants import REQUIRED_COLUMNS, MINIMAL_NUMBER_OF_ROWS
|
5 |
|
6 |
|
7 |
def validate_csv_can_be_read(file_content: str) -> pd.DataFrame:
|
@@ -61,19 +61,29 @@ def validate_dataframe(df: pd.DataFrame) -> None:
|
|
61 |
if df.empty:
|
62 |
raise gr.Error("❌ CSV file is empty")
|
63 |
|
64 |
-
#
|
65 |
for col in REQUIRED_COLUMNS:
|
66 |
missing_count = df[col].isnull().sum()
|
67 |
if missing_count > 0:
|
68 |
raise gr.Error(f"❌ Column '{col}' contains {missing_count} missing values")
|
69 |
|
70 |
-
#
|
71 |
if len(df) < MINIMAL_NUMBER_OF_ROWS:
|
72 |
raise gr.Error(f"❌ CSV should have at least {MINIMAL_NUMBER_OF_ROWS} rows")
|
73 |
|
74 |
-
|
75 |
-
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
|
78 |
|
79 |
def validate_csv_file(file_content: str) -> None:
|
|
|
1 |
import pandas as pd
|
2 |
import io
|
3 |
import gradio as gr
|
4 |
+
from constants import REQUIRED_COLUMNS, MINIMAL_NUMBER_OF_ROWS, ANTIBODY_NAMES
|
5 |
|
6 |
|
7 |
def validate_csv_can_be_read(file_content: str) -> pd.DataFrame:
|
|
|
61 |
if df.empty:
|
62 |
raise gr.Error("❌ CSV file is empty")
|
63 |
|
64 |
+
# No missing values in required columns
|
65 |
for col in REQUIRED_COLUMNS:
|
66 |
missing_count = df[col].isnull().sum()
|
67 |
if missing_count > 0:
|
68 |
raise gr.Error(f"❌ Column '{col}' contains {missing_count} missing values")
|
69 |
|
70 |
+
# Above minimal number of rows
|
71 |
if len(df) < MINIMAL_NUMBER_OF_ROWS:
|
72 |
raise gr.Error(f"❌ CSV should have at least {MINIMAL_NUMBER_OF_ROWS} rows")
|
73 |
|
74 |
+
# All names should be unique
|
75 |
+
n_duplicates = df["antibody_name"].duplicated().sum()
|
76 |
+
if n_duplicates > 0:
|
77 |
+
raise gr.Error(
|
78 |
+
f"❌ CSV should have only one row per antibody. Found {n_duplicates} duplicates."
|
79 |
+
)
|
80 |
+
|
81 |
+
# All antibody names should be recognizable
|
82 |
+
unrecognized_antibodies = set(df["antibody_name"]) - set(ANTIBODY_NAMES)
|
83 |
+
if unrecognized_antibodies:
|
84 |
+
raise gr.Error(
|
85 |
+
f"❌ Found unrecognized antibody names: {', '.join(unrecognized_antibodies)}"
|
86 |
+
)
|
87 |
|
88 |
|
89 |
def validate_csv_file(file_content: str) -> None:
|