File size: 4,787 Bytes
eb50e2e 21e1c3f eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 4d9df8e 21e1c3f 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e 0f3e1b5 eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e f412a50 eb50e2e f412a50 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e 4d9df8e eb50e2e 8f9985e 4d9df8e eb50e2e 8f9985e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
import pytest
import pandas as pd
import gradio as gr
from validation import validate_csv_file, validate_csv_can_be_read, validate_dataframe
from constants import REQUIRED_COLUMNS, ASSAY_LIST
class TestValidateCsvCanBeRead:
    """Checks that validate_csv_can_be_read parses good CSV text and rejects bad text."""

    def test_valid_csv_can_be_read(self, valid_csv_content):
        """Well-formed CSV content is returned as a pandas DataFrame."""
        assert isinstance(validate_csv_can_be_read(valid_csv_content), pd.DataFrame)

    def test_empty_csv_raises_error(self):
        """An empty string is rejected with a gr.Error about empty/invalid data."""
        with pytest.raises(gr.Error) as raised:
            validate_csv_can_be_read("")

        message = str(raised.value)
        assert "empty or contains no valid data" in message

    def test_invalid_csv_format_raises_error(self):
        """A CSV with an unterminated quoted field is reported as an invalid format."""
        # The opening quote on the second line is never closed, which is
        # expected to make the CSV unparseable.
        broken_csv = 'column1,column2\nvalue1,"unclosed quote\nvalue4,value5'

        with pytest.raises(gr.Error) as raised:
            validate_csv_can_be_read(broken_csv)

        message = str(raised.value)
        assert "Invalid CSV format" in message

    def test_csv_with_quoted_fields_can_be_read(self):
        """Double-quoted sequence fields do not prevent the CSV from being read."""
        header = "antibody_name,vh_protein_sequence,vl_protein_sequence,SEC %Monomer,HIC,PR_CHO,AC-SINS_pH6.0,AC-SINS_pH7.4,Tm\n"
        # One data row: name, quoted VH sequence, quoted VL sequence, assay values.
        quoted_row = (
            'test_antibody,'
            '"EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYAMHWVRQAPGKGLEWVSAISGSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARDYGDGYYFDYWGQGTLVTVSS",'
            '"DIQMTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIYAASTLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTPFTFGQGTKVEIK",'
            '95.2,0.85,0.92,0.78,0.81,72.5'
        )
        # Repeat the same row ten times so the file has several data rows.
        csv_text = header + "\n".join([quoted_row for _ in range(10)])

        parsed = validate_csv_can_be_read(csv_text)

        assert isinstance(parsed, pd.DataFrame)
class TestValidateDataframe:
    """Test cases for validate_dataframe.

    Each test starts from the ``valid_input_dataframe`` fixture, derives a
    modified frame from it, and checks that validation either passes or
    raises ``gr.Error`` with the expected message fragment.
    """

    def test_valid_dataframe_passes(self, valid_input_dataframe):
        """The unmodified fixture frame validates without raising."""
        validate_dataframe(valid_input_dataframe)

    def test_missing_columns_raises_error(self, valid_input_dataframe):
        """Dropping a required column is reported by name."""
        missing_column = REQUIRED_COLUMNS[0]
        # drop() returns a new frame, so the fixture object is left untouched.
        df = valid_input_dataframe.drop(columns=[missing_column])

        with pytest.raises(gr.Error) as exc_info:
            validate_dataframe(df)

        assert f"Missing required columns: {missing_column}" in str(exc_info.value)

    def test_at_least_one_assay_column_raises_error(self, valid_input_dataframe):
        """A frame with every assay column removed is rejected."""
        # errors="ignore" skips assay names that are not present in the
        # fixture instead of raising KeyError.
        df = valid_input_dataframe.drop(columns=ASSAY_LIST, errors="ignore")

        with pytest.raises(gr.Error) as exc_info:
            validate_dataframe(df)

        assert "CSV should include at least one of the following assay columns" in str(
            exc_info.value
        )

    def test_empty_dataframe_raises_error(self, valid_input_dataframe):
        """A frame with the right columns but zero rows is rejected as empty."""
        empty_df = valid_input_dataframe.head(0)

        with pytest.raises(gr.Error) as exc_info:
            validate_dataframe(empty_df)

        assert "CSV file is empty" in str(exc_info.value)

    def test_missing_antibodies_raises_error(self, valid_input_dataframe):
        """Submitting only a subset of the rows is reported as missing predictions."""
        # NOTE(review): presumably the fixture holds more than 50 antibodies,
        # so head(50) leaves some out -- confirm against the fixture.
        df = valid_input_dataframe.head(50)

        with pytest.raises(gr.Error) as exc_info:
            validate_dataframe(df)

        assert "Missing predictions for" in str(exc_info.value)

    def test_missing_values_raises_error(self, valid_input_dataframe):
        """A required column filled with None is reported with its missing-value count."""
        bad_column = REQUIRED_COLUMNS[0]
        df = valid_input_dataframe.copy()
        df[bad_column] = [None] * len(df)

        with pytest.raises(gr.Error) as exc_info:
            validate_dataframe(df)

        assert f"contains {len(df)} missing values" in str(exc_info.value)

    def test_csv_with_extra_columns_passes(self, valid_input_dataframe):
        """Additional, unknown columns do not cause validation to fail."""
        df = valid_input_dataframe.copy()
        # Add two *distinct* extra columns. The previous version assigned the
        # same column name twice, so the second assignment overwrote the first
        # and only a single extra column was ever exercised.
        df["extra_column_1"] = ["extra1"] * len(df)
        df["extra_column_2"] = ["extra2"] * len(df)

        validate_dataframe(df)

    def test_duplicate_antibody_names_raises_error(self, valid_input_dataframe):
        """Repeating one row is reported as exactly one duplicate."""
        # Append a copy of the first row so one antibody appears twice.
        df = pd.concat(
            [valid_input_dataframe, valid_input_dataframe.head(1)],
            ignore_index=True,
        )

        with pytest.raises(gr.Error) as exc_info:
            validate_dataframe(df)

        assert "CSV should have only one row per antibody. Found 1 duplicates." in str(
            exc_info.value
        )

    def test_unrecognized_antibody_names_raises_error(self, valid_input_dataframe):
        """An antibody name outside the expected set is reported by name."""
        unknown_name = "unrecognized_antibody"
        df = valid_input_dataframe.copy()
        df.loc[0, "antibody_name"] = unknown_name

        with pytest.raises(gr.Error) as exc_info:
            validate_dataframe(df)

        # Same expected text as before; the name is now interpolated from a
        # variable rather than from a string literal inside the placeholder.
        assert f"Found unrecognized antibody names: {unknown_name}" in str(
            exc_info.value
        )
class TestValidateCsvFile:
    """Checks for the validate_csv_file entry point."""

    def test_valid_csv_passes(self, valid_csv_content):
        """Valid CSV content goes through validate_csv_file without raising."""
        # No assertion needed: the test fails if the call raises.
        validate_csv_file(valid_csv_content)
|