pquintero committed on
Commit
bff3b9b
·
1 Parent(s): f412a50

rename to cross_validation

Browse files
Files changed (2) hide show
  1. constants.py +4 -2
  2. validation.py +6 -4
constants.py CHANGED
@@ -40,11 +40,13 @@ CV_COLUMN = "hierarchical_cluster_IgG_isotype_stratified_fold"
40
  # Example files
41
  EXAMPLE_FILE_DICT = {
42
  "GDPa1": "data/example-predictions.csv",
43
- "GDPa1_CV": "data/example-predictions-cv.csv",
44
  }
45
  ANTIBODY_NAMES_DICT = {
46
  "GDPa1": pd.read_csv(EXAMPLE_FILE_DICT["GDPa1"])["antibody_name"].tolist(),
47
- "GDPa1_CV": pd.read_csv(EXAMPLE_FILE_DICT["GDPa1_CV"])["antibody_name"].tolist(),
 
 
48
  }
49
 
50
  # Huggingface API
 
40
  # Example files
41
  EXAMPLE_FILE_DICT = {
42
  "GDPa1": "data/example-predictions.csv",
43
+ "GDPa1_cross_validation": "data/example-predictions-cv.csv",
44
  }
45
  ANTIBODY_NAMES_DICT = {
46
  "GDPa1": pd.read_csv(EXAMPLE_FILE_DICT["GDPa1"])["antibody_name"].tolist(),
47
+ "GDPa1_cross_validation": pd.read_csv(EXAMPLE_FILE_DICT["GDPa1_cross_validation"])[
48
+ "antibody_name"
49
+ ].tolist(),
50
  }
51
 
52
  # Huggingface API
validation.py CHANGED
@@ -47,7 +47,9 @@ def validate_csv_can_be_read(file_content: str) -> pd.DataFrame:
47
  raise gr.Error(f"❌ Unexpected error reading CSV file: {str(e)}")
48
 
49
 
50
- def validate_cv_submission(df: pd.DataFrame, submission_type: str = "GDPa1_CV") -> None:
 
 
51
  """Validate cross-validation submission"""
52
  # Must have CV_COLUMN for CV submissions
53
  if CV_COLUMN not in df.columns:
@@ -102,7 +104,7 @@ def validate_dataframe(df: pd.DataFrame, submission_type: str = "GDPa1") -> None
102
  df: pd.DataFrame
103
  The DataFrame to validate.
104
  submission_type: str
105
- Type of submission: "GDPa1" or "GDPa1_CV"
106
 
107
  Raises
108
  ------
@@ -160,7 +162,7 @@ def validate_dataframe(df: pd.DataFrame, submission_type: str = "GDPa1") -> None
160
  f"❌ Missing predictions for {len(missing_antibodies)} antibodies: {', '.join(missing_antibodies)}"
161
  )
162
  # Submission-type specific validation
163
- if submission_type.endswith("_CV"):
164
  validate_cv_submission(df, submission_type)
165
  else: # full_dataset
166
  validate_full_dataset_submission(df)
@@ -175,7 +177,7 @@ def validate_csv_file(file_content: str, submission_type: str = "GDPa1") -> None
175
  file_content: str
176
  The content of the uploaded CSV file.
177
  submission_type: str
178
- Type of submission: "standard" or "cv"
179
 
180
  Raises
181
  ------
 
47
  raise gr.Error(f"❌ Unexpected error reading CSV file: {str(e)}")
48
 
49
 
50
+ def validate_cv_submission(
51
+ df: pd.DataFrame, submission_type: str = "GDPa1_cross_validation"
52
+ ) -> None:
53
  """Validate cross-validation submission"""
54
  # Must have CV_COLUMN for CV submissions
55
  if CV_COLUMN not in df.columns:
 
104
  df: pd.DataFrame
105
  The DataFrame to validate.
106
  submission_type: str
107
+ Type of submission: "GDPa1" or "GDPa1_cross_validation"
108
 
109
  Raises
110
  ------
 
162
  f"❌ Missing predictions for {len(missing_antibodies)} antibodies: {', '.join(missing_antibodies)}"
163
  )
164
  # Submission-type specific validation
165
+ if submission_type.endswith("_cross_validation"):
166
  validate_cv_submission(df, submission_type)
167
  else: # full_dataset
168
  validate_full_dataset_submission(df)
 
177
  file_content: str
178
  The content of the uploaded CSV file.
179
  submission_type: str
180
+ Type of submission: "GDPa1" or "GDPa1_cross_validation"
181
 
182
  Raises
183
  ------