galb-dai committed on
Commit 5048713 · 1 Parent(s): 34a2915

Some changes to validation logic.

app.py CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
 from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns
 
+from src.display.formatting import styled_error
 from src.about import CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, EVALUATION_QUEUE_TEXT, INTRODUCTION_TEXT, TITLE
 from src.datamodel.data import F1Data
 from src.display.css_html_js import custom_css
@@ -11,21 +12,26 @@ from src.envs import API, CODE_PROBLEMS_REPO, REPO_ID, RESULTS_REPO, SUBMISSIONS
 from src.logger import get_logger
 from src.populate import get_leaderboard_df
 from src.submission.submit import add_new_solutions
+from src.validation.validate import MAX_INPUT_LENGTH, MIN_INPUT_LENGTH, is_submission_file_valid, is_valid
 
 logger = get_logger(__name__)
 
+ENSURE_ALL_PRESENT = False  # TODO: Switch to True.
 SPLIT = "warmup"  # TODO temp
-SKIP_VALIDATION = True  # TODO temp
 
 
 def restart_space():
     API.restart_space(repo_id=REPO_ID)
 
 
-lbdb = F1Data(cp_ds_name=CODE_PROBLEMS_REPO, sub_ds_name=SUBMISSIONS_REPO, res_ds_name=RESULTS_REPO, split=SPLIT)
+lbdb = F1Data(
+    cp_ds_name=CODE_PROBLEMS_REPO,
+    sub_ds_name=SUBMISSIONS_REPO,
+    res_ds_name=RESULTS_REPO,
+    split=SPLIT,
+)
 leaderboard_df = get_leaderboard_df(RESULTS_REPO)
 
-
 logger.info("Initialized LBDB")
@@ -117,9 +123,47 @@ with demo:
     submit_button = gr.Button("Submit")
     submission_result = gr.Markdown()
 
-    def add_solution_cbk(system_name, org, sys_type, submission_path):
+    def add_solution_cbk(
+        system_name: str,
+        org: str,
+        sys_type: str,
+        submission_path: str,
+    ):
+
+        try:
+            # Validating the submission file.
+            if len(submission_path) == 0:
+                return styled_error("Please upload JSONL submission file.")
+
+            if not is_submission_file_valid(submission_path):
+                return styled_error("Failed to read JSONL submission file. Please try again later.")
+
+            # Validating all user-supplied arguments.
+            for val, val_name in [
+                (system_name, "System name"),
+                (org, "Organisation name"),
+                (sys_type, "System type"),
+            ]:
+                if len(val) == 0:
+                    return styled_error(f"Please fill in the '{val_name}' field.")
+
+                if not is_valid(val):
+                    return styled_error(
+                        f"{val_name} is invalid! Must only contain characters [a-zA-Z0-9], spaces, "
+                        + "or the special characters '-' and '.', and be of length between "
+                        + f"{MIN_INPUT_LENGTH} and {MAX_INPUT_LENGTH}."
+                    )
+        except Exception:
+            logger.warning("Failed to process user submission", exc_info=True)
+            return styled_error("An error occurred. Please try again later.")  # Intentionally vague.
+
         return add_new_solutions(
-            lbdb, system_name, org, sys_type, submission_path, skip_validation=SKIP_VALIDATION
+            lbdb,
+            system_name,
+            org,
+            sys_type,
+            submission_path,
+            ensure_all_present=ENSURE_ALL_PRESENT,
         )
 
     submit_button.click(
@@ -140,13 +184,6 @@ with demo:
         value=CITATION_BUTTON_TEXT.strip(),
        elem_id="citation-block",
     )
-    # citation_button = gr.Textbox(
-    #     value=CITATION_BUTTON_TEXT,
-    #     # label=CITATION_BUTTON_LABEL,
-    #     lines=20,
-    #     elem_id="citation-button",
-    #     show_copy_button=True,
-    # )
 
 logger.info("Scheduler")
 scheduler = BackgroundScheduler()
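
For context on how a callback like add_solution_cbk hangs off the UI, here is a minimal, self-contained Gradio sketch of the Button → callback → Markdown pattern the diff relies on. The component and callback names below are illustrative stand-ins, not the leaderboard's actual variables:

```python
# Minimal sketch of the gr.Button -> callback -> gr.Markdown pattern used above.
# echo_cbk and name_box are hypothetical; the real app wires add_solution_cbk
# to its own textboxes, dropdown, and file upload.
import gradio as gr


def echo_cbk(system_name: str) -> str:
    # Mirror the empty-field check performed in add_solution_cbk.
    if len(system_name) == 0:
        return "Please fill in the 'System name' field."
    return f"Received submission from: {system_name}"


with gr.Blocks() as demo:
    name_box = gr.Textbox(label="System name")
    submit_button = gr.Button("Submit")
    submission_result = gr.Markdown()
    submit_button.click(echo_cbk, inputs=[name_box], outputs=submission_result)

if __name__ == "__main__":
    demo.launch()
```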
src/display/utils.py CHANGED
@@ -2,7 +2,7 @@ from dataclasses import dataclass
 from enum import Enum
 
 
-def _fields(raw_class):
+def fields(raw_class):
     return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
 
 
@@ -65,27 +65,12 @@ class ModelType(Enum):
         return ModelType.Other
 
 
-class WeightType(Enum):
-    Adapter = ModelDetails("Adapter")
-    Original = ModelDetails("Original")
-    Delta = ModelDetails("Delta")
-
-
 class Precision(Enum):
     float16 = ModelDetails("float16")
     bfloat16 = ModelDetails("bfloat16")
     Unknown = ModelDetails("?")
 
-    def from_str(precision):
-        if precision in ["torch.float16", "float16"]:
-            return Precision.float16
-        if precision in ["torch.bfloat16", "bfloat16"]:
-            return Precision.bfloat16
-        return Precision.Unknown
-
-
-# Column selection
-COLS = [c.name for c in _fields(AutoEvalColumn) if not c.hidden]
 
-EVAL_COLS = [c.name for c in _fields(EvalQueueColumn)]
-EVAL_TYPES = [c.type for c in _fields(EvalQueueColumn)]
+COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
+EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
+EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
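
As a reference for what the renamed fields() helper collects, here is a small runnable illustration. ExampleColumn and ColumnContent are made-up stand-ins for this sketch; the diff itself only shows AutoEvalColumn and EvalQueueColumn being consumed:

```python
# fields() is copied verbatim from the diff; the classes around it are hypothetical.
from dataclasses import dataclass


@dataclass
class ColumnContent:
    name: str
    type: str
    hidden: bool = False


class ExampleColumn:
    # Plain class attributes, mirroring how the repo's column classes are consumed.
    system = ColumnContent("System", "str")
    score = ColumnContent("Score", "number")
    debug = ColumnContent("Debug", "str", hidden=True)


def fields(raw_class):
    # Collect every non-dunder class attribute, in definition order.
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]


print([c.name for c in fields(ExampleColumn) if not c.hidden])  # ['System', 'Score']
```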
src/submission/submit.py CHANGED
@@ -5,37 +5,39 @@ import pandas as pd
 from datasets import Dataset
 from pandas.api.types import is_integer_dtype
 
+from src.validation.validate import is_submission_file_valid, is_valid
 from src.datamodel.data import F1Data
 from src.display.formatting import styled_error, styled_message
 from src.display.utils import ModelType
 from src.envs import SUBMISSIONS_REPO
 from src.logger import get_logger
 
 logger = get_logger(__name__)
 
 
-def validate_submission(lbdb: F1Data, pd_ds: pd.DataFrame) -> str | None:
-    logger.info("Validating DS size %d columns %s set %s", len(pd_ds), pd_ds.columns, set(pd_ds.columns))
+def _validate_all_submissions_present(
+    lbdb: F1Data,
+    pd_ds: pd.DataFrame,
+):
+    logger.info(f"Validating DS size {len(pd_ds)} columns {pd_ds.columns} set {set(pd_ds.columns)}")
     expected_cols = ["problem_id", "solution"]
 
     if set(pd_ds.columns) != set(expected_cols):
-        return f"Expected attributes: {expected_cols}, Got: {pd_ds.columns.tolist()}"
+        raise ValueError(f"Expected attributes: {expected_cols}, Got: {pd_ds.columns.tolist()}")
 
     if not is_integer_dtype(pd_ds["problem_id"]):
-        return "problem_id must be str convertible to int"
+        raise ValueError("problem_id must be str convertible to int")
 
     if any(type(v) is not str for v in pd_ds["solution"]):
-        return "solution must be of type str"
+        raise ValueError("solution must be of type str")
 
     submitted_ids = set(pd_ds.problem_id.astype(str))
     if submitted_ids != lbdb.code_problem_ids:
         missing = lbdb.code_problem_ids - submitted_ids
         unknown = submitted_ids - lbdb.code_problem_ids
-        return f"Mismatched problem IDs: {len(missing)} missing, {len(unknown)} unknown"
+        raise ValueError(f"Mismatched problem IDs: {len(missing)} missing, {len(unknown)} unknown")
     if len(pd_ds) > len(lbdb.code_problem_ids):
-        return "Duplicate problem IDs exist in uploaded file"
-
-    return None
+        raise ValueError("Duplicate problem IDs exist in uploaded file")
 
 
 def add_new_solutions(
@@ -44,36 +46,33 @@ def add_new_solutions(
     org: str,
     sys_type: str,
     submission_path: str,
-    skip_validation: bool = False,
+    ensure_all_present: bool = False,
 ):
-    logger.info("ADD SUBMISSION! %s path %s", str((system_name, org, sys_type)), submission_path)
-    if not system_name:
-        return styled_error("Please fill system name")
+    logger.info(
+        f"Adding new submission! {system_name=}, {org=}, {sys_type=} and {submission_path=}",
+    )
 
-    if not org:
-        return styled_error("Please fill organization name")
+    # Double-checking.
+    for val in [system_name, org, sys_type]:
+        assert is_valid(val)
+    assert is_submission_file_valid(submission_path)
 
-    if not sys_type:
-        return styled_error("Please select system type")
     sys_type = ModelType.from_str(sys_type).name
 
-    if not submission_path:
-        return styled_error("Please upload JSONL solutions file")
-
     try:
         submission_df = pd.read_json(submission_path, lines=True)
-    except Exception as e:
-        return styled_error(f"Cannot read uploaded JSONL file: {str(e)}")
-
-    if not skip_validation:
-        validation_error = validate_submission(lbdb, submission_df)
-        if validation_error:
-            return styled_error(validation_error)
+        if ensure_all_present:
+            _validate_all_submissions_present(lbdb=lbdb, pd_ds=submission_df)
+    except Exception:
+        logger.warning("Failed to parse submission DF!", exc_info=True)
+        return styled_error(
+            "An error occurred. Please try again later."
+        )  # Use same message as external error. Avoid infoleak.
 
     submission_id = f"{system_name}_{org}_{sys_type}_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"
 
-    # Seems good, creating the eval
-    print(f"Adding new submission: {submission_id}")
+    # Seems good, creating the eval.
+    logger.info(f"Adding new submission: {submission_id}")
     submission_ts = time.time_ns()
 
     def add_info(row):
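
To exercise the strict completeness check in isolation, here is a rough local sketch of _validate_all_submissions_present as it appears above, with a stand-in for F1Data. StubLBDB and its three problem IDs are assumptions for illustration only; the real object comes from src.datamodel.data.F1Data:

```python
# Local exercise of the completeness/duplicate logic; StubLBDB is hypothetical.
from dataclasses import dataclass, field

import pandas as pd
from pandas.api.types import is_integer_dtype


@dataclass
class StubLBDB:
    # Stand-in for F1Data: only the attribute the validator touches.
    code_problem_ids: set = field(default_factory=lambda: {"0", "1", "2"})


def _validate_all_submissions_present(lbdb, pd_ds: pd.DataFrame):
    expected_cols = ["problem_id", "solution"]
    if set(pd_ds.columns) != set(expected_cols):
        raise ValueError(f"Expected attributes: {expected_cols}, Got: {pd_ds.columns.tolist()}")
    if not is_integer_dtype(pd_ds["problem_id"]):
        raise ValueError("problem_id must be str convertible to int")
    if any(type(v) is not str for v in pd_ds["solution"]):
        raise ValueError("solution must be of type str")
    submitted_ids = set(pd_ds.problem_id.astype(str))
    if submitted_ids != lbdb.code_problem_ids:
        missing = lbdb.code_problem_ids - submitted_ids
        unknown = submitted_ids - lbdb.code_problem_ids
        raise ValueError(f"Mismatched problem IDs: {len(missing)} missing, {len(unknown)} unknown")
    if len(pd_ds) > len(lbdb.code_problem_ids):
        raise ValueError("Duplicate problem IDs exist in uploaded file")


ok = pd.DataFrame({"problem_id": [0, 1, 2], "solution": ["a", "b", "c"]})
_validate_all_submissions_present(StubLBDB(), ok)  # Passes silently.

bad = pd.DataFrame({"problem_id": [0, 1], "solution": ["a", "b"]})
try:
    _validate_all_submissions_present(StubLBDB(), bad)
except ValueError as e:
    print(e)  # Mismatched problem IDs: 1 missing, 0 unknown
```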
src/validation/__init__.py ADDED
File without changes
src/validation/validate.py ADDED
@@ -0,0 +1,91 @@
+import json
+import os
+import string
+
+DATASET_SIZE = 120
+
+MIN_INPUT_LENGTH = 2
+MAX_INPUT_LENGTH = 20
+
+MIN_SUBMISSION_SIZE = 1
+MAX_SUBMISSION_SIZE = 1024 * 1024 * 120  # 120 MB.
+MAX_SINGLE_SUBMISSION_SIZE = 1024 * 1024  # 1 MB.
+MAX_SUBMISSION_LINES = DATASET_SIZE + 1  # Allow a trailing empty line.
+
+
+def is_valid(
+    s: str,
+    min_length: int = MIN_INPUT_LENGTH,
+    max_length: int = MAX_INPUT_LENGTH,
+) -> bool:
+    """
+    @brief Checks whether the given string is valid.
+    @param s The string to validate.
+    @return True iff all characters are in [a-zA-Z0-9], spaces, or '.' and '-', and the length is
+    between min_length and max_length.
+    """
+
+    if len(s) < min_length or len(s) > max_length:
+        return False
+
+    # Very important: We delimit using underscores. So these _CANNOT_ be allowed in sanitised strings.
+    ALLOWED = (
+        [c for c in string.ascii_lowercase]
+        + [c for c in string.ascii_uppercase]
+        + [c for c in string.digits]
+        + [" ", ".", "-"]
+    )
+    for c in s:
+        if c not in ALLOWED:
+            return False
+    return True
+
+
+def is_submission_file_valid(submission_path: str) -> bool:
+    """
+    @brief Checks whether the given submission file is valid.
+    @param submission_path The path to the submission file.
+    @return True iff the file is within the size constraints, is JSONL, and every line is no longer
+    than the fixed maximum bound.
+    """
+
+    if not os.path.exists(submission_path):
+        return False
+
+    submission_size = os.stat(submission_path).st_size
+    if submission_size < MIN_SUBMISSION_SIZE or submission_size > MAX_SUBMISSION_SIZE:
+        return False
+
+    with open(submission_path, "r") as f:
+        # Not using readlines() to avoid consuming a large buffer at once.
+        n_lines = 0
+        seen_ids = set()
+        while len(line := f.readline(MAX_SINGLE_SUBMISSION_SIZE)) > 0:
+            n_lines += 1
+            if n_lines > MAX_SUBMISSION_LINES:
+                return False
+
+            line = line.strip()
+            if not line:
+                continue  # Allow a trailing empty line.
+
+            if not line.startswith("{") or not line.endswith("}"):
+                return False
+
+            d = json.loads(line)
+            if set(d.keys()) != set(["problem_id", "solution"]):
+                return False
+
+            if not ((type(d["problem_id"]) is str or type(d["problem_id"]) is int) and type(d["solution"]) is str):
+                return False
+            if not str(d["problem_id"]).isdigit():
+                return False
+            problem_id = int(d["problem_id"])
+            if problem_id < 0 or problem_id >= DATASET_SIZE:
+                return False
+
+            if problem_id in seen_ids:
+                return False  # Duplicate submission.
+            seen_ids.add(problem_id)
+
+    return True
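
A quick way to sanity-check the two new validators locally, assuming the repository root is on PYTHONPATH so the module imports as src.validation.validate; the inputs below are arbitrary examples:

```python
# Sanity checks against the validators added in this commit.
import json
import tempfile

from src.validation.validate import is_submission_file_valid, is_valid

print(is_valid("GPT-4o mini"))  # True: letters, digits, space, and '-' within the length bounds.
print(is_valid("bad_name"))     # False: underscores are the submission-ID delimiter and are rejected.
print(is_valid("x"))            # False: shorter than MIN_INPUT_LENGTH.

# Build a small well-formed JSONL submission in a temp file.
with tempfile.NamedTemporaryFile("w", suffix=".jsonl", delete=False) as f:
    for problem_id in range(3):
        f.write(json.dumps({"problem_id": str(problem_id), "solution": "print('hi')"}) + "\n")
    path = f.name

print(is_submission_file_valid(path))  # True: within size limits, valid JSONL, unique in-range IDs.
```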