import json
import os
import string

DATASET_SIZE = 120
MIN_INPUT_LENGTH = 2
MAX_INPUT_LENGTH = 20
MIN_SUBMISSION_SIZE = 1
MAX_SUBMISSION_SIZE = 1024 * 1024 * 120  # 120 MB.
MAX_SINGLE_SUBMISSION_SIZE = 1024 * 1024  # 1MB.
MAX_SUBMISSION_LINES = DATASET_SIZE + 1  # Allow empty line.

# Very important: We delimit using underscores. So these _CANNOT_ be allowed
# in sanitised strings.
_ALLOWED_CHARACTERS = frozenset(string.ascii_letters + string.digits + " .-")


def is_valid(
    s: str,
    min_length: int = MIN_INPUT_LENGTH,
    max_length: int = MAX_INPUT_LENGTH,
) -> bool:
    """
    @brief Checks whether the given string is valid.
    @param s The string to validate.
    @param min_length The minimum accepted number of characters (inclusive).
    @param max_length The maximum accepted number of characters (inclusive).
    @return True iff all characters are in [a-zA-Z0-9], spaces, or '.' and '-',
            and the length is between min length and max length.
    """
    # The length is counted in code points; any non-ASCII character is
    # rejected by the membership test below anyway.
    if not min_length <= len(s) <= max_length:
        return False
    return all(c in _ALLOWED_CHARACTERS for c in s)


def _problem_id_from_record(record: str):
    """
    @brief Parses one JSONL record and extracts its problem id.
    @param record A single line of the submission, without its line terminator.
    @return The problem id as an int in [0, DATASET_SIZE), or None if the
            record is malformed in any way.
    """
    if not (record.startswith("{") and record.endswith("}")):
        return None
    try:
        d = json.loads(record)
    except (ValueError, RecursionError):
        # Malformed JSON (JSONDecodeError is a ValueError) or absurdly deep nesting.
        return None
    if not isinstance(d, dict) or set(d) != {"problem_id", "solution"}:
        return None
    if not isinstance(d["solution"], str):
        return None

    raw_id = d["problem_id"]
    if isinstance(raw_id, bool):
        # bool is a subclass of int, but JSON true/false is not a problem id.
        return None
    if isinstance(raw_id, int):
        problem_id = raw_id
    elif isinstance(raw_id, str) and raw_id.isascii() and raw_id.isdigit():
        # str.isdigit() alone accepts characters such as superscript digits
        # that int() cannot parse, hence the additional ASCII restriction.
        try:
            problem_id = int(raw_id)
        except ValueError:
            # E.g. a digit string longer than the interpreter's int parsing limit.
            return None
    else:
        return None

    if 0 <= problem_id < DATASET_SIZE:
        return problem_id
    return None


def is_submission_file_valid(submission_path: str) -> bool:
    """
    @brief Checks whether the given submission file is valid.
    @param submission_path The path to the submission file.
    @return True iff the file is a regular file within the size constraints,
            is a UTF-8 JSONL file with at most MAX_SUBMISSION_LINES lines,
            every line is no longer than the fixed maximum bound, and every
            non-empty line is an object with exactly the keys "problem_id"
            (a non-negative integer below DATASET_SIZE, given as an int or a
            string of ASCII digits, unique within the file) and "solution"
            (a string). Empty lines are skipped but still count towards the
            line limit. A file without any record is invalid.
    """
    if not os.path.isfile(submission_path):
        return False

    submission_size = os.stat(submission_path).st_size
    if not MIN_SUBMISSION_SIZE <= submission_size <= MAX_SUBMISSION_SIZE:
        return False

    seen_ids = set()
    n_lines = 0
    try:
        with open(submission_path, "r", encoding="utf-8") as f:
            # Not using readlines() to avoid consuming a large buffer at once.
            # One character more than the limit is requested so that an
            # over-long line can be told apart from one exactly at the limit.
            while line := f.readline(MAX_SINGLE_SUBMISSION_SIZE + 1):
                n_lines += 1
                if n_lines > MAX_SUBMISSION_LINES:
                    return False

                # readline() keeps the line terminator; drop it before checking.
                record = line.rstrip("\n")
                if len(record) > MAX_SINGLE_SUBMISSION_SIZE:
                    return False
                if not record:
                    continue

                problem_id = _problem_id_from_record(record)
                if problem_id is None:
                    return False
                if problem_id in seen_ids:
                    return False  # Duplicate submission.
                seen_ids.add(problem_id)
    except UnicodeDecodeError:
        # Not valid UTF-8 text, hence not a valid JSONL file.
        return False

    return bool(seen_ids)