Some changes to validation logic: add a src/validation module with input and submission-file validators, and wire them into the submission flow in app.py and src/submission/submit.py.
- app.py +49 -12
- src/display/utils.py +4 -19
- src/submission/submit.py +29 -29
- src/validation/__init__.py +0 -0
- src/validation/validate.py +89 -0
app.py CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
 from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns
 
+from display.formatting import styled_error
 from src.about import CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, EVALUATION_QUEUE_TEXT, INTRODUCTION_TEXT, TITLE
 from src.datamodel.data import F1Data
 from src.display.css_html_js import custom_css
@@ -11,21 +12,26 @@ from src.envs import API, CODE_PROBLEMS_REPO, REPO_ID, RESULTS_REPO, SUBMISSIONS
 from src.logger import get_logger
 from src.populate import get_leaderboard_df
 from src.submission.submit import add_new_solutions
+from src.validation.validate import MAX_INPUT_LENGTH, MIN_INPUT_LENGTH, is_submission_file_valid, is_valid
 
 logger = get_logger(__name__)
 
+ENSURE_ALL_PRESENT = False # TODO: Switch to True.
 SPLIT = "warmup" # TODO temp
-SKIP_VALIDATION = True # TODO temp
 
 
 def restart_space():
     API.restart_space(repo_id=REPO_ID)
 
 
-lbdb = F1Data(
+lbdb = F1Data(
+    cp_ds_name=CODE_PROBLEMS_REPO,
+    sub_ds_name=SUBMISSIONS_REPO,
+    res_ds_name=RESULTS_REPO,
+    split=SPLIT,
+)
 leaderboard_df = get_leaderboard_df(RESULTS_REPO)
 
-
 logger.info("Initialized LBDB")
 
 
@@ -117,9 +123,47 @@ with demo:
         submit_button = gr.Button("Submit")
         submission_result = gr.Markdown()
 
-        def add_solution_cbk(
+        def add_solution_cbk(
+            system_name: str,
+            org: str,
+            sys_type: str,
+            submission_path: str,
+        ):
+
+            try:
+                # Validating the submission file.
+                if len(submission_path) == 0:
+                    return styled_error("Please upload JSONL submission file.")
+
+                if not is_submission_file_valid(submission_path):
+                    return styled_error("Failed to read JSONL submission file. Please try again later.")
+
+                # Validating all user-supplied arguments.
+                for val, val_name in [
+                    (system_name, "System name"),
+                    (org, "Organisation name"),
+                    (sys_type, "System type"),
+                ]:
+                    if len(val) == 0:
+                        return styled_error(f"Please fill in the '{val_name}' field.")
+
+                    if not is_valid(val):
+                        return styled_error(
+                            f"{val_name} is invalid! Must only contain characters [a-zA-Z0-9], spaces, "
+                            + "or the special characters '-' and '.', and be of length between "
+                            + f"{MIN_INPUT_LENGTH} and {MAX_INPUT_LENGTH}."
+                        )
+            except Exception:
+                logger.warning("Failed to process user submission", exc_info=True)
+                return styled_error("An error occurred. Please try again later.") # Intentionally vague.
+
             return add_new_solutions(
-                lbdb,
+                lbdb,
+                system_name,
+                org,
+                sys_type,
+                submission_path,
+                ensure_all_present=ENSURE_ALL_PRESENT,
             )
 
         submit_button.click(
@@ -140,13 +184,6 @@ with demo:
             value=CITATION_BUTTON_TEXT.strip(),
            elem_id="citation-block",
        )
-        # citation_button = gr.Textbox(
-        #     value=CITATION_BUTTON_TEXT,
-        #     # label=CITATION_BUTTON_LABEL,
-        #     lines=20,
-        #     elem_id="citation-button",
-        #     show_copy_button=True,
-        # )
 
 logger.info("Scheduler")
 scheduler = BackgroundScheduler()
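The click() arguments fall outside this hunk, so as a rough orientation only, here is a hedged sketch of how the new add_solution_cbk is presumably wired to the submit button shown above; the component names (system_name_textbox, org_textbox, sys_type_dropdown, file_upload) are assumptions, not taken from the commit.

# Sketch only: the four Gradio input components map onto the callback's four
# parameters, and the returned styled message lands in submission_result.
submit_button.click(
    add_solution_cbk,
    inputs=[system_name_textbox, org_textbox, sys_type_dropdown, file_upload],
    outputs=submission_result,
)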
src/display/utils.py CHANGED
@@ -2,7 +2,7 @@ from dataclasses import dataclass
 from enum import Enum
 
 
-def _fields(raw_class):
+def fields(raw_class):
     return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
 
 
@@ -65,27 +65,12 @@ class ModelType(Enum):
         return ModelType.Other
 
 
-class WeightType(Enum):
-    Adapter = ModelDetails("Adapter")
-    Original = ModelDetails("Original")
-    Delta = ModelDetails("Delta")
-
-
 class Precision(Enum):
     float16 = ModelDetails("float16")
     bfloat16 = ModelDetails("bfloat16")
     Unknown = ModelDetails("?")
 
-    def from_str(precision):
-        if precision in ["torch.float16", "float16"]:
-            return Precision.float16
-        if precision in ["torch.bfloat16", "bfloat16"]:
-            return Precision.bfloat16
-        return Precision.Unknown
-
-
-# Column selection
-COLS = [c.name for c in _fields(AutoEvalColumn) if not c.hidden]
 
-
-
+COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
+EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
+EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
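For context, a minimal, self-contained sketch of what the renamed fields() helper does: it collects the non-dunder class attributes of a column-definition class. The ColumnInfo dataclass and DemoColumns below are illustrative stand-ins, not the repository's actual AutoEvalColumn/EvalQueueColumn definitions.

from dataclasses import dataclass


@dataclass
class ColumnInfo:  # Hypothetical stand-in for the repo's column dataclass.
    name: str
    type: str = "str"
    hidden: bool = False


class DemoColumns:  # Hypothetical stand-in for AutoEvalColumn.
    system = ColumnInfo("System")
    score = ColumnInfo("Score", type="number")
    internal_id = ColumnInfo("id", hidden=True)


def fields(raw_class):
    # Same body as the helper in src/display/utils.py.
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]


print([c.name for c in fields(DemoColumns) if not c.hidden])  # ['System', 'Score']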
src/submission/submit.py CHANGED
@@ -5,37 +5,40 @@ import pandas as pd
 from datasets import Dataset
 from pandas.api.types import is_integer_dtype
 
+from app import is_valid
 from src.datamodel.data import F1Data
 from src.display.formatting import styled_error, styled_message
 from src.display.utils import ModelType
 from src.envs import SUBMISSIONS_REPO
 from src.logger import get_logger
+from validation.validate import is_submission_file_valid
 
 logger = get_logger(__name__)
 
 
-def
-
+def _validate_all_submissions_present(
+    lbdb: F1Data,
+    pd_ds: pd.DataFrame,
+):
+    logger.info(f"Validating DS size {len(pd_ds)} columns {pd_ds.columns} set {set(pd_ds.columns)}")
     expected_cols = ["problem_id", "solution"]
 
     if set(pd_ds.columns) != set(expected_cols):
-        return f"Expected attributes: {expected_cols}, Got: {pd_ds.columns.tolist()}"
+        return ValueError(f"Expected attributes: {expected_cols}, Got: {pd_ds.columns.tolist()}")
 
     if not is_integer_dtype(pd_ds["problem_id"]):
-        return "problem_id must be str convertible to int"
+        return ValueError("problem_id must be str convertible to int")
 
     if any(type(v) is not str for v in pd_ds["solution"]):
-        return "solution must be of type str"
+        return ValueError("solution must be of type str")
 
     submitted_ids = set(pd_ds.problem_id.astype(str))
     if submitted_ids != lbdb.code_problem_ids:
         missing = lbdb.code_problem_ids - submitted_ids
         unknown = submitted_ids - lbdb.code_problem_ids
-
+        raise ValueError(f"Mismatched problem IDs: {len(missing)} missing, {len(unknown)} unknown")
     if len(pd_ds) > len(lbdb.code_problem_ids):
-        return "Duplicate problem IDs exist in uploaded file"
-
-    return None
+        return ValueError("Duplicate problem IDs exist in uploaded file")
 
 
 def add_new_solutions(
@@ -44,36 +47,33 @@ def add_new_solutions(
     org: str,
     sys_type: str,
     submission_path: str,
-
+    ensure_all_present: bool = False,
 ):
-    logger.info(
-
-
+    logger.info(
+        f"Adding new submission! {system_name=}, {org=}, {sys_type=} and {submission_path=}",
+    )
 
-
-
+    # Double-checking.
+    for val in [system_name, org, sys_type]:
+        assert is_valid(val)
+    assert is_submission_file_valid(submission_path)
 
-    if not sys_type:
-        return styled_error("Please select system type")
     sys_type = ModelType.from_str(sys_type).name
 
-    if not submission_path:
-        return styled_error("Please upload JSONL solutions file")
-
     try:
         submission_df = pd.read_json(submission_path, lines=True)
-
-
-
-
-
-
-
+        if ensure_all_present:
+            _validate_all_submissions_present(lbdb=lbdb, pd_ds=submission_df)
+    except Exception:
+        logger.warning("Failed to parse submission DF!", exc_info=True)
+        return styled_error(
+            "An error occurred. Please try again later."
+        )  # Use same message as external error. Avoid infoleak.
 
     submission_id = f"{system_name}_{org}_{sys_type}_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"
 
-    # Seems good, creating the eval
-
+    # Seems good, creating the eval.
+    logger.info(f"Adding new submission: {submission_id}")
     submission_ts = time.time_ns()
 
     def add_info(row):
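The assert is_valid(val) double-check above ties into how add_new_solutions builds the submission id: the user-supplied fields are joined with underscores, which is why the validator refuses underscores in them. A small illustration, using made-up field values (the resolved ModelType name "OpenSource" is a placeholder, not a name from the commit):

from datetime import datetime, timezone

# Hypothetical values that already passed is_valid() (no underscores).
system_name, org, sys_type = "MySystem", "Acme Labs", "OpenSource"

# Same format string as in add_new_solutions.
submission_id = f"{system_name}_{org}_{sys_type}_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"
print(submission_id)  # e.g. MySystem_Acme Labs_OpenSource_20250101_120000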
src/validation/__init__.py ADDED
(empty file)
src/validation/validate.py ADDED
@@ -0,0 +1,89 @@
+import json
+import os
+import string
+
+DATASET_SIZE = 120
+
+MIN_INPUT_LENGTH = 2
+MAX_INPUT_LENGTH = 20
+
+MIN_SUBMISSION_SIZE = 1
+MAX_SUBMISSION_SIZE = 1024 * 1024 * 120 # 120 MB.
+MAX_SINGLE_SUBMISSION_SIZE = 1024 * 1024 # 1MB.
+MAX_SUBMISSION_LINES = DATASET_SIZE + 1 # Allow empty line.
+
+
+def is_valid(
+    s: str,
+    min_length: int = MIN_INPUT_LENGTH,
+    max_length: int = MAX_INPUT_LENGTH,
+) -> bool:
+    """
+    @brief Checks whether the given string is valid.
+    @param s The string to validate.
+    @return True iff all characters are in [a-zA-Z0-9], spaces, or '.' and '-', and the length is between
+            min length and max length.
+    """
+
+    characters = [c for c in s]  # Not using the length from len(.) as that includes unicode characters.
+    if len(characters) < min_length or len(characters) > max_length:
+        return False
+
+    # Very important: We delimit using underscores. So these _CANNOT_ be allowed in sanitised strings.
+    ALLOWED = (
+        [c for c in string.ascii_lowercase]
+        + [c for c in string.ascii_uppercase]
+        + [c for c in string.digits]
+        + [" ", ".", "-"]
+    )
+    for c in s:
+        if c not in ALLOWED:
+            return False
+    return True
+
+
+def is_submission_file_valid(submission_path: str) -> bool:
+    """
+    @brief Checks whether the given submission file is valid.
+    @param submission_path The path to the submission file.
+    @return True iff the file is within the size constraints, a JSONL, and every line is no longer than
+            the fixed maximum bound.
+    """
+
+    if not os.path.exists(submission_path):
+        return False
+
+    submission_size = os.stat(submission_path).st_size
+    if submission_size < MIN_SUBMISSION_SIZE or submission_size > MAX_SUBMISSION_SIZE:
+        return False
+
+    with open(submission_path, "r") as f:
+
+        # Not using readlines() to avoid consuming a large buffer at once.
+        n_lines = 0
+        seen_ids = set()
+        while len(line := f.readline(MAX_SINGLE_SUBMISSION_SIZE)) > 0:
+            n_lines += 1
+            if n_lines > MAX_SUBMISSION_LINES:
+                return False
+
+            if not line.startswith("{") or not line.endswith("}"):
+                return False
+
+            d = json.loads(line)
+            if set(d.keys()) != set(["problem_id", "solution"]):
+                return False
+
+            if not ((type(d["problem_id"]) is str or type(d["problem_id"]) is int) and type(d["solution"] is str)):
+                return False
+            if not d["problem_id"].isdigit():
+                return False
+            problem_id = int(d["problem_id"])
+            if problem_id < 0 or problem_id >= DATASET_SIZE:
+                return False
+
+            if problem_id in seen_ids:
+                return False  # Duplicate submission.
+            seen_ids.add(problem_id)
+
+    return True
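A quick usage sketch for the two new validators, assuming the repository root is on the import path (matching the from src.validation.validate import ... line added to app.py); the sample values are illustrative only:

import json
import tempfile

from src.validation.validate import is_submission_file_valid, is_valid

# Metadata fields: [a-zA-Z0-9], spaces, '.' and '-', length between 2 and 20.
print(is_valid("GPT-4o"))    # True
print(is_valid("x"))         # False: shorter than MIN_INPUT_LENGTH
print(is_valid("bad_name"))  # False: '_' is reserved as the submission-id delimiter

# Submission file: one JSON object per line with exactly the keys
# "problem_id" and "solution", problem_id a digit string below DATASET_SIZE.
with tempfile.NamedTemporaryFile("w", suffix=".jsonl", delete=False) as f:
    f.write(json.dumps({"problem_id": "0", "solution": "print(42)"}))
    path = f.name
print(is_submission_file_valid(path))  # True for this single-line file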