Commit 236bb17: Model output submission.
import json
import os
import re
from datetime import datetime, timezone
from pathlib import Path

from src.display.formatting import styled_error, styled_message, styled_warning
from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO, PROMPT_VERSIONS, PREDICTIONS_REPO
from src.submission.check_validity import already_submitted_models, is_model_on_hub, get_model_properties

REQUESTED_MODELS = None


def read_configuration(file_paths):
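    """Parse an uploaded output folder: find the single configuration JSON and the per-task prediction files.

    Returns a tuple of (configuration data, prediction files, model name, prompt version, n-shot, status message);
    on failure the missing values are returned as None together with a styled error message.
    """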
    configuration_file_paths = list(filter(lambda file_path: file_path.name.endswith(".json"), file_paths or []))
    if len(configuration_file_paths) != 1:
        return None, None, None, None, None, styled_error(f"Expected exactly one configuration file but found {len(configuration_file_paths)}!")
    configuration_file_path = file_paths.pop(file_paths.index(configuration_file_paths[0]))
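    # Load and parse the configuration JSON.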
    try:
        with open(configuration_file_path.name, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception:
        return None, None, None, None, None, styled_error("Failed to read configuration file!")
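    # Extract the model details and per-task metadata; any missing key is reported as a format error below.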
    model_name = None  # Initialised up front so the KeyError handler below can always reference it.
    try:
        model_name = data["model_name"]
        model_args = {
            **dict({tuple(arg.split("=")) for arg in data["config"].get("model_args", "").split(",") if len(arg) > 0}),
            "revision": data["config"]["model_revision"],
            "trust_remote_code": True,
            "cache_dir": None,
        }
        base_model = model_args.pop("pretrained")
        model_on_hub, error, _ = is_model_on_hub(model_name=base_model, model_args=model_args, token=TOKEN, test_tokenizer=True)
        if not model_on_hub:
            return None, None, model_name, None, None, styled_error(f"Model {model_name} {error}")
        limit = data["config"]["limit"]
        if limit is not None:
            return None, None, model_name, None, None, styled_error(f"Only full results are accepted but found a specified limit of {limit}!")
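        # Collect one prediction file per configured task, together with its prompt version and shot count.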
        prediction_files = {}
        versions = {}
        n_shots = {}
        for task_name, _ in data["configs"].items():
            sample_files = list(filter(lambda file_path: re.search(rf"samples_{task_name}_.*\.jsonl", file_path.name), file_paths))
            if len(sample_files) == 0:
                return None, None, model_name, None, None, styled_error(f"No prediction file found for configured task {task_name}!")
            prediction_files[task_name] = str(file_paths.pop(file_paths.index(sample_files[0])))
            versions[task_name] = data["versions"][task_name]
            n_shots[task_name] = data["n-shot"][task_name]
        if len(prediction_files) == 0:
            return None, None, model_name, None, None, styled_error("No tasks found in configuration!")
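        # All tasks must share a single known prompt version and a single shot count.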
        versions = set(versions.values())
        if len(versions) != 1:
            return None, None, model_name, None, None, styled_error(f"All tasks should have the same version but found {versions}!")
        version = list(versions)[0]
        if version not in PROMPT_VERSIONS:
            return None, None, model_name, None, None, styled_error(f"Unknown version {version}, should be one of {PROMPT_VERSIONS}!")

        n_shots = set(n_shots.values())
        if len(n_shots) != 1:
            return None, None, model_name, version, None, styled_error(f"All tasks should have the same number of shots but found {n_shots}!")
        n_shot = list(n_shots)[0]
    except KeyError:
        return None, None, model_name, None, None, styled_error("Wrong configuration file format!")
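    # Warn about any remaining files that will not be uploaded.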
    if len(file_paths) > 0:
        ignored_files = [Path(file_path).name for file_path in file_paths]
        return data, prediction_files, model_name, version, n_shot, styled_warning(f"The following files will be ignored: {ignored_files}")
    return data, prediction_files, model_name, version, n_shot, styled_message("Files parsed successfully, verify that read metadata is correct before submitting")


def add_new_eval(
    model_training: str,
    maltese_training: str,
    language_count: int,
    configuration: dict,
    prediction_files: dict[str, str],
):
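    """Validate the submission, upload the prediction files, and add the evaluation request to the queue."""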
    global REQUESTED_MODELS
    if not REQUESTED_MODELS:
        REQUESTED_MODELS = already_submitted_models(EVAL_REQUESTS_PATH)

    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H-%M-%S.%f")

    if configuration is None or configuration == {} or prediction_files is None or prediction_files == {}:
        return styled_error("No files selected for upload, please upload an output folder (or wait for the files to finish uploading).")
    if model_training is None or model_training == "":
        return styled_error("Please select the model's overall training.")
    if maltese_training is None or maltese_training == "":
        return styled_error("Please select the model's Maltese training.")
    if language_count is None or language_count < 1:
        language_count = None
    model_name, revision, precision, seed, prompt_version, n_shot = get_model_properties(configuration)
    model_id = configuration["model_name"]

    # Seems good, creating the eval
    print("Adding new eval")

    # Check for duplicate submission
    if f"{model_name}_{revision}_{precision}_{seed}_{prompt_version}_{n_shot}" in REQUESTED_MODELS:
        return styled_warning("This model has already been submitted.")
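    # Build the request entry that is stored in the evaluation queue.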
    request = {
        "model": model_id,
        "model_args": dict({tuple(arg.split("=")) for arg in configuration["config"].get("model_args", "").split(",") if len(arg) > 0}),
        "revision": revision,
        "precision": precision,
        "seed": seed,
        "n_shot": n_shot,
        "prompt_version": prompt_version,
        "tasks": list(configuration["configs"].keys()),
        "model_training": model_training,
        "maltese_training": maltese_training,
        "language_count": language_count,
        "submitted_time": current_time,
        "status": "PENDING",
    }
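    # Upload each task's prediction file to the predictions dataset.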
    for task_name, file_path in prediction_files.items():
        print(f"Uploading {model_id} {task_name} prediction file")
        API.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=f"{n_shot}-shot_{prompt_version}/{model_name}_{revision}_{precision}/{seed}-seed/samples_{task_name}_{current_time}.jsonl",
            repo_id=PREDICTIONS_REPO,
            repo_type="dataset",
            commit_message=f"Add {configuration['model_name']} {task_name} {n_shot}-shot outputs",
        )
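    # Write the request file locally, then upload it to the evaluation queue repository.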
print(f"Creating {model_id} configruation file")
OUT_DIR = f"{EVAL_REQUESTS_PATH}/{model_name}"
os.makedirs(OUT_DIR, exist_ok=True)
out_path = f"{OUT_DIR}/requests_{model_name}_{revision}_{precision}_{n_shot}shot_{prompt_version}_{seed}seed_{current_time}.json"
with open(out_path, "w") as f:
f.write(json.dumps({"leaderboard": request, "configuration": configuration}, ensure_ascii=False, indent=2))
print(f"Uploading {model_id} configuration file")
API.upload_file(
path_or_fileobj=out_path,
path_in_repo=out_path.split("eval-queue/")[1],
repo_id=QUEUE_REPO,
repo_type="dataset",
commit_message=f"Add {configuration['model_name']} {n_shot}-shot to eval queue",
)
# Remove the local file
os.remove(out_path)
return styled_message(
"Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
)