import json
import os
import re
from datetime import datetime, timezone
from pathlib import Path

from src.display.formatting import styled_error, styled_message, styled_warning
from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO, PROMPT_VERSIONS, PREDICTIONS_REPO
from src.submission.check_validity import already_submitted_models, is_model_on_hub, get_model_properties

REQUESTED_MODELS = None


def read_configuration(file_paths):
    """Parse an uploaded results folder into its configuration and per-task prediction files.

    Expects exactly one JSON configuration file plus one samples_<task>_*.jsonl prediction file
    per configured task. Returns a tuple of
    (configuration, prediction_files, model_name, version, n_shot, status_message).
    """
    configuration_file_paths = list(filter(lambda file_path: file_path.name.endswith(".json"), file_paths or []))
    if len(configuration_file_paths) != 1:
        return None, None, None, None, None, styled_error(
            f"Expected exactly one configuration file but found {len(configuration_file_paths)}!"
        )
    configuration_file_path = file_paths.pop(file_paths.index(configuration_file_paths[0]))

    try:
        with open(configuration_file_path.name, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception:
        return None, None, None, None, None, styled_error("Failed to read configuration file!")

    try:
        model_name = data["model_name"]
        # Parse "key=value" pairs from the model_args string; split only on the first "="
        # so that values may themselves contain "=".
        model_args = {
            **dict({tuple(arg.split("=", 1)) for arg in data["config"].get("model_args", "").split(",") if len(arg) > 0}),
            "revision": data["config"]["model_revision"],
            "trust_remote_code": True,
            "cache_dir": None,
        }
        base_model = model_args.pop("pretrained")
        model_on_hub, error, _ = is_model_on_hub(
            model_name=base_model, model_args=model_args, token=TOKEN, test_tokenizer=True
        )
        if not model_on_hub:
            return None, None, model_name, None, None, styled_error(f"Model {model_name} {error}")

        limit = data["config"]["limit"]
        if limit is not None:
            return None, None, model_name, None, None, styled_error(
                f"Only full results are accepted but found a specified limit of {limit}!"
            )

        # Collect the prediction file, version, and shot count for every configured task.
        prediction_files = {}
        versions = {}
        n_shots = {}
        for task_name, _ in data["configs"].items():
            sample_files = list(
                filter(lambda file_path: re.search(rf"samples_{task_name}_.*\.jsonl", file_path.name), file_paths)
            )
            if len(sample_files) == 0:
                return None, None, model_name, None, None, styled_error(
                    f"No prediction file found for configured task {task_name}!"
                )
            prediction_files[task_name] = str(file_paths.pop(file_paths.index(sample_files[0])))
            versions[task_name] = data["versions"][task_name]
            n_shots[task_name] = data["n-shot"][task_name]
        if len(prediction_files) == 0:
            return None, None, model_name, None, None, styled_error("No tasks found in configuration!")

        # All tasks must share a single, known prompt version and a single shot count.
        versions = set(versions.values())
        if len(versions) != 1:
            return None, None, model_name, None, None, styled_error(
                f"All tasks should have the same version but found {versions}!"
            )
        version = list(versions)[0]
        if version not in PROMPT_VERSIONS:
            return None, None, model_name, None, None, styled_error(
                f"Unknown version {version}, should be one of {PROMPT_VERSIONS}!"
            )

        n_shots = set(n_shots.values())
        if len(n_shots) != 1:
            return None, None, model_name, version, None, styled_error(
                f"All tasks should have the same number of shots but found {n_shots}!"
            )
        n_shot = list(n_shots)[0]
    except KeyError:
        return None, None, model_name, None, None, styled_error("Wrong configuration file format!")

    if len(file_paths) > 0:
        ignored_files = [Path(file_path).name for file_path in file_paths]
        return data, prediction_files, model_name, version, n_shot, styled_warning(
            f"The following files will be ignored: {ignored_files}"
        )

    return data, prediction_files, model_name, version, n_shot, styled_message(
        "Files parsed successfully; verify that the read metadata is correct before submitting"
    )


def add_new_eval(
    model_training: str,
    maltese_training: str,
    language_count: int,
    configuration: dict,
    prediction_files: dict[str, str],
):
    """Validate the submission, upload the prediction files, and queue the evaluation request."""
    # Cache the set of already-submitted requests on first use.
    global REQUESTED_MODELS
    if not REQUESTED_MODELS:
        REQUESTED_MODELS = already_submitted_models(EVAL_REQUESTS_PATH)

    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H-%M-%S.%f")

    if configuration is None or configuration == {} or prediction_files is None or prediction_files == {}:
        return styled_error(
            "No files selected for upload; please upload an output folder (or wait for the files to finish uploading)."
        )
    if model_training is None or model_training == "":
        return styled_error("Please select the model's overall training.")
    if maltese_training is None or maltese_training == "":
        return styled_error("Please select the model's Maltese training.")
    if language_count is None or language_count < 1:
        language_count = None

    model_name, revision, precision, seed, prompt_version, n_shot = get_model_properties(configuration)
    model_id = configuration["model_name"]

    # Seems good, creating the eval
    print("Adding new eval")

    # Check for duplicate submission
    if f"{model_name}_{revision}_{precision}_{seed}_{prompt_version}_{n_shot}" in REQUESTED_MODELS:
        return styled_warning("This model has already been submitted.")

    request = {
        "model": model_id,
        "model_args": dict(
            {tuple(arg.split("=", 1)) for arg in configuration["config"].get("model_args", "").split(",") if len(arg) > 0}
        ),
        "revision": revision,
        "precision": precision,
        "seed": seed,
        "n_shot": n_shot,
        "prompt_version": prompt_version,
        "tasks": list(configuration["configs"].keys()),
        "model_training": model_training,
        "maltese_training": maltese_training,
        "language_count": language_count,
        "submitted_time": current_time,
        "status": "PENDING",
    }

    # Upload each task's predictions to the predictions dataset.
    for task_name, file_path in prediction_files.items():
        print(f"Uploading {model_id} {task_name} prediction file")
        API.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=f"{n_shot}-shot_{prompt_version}/{model_name}_{revision}_{precision}/{seed}-seed/samples_{task_name}_{current_time}.jsonl",
            repo_id=PREDICTIONS_REPO,
            repo_type="dataset",
            commit_message=f"Add {configuration['model_name']} {task_name} {n_shot}-shot outputs",
        )

    # Write the request file locally, upload it to the queue repo, then clean up.
    print(f"Creating {model_id} configuration file")
    OUT_DIR = f"{EVAL_REQUESTS_PATH}/{model_name}"
    os.makedirs(OUT_DIR, exist_ok=True)
    out_path = f"{OUT_DIR}/requests_{model_name}_{revision}_{precision}_{n_shot}shot_{prompt_version}_{seed}seed_{current_time}.json"

    with open(out_path, "w") as f:
        f.write(json.dumps({"leaderboard": request, "configuration": configuration}, ensure_ascii=False, indent=2))

    print(f"Uploading {model_id} configuration file")
    API.upload_file(
        path_or_fileobj=out_path,
        path_in_repo=out_path.split("eval-queue/")[1],
        repo_id=QUEUE_REPO,
        repo_type="dataset",
        commit_message=f"Add {configuration['model_name']} {n_shot}-shot to eval queue",
    )

    # Remove the local file
    os.remove(out_path)

    return styled_message(
        "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
    )
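

# A minimal sketch of the configuration schema that read_configuration() expects, assuming a
# results folder with one JSON configuration file and one samples_<task>_*.jsonl file per task.
# Field names mirror the parsing logic above; the model name, task, and values are hypothetical.
if __name__ == "__main__":
    example_configuration = {
        "model_name": "org/example-model",
        "config": {
            "model_args": "pretrained=org/example-model,dtype=bfloat16",
            "model_revision": "main",
            "limit": None,  # must be None: partial runs are rejected
        },
        "configs": {"example_task": {}},  # one entry per evaluated task
        "versions": {"example_task": 1.0},  # all tasks must share one prompt version
        "n-shot": {"example_task": 5},  # all tasks must share one shot count
    }
    print(json.dumps(example_configuration, ensure_ascii=False, indent=2))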