import json
import os
import re
from datetime import datetime, timezone
from pathlib import Path

from src.display.formatting import styled_error, styled_message, styled_warning
from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO, PROMPT_VERSIONS, PREDICTIONS_REPO
from src.submission.check_validity import already_submitted_models, is_model_on_hub, get_model_properties

REQUESTED_MODELS = None


def read_configuration(file_paths):
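    """Parse an uploaded evaluation output folder.

    Expects exactly one JSON configuration file plus one samples_<task>_*.jsonl
    prediction file per configured task. Returns a tuple of
    (data, prediction_files, model_name, version, n_shot, message), filling
    unavailable values with None and reporting status via a styled message.
    """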
    configuration_file_paths = [file_path for file_path in (file_paths or []) if file_path.name.endswith(".json")]
    if len(configuration_file_paths) != 1:
        return None, None, None, None, None, styled_error(f"Expected exactly one configuration file but found {len(configuration_file_paths)}!")

    configuration_file_path = file_paths.pop(file_paths.index(configuration_file_paths[0]))

    try:
        with open(configuration_file_path.name, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception:
        return None, None, None, None, None, styled_error("Failed to read configuration file!")

    # Initialise model_name so the KeyError handler at the end of this
    # try block can reference it even when the key is missing.
    model_name = None
    try:
        model_name = data["model_name"]
        model_args = {
            # "model_args" is a comma-separated "key=value" string; split on
            # the first "=" only so values containing "=" survive intact.
            **dict(arg.split("=", 1) for arg in data["config"].get("model_args", "").split(",") if len(arg) > 0),
            "revision": data["config"]["model_revision"],
            "trust_remote_code": True,
            "cache_dir": None,
        }
        base_model = model_args.pop("pretrained")
        model_on_hub, error, _ = is_model_on_hub(model_name=base_model, model_args=model_args, token=TOKEN, test_tokenizer=True)
        if not model_on_hub:
            return None, None, model_name, None, None, styled_error(f"Model {model_name} {error}")

        limit = data["config"]["limit"]
        if limit is not None:
            return None, None, model_name, None, None, styled_error(f"Only full results are accepted but found a specified limit of {limit}!")

        prediction_files = {}
        versions = {}
        n_shots = {}
        for task_name in data["configs"]:
            # re.escape() keeps task names containing regex metacharacters
            # from matching the wrong sample files.
            sample_files = [file_path for file_path in file_paths if re.search(rf"samples_{re.escape(task_name)}_.*\.jsonl", file_path.name)]
            if len(sample_files) == 0:
                return None, None, model_name, None, None, styled_error(f"No prediction file found for configured task {task_name}!")

            prediction_files[task_name] = str(file_paths.pop(file_paths.index(sample_files[0])))

            versions[task_name] = data["versions"][task_name]
            n_shots[task_name] = data["n-shot"][task_name]
        if len(prediction_files) == 0:
            return None, None, model_name, None, None, styled_error("No tasks found in configuration!")

        versions = set(versions.values())
        if len(versions) != 1:
            return None, None, model_name, None, None, styled_error(f"All tasks should have the same version but found {versions}!")
        version = next(iter(versions))
        if version not in PROMPT_VERSIONS:
            return None, None, model_name, None, None, styled_error(f"Unknown version {version}, should be one of {PROMPT_VERSIONS}!")

        n_shots = set(n_shots.values())
        if len(n_shots) != 1:
            return None, None, model_name, version, None, styled_error(f"All tasks should have the same number of shots but found {n_shots}!")
        n_shot = next(iter(n_shots))
    except KeyError:
        return None, None, model_name, None, None, styled_error("Wrong configuration file format!")

    if len(file_paths) > 0:
        ignored_files = [Path(file_path).name for file_path in file_paths]
        return data, prediction_files, model_name, version, n_shot, styled_warning(f"The following files will be ignored: {ignored_files}")
    return data, prediction_files, model_name, version, n_shot, styled_message("Files parsed successfully, verify that read metadata is correct before submitting")


def add_new_eval(
    model_training: str,
    maltese_training: str,
    language_count: int,
    configuration: dict,
    prediction_files: dict[str, str],
):
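    """Validate a parsed submission and queue it for evaluation.

    Uploads each task's prediction file to PREDICTIONS_REPO and a request
    file (leaderboard metadata plus the raw configuration) to QUEUE_REPO,
    returning a styled status message for the UI.
    """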
    global REQUESTED_MODELS
    if not REQUESTED_MODELS:
        # Populate the cache of earlier submissions lazily, on first use.
        REQUESTED_MODELS = already_submitted_models(EVAL_REQUESTS_PATH)

    # Colon-free UTC timestamp so it can be embedded in file names.
    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H-%M-%S.%f")

    if not configuration or not prediction_files:
        return styled_error("No files selected for upload, please upload an output folder (or wait for the files to finish uploading).")

    if not model_training:
        return styled_error("Please select the model's overall training.")

    if not maltese_training:
        return styled_error("Please select the model's Maltese training.")

    # Treat a missing or non-positive language count as unknown.
    if language_count is None or language_count < 1:
        language_count = None

    model_name, revision, precision, seed, prompt_version, n_shot = get_model_properties(configuration)
    model_id = configuration["model_name"]

    print("Adding new eval")

    if f"{model_name}_{revision}_{precision}_{seed}_{prompt_version}_{n_shot}" in REQUESTED_MODELS:
        return styled_warning("This model has been already submitted.")

    request = {
        "model": model_id,
        # Same key=value parsing as in read_configuration(): split on the
        # first "=" only.
        "model_args": dict(arg.split("=", 1) for arg in configuration["config"].get("model_args", "").split(",") if len(arg) > 0),
        "revision": revision,
        "precision": precision,
        "seed": seed,
        "n_shot": n_shot,
        "prompt_version": prompt_version,
        "tasks": list(configuration["configs"].keys()),
        "model_training": model_training,
        "maltese_training": maltese_training,
        "language_count": language_count,
        "submitted_time": current_time,
        "status": "PENDING",
    }
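
    # Predictions are stored in PREDICTIONS_REPO under
    # <n_shot>-shot_<prompt_version>/<model>_<revision>_<precision>/<seed>-seed/.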
    for task_name, file_path in prediction_files.items():
        print(f"Uploading {model_id} {task_name} prediction file")
        API.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=f"{n_shot}-shot_{prompt_version}/{model_name}_{revision}_{precision}/{seed}-seed/samples_{task_name}_{current_time}.jsonl",
            repo_id=PREDICTIONS_REPO,
            repo_type="dataset",
            commit_message=f"Add {configuration['model_name']} {task_name} {n_shot}-shot outputs",
        )

    print(f"Creating {model_id} configuration file")
    OUT_DIR = f"{EVAL_REQUESTS_PATH}/{model_name}"
    os.makedirs(OUT_DIR, exist_ok=True)
    out_path = f"{OUT_DIR}/requests_{model_name}_{revision}_{precision}_{n_shot}shot_{prompt_version}_{seed}seed_{current_time}.json"

    with open(out_path, "w", encoding="utf-8") as f:
        f.write(json.dumps({"leaderboard": request, "configuration": configuration}, ensure_ascii=False, indent=2))

    print(f"Uploading {model_id} configuration file")
    API.upload_file(
        path_or_fileobj=out_path,
        # Keep only the part of the path relative to the local "eval-queue/" checkout.
        path_in_repo=out_path.split("eval-queue/")[1],
        repo_id=QUEUE_REPO,
        repo_type="dataset",
        commit_message=f"Add {configuration['model_name']} {n_shot}-shot to eval queue",
    )

    # Remove the local copy once the request has been uploaded.
    os.remove(out_path)

    return styled_message(
        "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
    )