import json
import os
import re
from datetime import datetime, timezone
from pathlib import Path

from src.display.formatting import styled_error, styled_message, styled_warning
from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO, PROMPT_VERSIONS, PREDICTIONS_REPO
from src.submission.check_validity import already_submitted_models, is_model_on_hub, get_model_properties

REQUESTED_MODELS = None


def read_configuration(file_paths):
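    """Parse an uploaded evaluation output folder.

    Expects exactly one results JSON (lm-evaluation-harness-style) plus one
    samples_<task>_*.jsonl predictions file per configured task. Returns
    (data, prediction_files, model_name, version, n_shot, status_message);
    the leading values are None when validation fails.
    """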
    configuration_file_paths = list(filter(lambda file_path: file_path.name.endswith(".json"), file_paths or []))
    if len(configuration_file_paths) != 1:
        return None, None, None, None, None, styled_error(f"Expected exactly one configuration file but found {len(configuration_file_paths)}!")

    configuration_file_path = file_paths.pop(file_paths.index(configuration_file_paths[0]))

    try:
        with open(configuration_file_path.name, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception:
        return None, None, None, None, None, styled_error("Failed to read configuration file!")

    model_name = None  # defined before the try so the KeyError handler can reference it
    try:
        model_name = data["model_name"]
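        # Rebuild the model loading kwargs from the comma-separated
        # "model_args" string (e.g. "pretrained=org/model,dtype=bfloat16").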
        model_args = {
            **dict(tuple(arg.split("=", 1)) for arg in data["config"].get("model_args", "").split(",") if len(arg) > 0),
            "revision": data["config"]["model_revision"],
            "trust_remote_code": True,
            "cache_dir": None
        }
        base_model = model_args.pop("pretrained")
        model_on_hub, error, _ = is_model_on_hub(model_name=base_model, model_args=model_args, token=TOKEN, test_tokenizer=True)
        if not model_on_hub:
            return None, None, model_name, None, None, styled_error(f"Model {model_name} {error}")

        limit = data["config"]["limit"]
        if limit is not None:
            return None, None, model_name, None, None, styled_error(f"Only full results are accepted but found a specified limit of {limit}!")

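        # Collect exactly one samples_<task>_*.jsonl prediction file per
        # configured task, along with that task's prompt version and shot count.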
        prediction_files = {}
        versions = {}
        n_shots = {}
        for task_name, _ in data["configs"].items():
            sample_files = list(filter(lambda file_path: re.search(rf"samples_{task_name}_.*\.jsonl", file_path.name), file_paths))
            if len(sample_files) == 0:
                return None, None, model_name, None, None, styled_error(f"No prediction file found for configured task {task_name}!")

            prediction_files[task_name] = str(file_paths.pop(file_paths.index(sample_files[0])))

            versions[task_name] = data["versions"][task_name]
            n_shots[task_name] = data["n-shot"][task_name]
        if len(prediction_files) == 0:
            return None, None, model_name, None, None, styled_error("No tasks found in configuration!")

        versions = set(versions.values())
        if len(versions) != 1:
            return None, None, model_name, None, None, styled_error(f"All tasks should have the same version but found {versions}!")
        version = list(versions)[0]
        if version not in PROMPT_VERSIONS:
            return None, None, model_name, None, None, styled_error(f"Unknown version {version}, should be one of {PROMPT_VERSIONS}!")

        n_shots = set(n_shots.values())
        if len(n_shots) != 1:
            return None, None, model_name, version, None, styled_error(f"All tasks should have the same number of shots but found {n_shots}!")
        n_shot = list(n_shots)[0]
    except KeyError:
        return None, None, model_name, None, None, styled_error("Wrong configuration file format!")

    if len(file_paths) > 0:
        ignored_files = [Path(file_path).name for file_path in file_paths]
        return data, prediction_files, model_name, version, n_shot, styled_warning(f"The following files will be ignored: {ignored_files}")
    return data, prediction_files, model_name, version, n_shot, styled_message("Files parsed successfully, please verify that the parsed metadata is correct before submitting")


def add_new_eval(
    model_training: str,
    maltese_training: str,
    language_count: int,
    configuration: dict,
    prediction_files: dict[str, str],
):
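    """Validate a submission and queue it for evaluation.

    Prediction files are uploaded to PREDICTIONS_REPO, and a request JSON
    (leaderboard metadata plus the raw configuration) to QUEUE_REPO.
    Returns a styled status message for the UI.
    """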
    global REQUESTED_MODELS
    if not REQUESTED_MODELS:
        REQUESTED_MODELS = already_submitted_models(EVAL_REQUESTS_PATH)

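    # Timestamp with "-" instead of ":" so it can be embedded in file names.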
    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H-%M-%S.%f")

    if not configuration or not prediction_files:
        return styled_error("No files selected for upload, please upload an output folder (or wait for the files to finish uploading).")

    if not model_training:
        return styled_error("Please select the model's overall training.")

    if not maltese_training:
        return styled_error("Please select the model's Maltese training.")

    if language_count is None or language_count < 1:
        language_count = None

    model_name, revision, precision, seed, prompt_version, n_shot = get_model_properties(configuration)
    model_id = configuration["model_name"]

    # Seems good, creating the eval
    print("Adding new eval")

    # Check for duplicate submission
    if f"{model_name}_{revision}_{precision}_{seed}_{prompt_version}_{n_shot}" in REQUESTED_MODELS:
        return styled_warning("This model has already been submitted.")

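    # Request metadata stored alongside the raw configuration in the queue.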
    request = {
        "model": model_id,
        "model_args": dict({tuple(arg.split("=")) for arg in configuration["config"].get("model_args", "").split(",") if len(arg) > 0}),
        "revision": revision,
        "precision": precision,
        "seed": seed,
        "n_shot": n_shot,
        "prompt_version": prompt_version,
        "tasks": list(configuration["configs"].keys()),
        "model_training": model_training,
        "maltese_training": maltese_training,
        "language_count": language_count,
        "submitted_time": current_time,
        "status": "PENDING",
    }

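    # Upload each task's raw predictions, keyed by shot count, prompt version,
    # model, and seed.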
    for task_name, file_path in prediction_files.items():
        print(f"Uploading {model_id} {task_name} prediction file")
        API.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=f"{n_shot}-shot_{prompt_version}/{model_name}_{revision}_{precision}/{seed}-seed/samples_{task_name}_{current_time}.jsonl",
            repo_id=PREDICTIONS_REPO,
            repo_type="dataset",
            commit_message=f"Add {configuration['model_name']} {task_name} {n_shot}-shot outputs",
        )

    print(f"Creating {model_id} configruation file")
    OUT_DIR = f"{EVAL_REQUESTS_PATH}/{model_name}"
    os.makedirs(OUT_DIR, exist_ok=True)
    out_path = f"{OUT_DIR}/requests_{model_name}_{revision}_{precision}_{n_shot}shot_{prompt_version}_{seed}seed_{current_time}.json"

    with open(out_path, "w", encoding="utf-8") as f:
        f.write(json.dumps({"leaderboard": request, "configuration": configuration}, ensure_ascii=False, indent=2))

    print(f"Uploading {model_id} configuration file")
    API.upload_file(
        path_or_fileobj=out_path,
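        # out_path is rooted under EVAL_REQUESTS_PATH (assumed to contain
        # "eval-queue/"); keep only the part after it as the in-repo path.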
        path_in_repo=out_path.split("eval-queue/")[1],
        repo_id=QUEUE_REPO,
        repo_type="dataset",
        commit_message=f"Add {configuration['model_name']} {n_shot}-shot to eval queue",
    )

    # Remove the local file
    os.remove(out_path)

    return styled_message(
        "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
    )