import json
import os
import re
from datetime import datetime, timezone
from pathlib import Path

from src.display.formatting import styled_error, styled_message, styled_warning
from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO, PROMPT_VERSIONS, PREDICTIONS_REPO
from src.submission.check_validity import already_submitted_models, is_model_on_hub, get_model_properties

REQUESTED_MODELS = None


def read_configuration(file_paths):
    """Parse an uploaded results folder into its configuration and per-task prediction files.

    Expects exactly one JSON configuration file plus one samples_<task>_*.jsonl prediction file
    per configured task. Returns a tuple of
    (configuration, prediction_files, model_name, version, n_shot, status_message).
    """
    configuration_file_paths = list(filter(lambda file_path: file_path.name.endswith(".json"), file_paths or []))
    if len(configuration_file_paths) != 1:
        return None, None, None, None, None, styled_error(
            f"Expected exactly one configuration file but found {len(configuration_file_paths)}!"
        )
    configuration_file_path = file_paths.pop(file_paths.index(configuration_file_paths[0]))

    try:
        with open(configuration_file_path.name, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception:
        return None, None, None, None, None, styled_error("Failed to read configuration file!")

    try:
        model_name = data["model_name"]
        # Parse "key=value" pairs from the model_args string; split only on the first "="
        # so that values may themselves contain "=".
        model_args = {
            **dict({tuple(arg.split("=", 1)) for arg in data["config"].get("model_args", "").split(",") if len(arg) > 0}),
            "revision": data["config"]["model_revision"],
            "trust_remote_code": True,
            "cache_dir": None,
        }
        base_model = model_args.pop("pretrained")
        model_on_hub, error, _ = is_model_on_hub(
            model_name=base_model, model_args=model_args, token=TOKEN, test_tokenizer=True
        )
        if not model_on_hub:
            return None, None, model_name, None, None, styled_error(f"Model {model_name} {error}")

        limit = data["config"]["limit"]
        if limit is not None:
            return None, None, model_name, None, None, styled_error(
                f"Only full results are accepted but found a specified limit of {limit}!"
            )

        # Collect the prediction file, version, and shot count for every configured task.
        prediction_files = {}
        versions = {}
        n_shots = {}
        for task_name, _ in data["configs"].items():
            sample_files = list(
                filter(lambda file_path: re.search(rf"samples_{task_name}_.*\.jsonl", file_path.name), file_paths)
            )
            if len(sample_files) == 0:
                return None, None, model_name, None, None, styled_error(
                    f"No prediction file found for configured task {task_name}!"
                )
            prediction_files[task_name] = str(file_paths.pop(file_paths.index(sample_files[0])))
            versions[task_name] = data["versions"][task_name]
            n_shots[task_name] = data["n-shot"][task_name]
        if len(prediction_files) == 0:
            return None, None, model_name, None, None, styled_error("No tasks found in configuration!")

        # All tasks must share a single, known prompt version and a single shot count.
        versions = set(versions.values())
        if len(versions) != 1:
            return None, None, model_name, None, None, styled_error(
                f"All tasks should have the same version but found {versions}!"
            )
        version = list(versions)[0]
        if version not in PROMPT_VERSIONS:
            return None, None, model_name, None, None, styled_error(
                f"Unknown version {version}, should be one of {PROMPT_VERSIONS}!"
            )

        n_shots = set(n_shots.values())
        if len(n_shots) != 1:
            return None, None, model_name, version, None, styled_error(
                f"All tasks should have the same number of shots but found {n_shots}!"
            )
        n_shot = list(n_shots)[0]
    except KeyError:
        return None, None, model_name, None, None, styled_error("Wrong configuration file format!")

    if len(file_paths) > 0:
        ignored_files = [Path(file_path).name for file_path in file_paths]
        return data, prediction_files, model_name, version, n_shot, styled_warning(
            f"The following files will be ignored: {ignored_files}"
        )

    return data, prediction_files, model_name, version, n_shot, styled_message(
        "Files parsed successfully; verify that the read metadata is correct before submitting"
    )


def add_new_eval(
    model_training: str,
    maltese_training: str,
    language_count: int,
    configuration: dict,
    prediction_files: dict[str, str],
):
    """Validate the submission, upload the prediction files, and queue the evaluation request."""
    # Cache the set of already-submitted requests on first use.
    global REQUESTED_MODELS
    if not REQUESTED_MODELS:
        REQUESTED_MODELS = already_submitted_models(EVAL_REQUESTS_PATH)

    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H-%M-%S.%f")

    if configuration is None or configuration == {} or prediction_files is None or prediction_files == {}:
        return styled_error(
            "No files selected for upload; please upload an output folder (or wait for the files to finish uploading)."
        )
    if model_training is None or model_training == "":
        return styled_error("Please select the model's overall training.")
    if maltese_training is None or maltese_training == "":
        return styled_error("Please select the model's Maltese training.")
    if language_count is None or language_count < 1:
        language_count = None

    model_name, revision, precision, seed, prompt_version, n_shot = get_model_properties(configuration)
    model_id = configuration["model_name"]

    # Seems good, creating the eval
    print("Adding new eval")

    # Check for duplicate submission
    if f"{model_name}_{revision}_{precision}_{seed}_{prompt_version}_{n_shot}" in REQUESTED_MODELS:
        return styled_warning("This model has already been submitted.")

    request = {
        "model": model_id,
        "model_args": dict(
            {tuple(arg.split("=", 1)) for arg in configuration["config"].get("model_args", "").split(",") if len(arg) > 0}
        ),
        "revision": revision,
        "precision": precision,
        "seed": seed,
        "n_shot": n_shot,
        "prompt_version": prompt_version,
        "tasks": list(configuration["configs"].keys()),
        "model_training": model_training,
        "maltese_training": maltese_training,
        "language_count": language_count,
        "submitted_time": current_time,
        "status": "PENDING",
    }

    # Upload each task's predictions to the predictions dataset.
    for task_name, file_path in prediction_files.items():
        print(f"Uploading {model_id} {task_name} prediction file")
        API.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=f"{n_shot}-shot_{prompt_version}/{model_name}_{revision}_{precision}/{seed}-seed/samples_{task_name}_{current_time}.jsonl",
            repo_id=PREDICTIONS_REPO,
            repo_type="dataset",
            commit_message=f"Add {configuration['model_name']} {task_name} {n_shot}-shot outputs",
        )

    # Write the request file locally, upload it to the queue repo, then clean up.
    print(f"Creating {model_id} configuration file")
    OUT_DIR = f"{EVAL_REQUESTS_PATH}/{model_name}"
    os.makedirs(OUT_DIR, exist_ok=True)
    out_path = f"{OUT_DIR}/requests_{model_name}_{revision}_{precision}_{n_shot}shot_{prompt_version}_{seed}seed_{current_time}.json"

    with open(out_path, "w") as f:
        f.write(json.dumps({"leaderboard": request, "configuration": configuration}, ensure_ascii=False, indent=2))

    print(f"Uploading {model_id} configuration file")
    API.upload_file(
        path_or_fileobj=out_path,
        path_in_repo=out_path.split("eval-queue/")[1],
        repo_id=QUEUE_REPO,
        repo_type="dataset",
        commit_message=f"Add {configuration['model_name']} {n_shot}-shot to eval queue",
    )

    # Remove the local file
    os.remove(out_path)

    return styled_message(
        "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
    )
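

# A minimal sketch of the configuration schema that read_configuration() expects, assuming a
# results folder with one JSON configuration file and one samples_<task>_*.jsonl file per task.
# Field names mirror the parsing logic above; the model name, task, and values are hypothetical.
if __name__ == "__main__":
    example_configuration = {
        "model_name": "org/example-model",
        "config": {
            "model_args": "pretrained=org/example-model,dtype=bfloat16",
            "model_revision": "main",
            "limit": None,  # must be None: partial runs are rejected
        },
        "configs": {"example_task": {}},  # one entry per evaluated task
        "versions": {"example_task": 1.0},  # all tasks must share one prompt version
        "n-shot": {"example_task": 5},  # all tasks must share one shot count
    }
    print(json.dumps(example_configuration, ensure_ascii=False, indent=2))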