Spaces:

gregorkrzmanc
/

jetclustering

Sleeping

File size: 5,669 Bytes

e75a247

import os
from tqdm import tqdm
import argparse
import pickle
from src.plotting.eval_matrix import matrix_plot, scatter_plot
from src.utils.paths import get_path
import matplotlib.pyplot as plt
import numpy as np


parser = argparse.ArgumentParser()
parser.add_argument("--input", type=str, required=False, default="scouting_PFNano_signals2/SVJ_hadronic_std/all_models_eval_FT_R")

args = parser.parse_args()
path = get_path(args.input, "results")

models = sorted([x for x in os.listdir(path) if not os.path.isfile(os.path.join(path, x))])

radius = {
    "LGATr_R10": 1.0,
    "LGATr_R09": 0.9,
    "LGATr_rinv_03_m_900": 0.8,
    "LGATr_R08": 0.8,
    "LGATr_R06": 0.6,
    "LGATr_R07": 0.7,
    "LGATr_R11": 1.1,
    "LGATr_R12": 1.2,
    "LGATr_R13": 1.3,
    "LGATr_R14": 1.4,
    "LGATr_R20": 2.0,
    "LGATr_R25": 2.5

}

out_file = {}

sz = 5
fig, ax = plt.subplots(len(models), 2, figsize=(sz * 2, sz/2 * len(models)))

bins = np.linspace(0, 2, 100)
for i, model in tqdm(enumerate(models)):
    output_path = os.path.join(path, model, "count_matched_quarks")
    f = os.path.join(output_path, "result_m.pkl")
    if not os.path.isfile(f):
        continue
    result = pickle.load(open(f, "rb"))
    f1 = os.path.join(output_path, "result_PR.pkl")
    r = result[900][20][0.3]
    ax[i, 0].hist(r["m_pred"] / r["m_true"], bins=bins, histtype="step", label="all")
    ax[i, 1].hist(r["mt_pred"] / r["mt_true"], bins=bins, histtype="step", label="all")
    if "n_jets" in r:
        m_pred_over_true = r["m_pred"] / r["m_true"]
        mt_pred_over_true = r["mt_pred"] / r["mt_true"]
        ax[i, 0].hist(m_pred_over_true[r["n_jets"] == 0], bins=bins, histtype="step", label="0 jets")
        ax[i, 1].hist(mt_pred_over_true[r["n_jets"] == 0], bins=bins, histtype="step", label="0 jets")
        ax[i, 0].hist(m_pred_over_true[r["n_jets"] == 1], bins=bins, histtype="step", label="1 jet")
        ax[i, 1].hist(mt_pred_over_true[r["n_jets"] == 1], bins=bins, histtype="step", label="1 jet")
        # 2+ jets
        ax[i, 0].hist(m_pred_over_true[r["n_jets"] > 1], bins=bins, histtype="step", label="2+ jets")
        ax[i, 1].hist(mt_pred_over_true[r["n_jets"] > 1], bins=bins, histtype="step", label="2+ jets")
        ax[i, 0].legend()
        ax[i, 1].legend()
    ax[i, 0].set_title(model)
    ax[i, 1].set_title(model)
    ax[i, 0].set_xlabel("m_pred / m_true")
    ax[i, 1].set_xlabel("mt_pred / mt_true")
    ax[i, 0].set_yscale("log")
    ax[i, 1].set_yscale("log")
fig.tight_layout()
fig.savefig(os.path.join(path, "mass_histograms.pdf"))



#######

sz = 5
r_invs = {"03": 0.3, "07": 0.7, "05": 0.5}
c = {}
for r_inv in r_invs:
    fig, ax = plt.subplots(len(result), 2, figsize=(sz * 2, sz/2 * len(models)))
    bins = np.linspace(0, 2, 100)
    for i, mmed in tqdm(enumerate(sorted(result.keys()))):
        for j, model in enumerate(models):
            output_path = os.path.join(path, model, "count_matched_quarks")
            f = os.path.join(output_path, "result_m.pkl")
            if not os.path.isfile(f):
                continue
            if f not in c:
                c[f] = pickle.load(open(f, "rb"))
            result = c[f]
            r = result[mmed][20][r_invs[r_inv]]
            ax[i, 0].hist(r["m_pred"] / r["m_true"], bins=bins, histtype="step", label=model)
            ax[i, 1].hist(r["mt_pred"] / r["mt_true"], bins=bins, histtype="step", label=model)
        ax[i, 0].set_title("m_med = " + str(mmed))
        ax[i, 1].set_title("m_med = " + str(mmed))
        ax[i, 0].set_xlabel("m_pred / m_true")
        ax[i, 1].set_xlabel("mt_pred / mt_true")
        ax[i, 0].set_yscale("log")
        ax[i, 1].set_yscale("log")
        ax[i, 0].legend()
        ax[i, 1].legend()
    fig.tight_layout()
    fig.savefig(os.path.join(path, "mass_histograms_model_comparison_{}.pdf".format(r_inv)))

##########
blues = plt.get_cmap("Blues")
def get_color(model):
    if model == "AK8":
        return "gray"
    if model == "AK8_GenJets":
        return "black"
    # else, get the radius
    R = radius[model]
    # normalize R between 0 and 1 (originally between 0.3 and 1.4)
    r = (R - 0.3) / (2.5 - 0.3)
    return blues(r)


sz = 5
r_invs = {"03": 0.3, "07": 0.7, "05": 0.5}
c = {}
for r_inv in r_invs:
    fig, ax = plt.subplots(len(result), 2, figsize=(sz * 2, sz/2 * len(models)))
    bins = np.linspace(0, 2, 100)
    bins2 = np.linspace(0, 2, 50)

    for i, mmed in tqdm(enumerate(sorted(result.keys()))):
        for j, model in enumerate(models):
            output_path = os.path.join(path, model, "count_matched_quarks")
            f = os.path.join(output_path, "result_m.pkl")
            if not os.path.isfile(f):
                continue
            if f not in c:
                c[f] = pickle.load(open(f, "rb"))
            result = c[f]
            r = result[mmed][20][r_invs[r_inv]]
            if "n_jets" in r and (model in radius or model in ["AK8", "AK8_GenJets"]):
                ax[i, 0].hist((r["m_pred"] / r["m_true"])[r["n_jets"] == 2], bins=bins2, histtype="step", label=model, color=get_color(model))
                ax[i, 1].hist((r["mt_pred"] / r["mt_true"])[r["n_jets"] == 2], bins=bins2, histtype="step", label=model, color=get_color(model))
        ax[i, 0].set_title("m_med = " + str(mmed))
        ax[i, 1].set_title("m_med = " + str(mmed))
        ax[i, 0].set_xlabel("m_pred / m_true")
        ax[i, 1].set_xlabel("mt_pred / mt_true")
        ax[i, 0].set_yscale("log")
        ax[i, 1].set_yscale("log")
        ax[i, 0].legend()
        ax[i, 1].legend()
    fig.tight_layout()
    fig.savefig(os.path.join(path, "mass_histograms_model_comparison_2jets_{}.pdf".format(r_inv)))