gregorkrzmanc's picture
.
e75a247
raw
history blame
23.5 kB
import matplotlib
# Set the default font size through matplotlib's rc settings; this runs
# before pyplot is imported below.
matplotlib.rc("font", size=35)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import mplhep as hep
import seaborn as sns
from scipy.optimize import curve_fit
# Switch to the mplhep "CMS" style for every figure created by this module.
hep.style.use("CMS")
# Previous palette, kept for reference:
# colors_list = ["#fff7bc", "#fec44f", "#d95f0e"]
# Module-wide palette (NeurIPS poster colours). The plotting helpers below use
# index 0 for the "HDBSCAN" series, 1 for "GNN"/"MLPF" and 2 for "Pandora".
colors_list = ["#fde0dd", "#c994c7", "#dd1c77"] # color list poster neurips
# Marker size passed as ``ms`` by the errorbar panels (plot_containment and
# plot_purity), which read this module-level value.
marker_size = 20
def plot_for_jan(neutrals_only, dic1, dic2, dict_1, dict_2, dict_3, colors_list):
    """Scatter ``variance_om`` against ``energy_resolutions`` and save the figure.

    The "HDBSCAN" (``dict_1``) and "GNN" (``dict_2``) series are drawn only
    when ``dic1`` / ``dic2`` are truthy; the "Pandora" series (``dict_3``) is
    always drawn. The y axis is logarithmic and the x range is fixed to
    ``[0, 21]``.

    Args:
        neutrals_only: Selects which of the two hard-coded output files is
            written.
        dic1, dic2: Truthy flags enabling the first two series.
        dict_1, dict_2, dict_3: Mappings providing the ``"energy_resolutions"``
            and ``"variance_om"`` sequences.
        colors_list: Sequence of at least three colours (one per series); this
            parameter shadows the module-level palette.

    Returns:
        None. The figure is written to a hard-coded AFS path.
    """
    # The x-axis label switch is pinned to the linear variant in this plot.
    log_scale = False

    fig = plt.figure()

    # (enabled, data, colour index, legend label, series-specific kwargs)
    series = (
        (dic1, dict_1, 0, "HDBSCAN", {}),
        (dic2, dict_2, 1, "GNN", {}),
        (True, dict_3, 2, "Pandora", {"marker": "^"}),
    )
    for enabled, data, color_idx, label, extra in series:
        if not enabled:
            continue
        color = colors_list[color_idx]
        plt.scatter(
            data["energy_resolutions"],
            data["variance_om"],
            label=label,
            s=70,
            facecolors=color,
            edgecolors=color,
            **extra,
        )

    plt.xlabel("Log True Energy [GeV]" if log_scale else "True Energy [GeV]")
    plt.ylabel("Resolution")
    plt.grid()
    plt.legend(loc="upper right")
    plt.xlim([0, 21])
    plt.yscale("log")
    fig.tight_layout(pad=2.0)

    if neutrals_only:
        target = "/afs/cern.ch/work/m/mgarciam/private/mlpf/summ_results/Pandora_mix/histograms_energy_neutrals_only_jan.png"
    else:
        target = "/afs/cern.ch/work/m/mgarciam/private/mlpf/summ_results/Pandora_mix/histograms_energy_jan.png"
    fig.savefig(target, bbox_inches="tight")
def plot_eff(ax, dic1, dict_1, dic2, dict_2, dic3, dict_3, log_scale, i, j, idx_start):
    """Draw the efficiency scatter panel on ``ax[i, j]``.

    Every enabled series plots ``dict_n["eff"]`` against
    ``dict_n["energy_eff"]``, skipping the first ``idx_start`` entries of both.

    Args:
        ax: Indexable grid of axes; only ``ax[i, j]`` is drawn on.
        dic1, dic2, dic3: Truthy flags enabling the "HDBSCAN", "GNN" and
            "Pandora" series respectively.
        dict_1, dict_2, dict_3: Mappings holding the data of those series.
        log_scale: When truthy, the x axis is switched to a log scale.
        i, j: Row and column of the target panel.
        idx_start: Index of the first point that is plotted.

    Returns:
        The ``ax`` object that was passed in.
    """
    panel = ax[i, j]

    # (enabled, data, colour index, legend label, series-specific kwargs)
    series = (
        (dic1, dict_1, 0, "HDBSCAN", {"marker": "o", "s": 80}),
        (dic2, dict_2, 1, "GNN", {"s": 70}),
        (dic3, dict_3, 2, "Pandora", {"marker": "^", "s": 70}),
    )
    for enabled, data, color_idx, label, extra in series:
        if not enabled:
            continue
        color = colors_list[color_idx]
        panel.scatter(
            data["energy_eff"][idx_start:],
            data["eff"][idx_start:],
            label=label,
            facecolors=color,
            edgecolors=color,
            **extra,
        )

    if log_scale:
        panel.set_xlabel("Log True Energy [GeV]")
        panel.set_xscale("log")
    else:
        panel.set_xlabel("True Energy [GeV]")
    panel.set_ylabel("Efficiency")
    panel.grid()
    panel.legend(loc="lower right")
    return ax
def plot_fakes(
    ax, dic1, dict_1, dic2, dict_2, dic3, dict_3, log_scale, i, j, idx_start
):
    """Draw the fake-rate scatter panel on ``ax[i, j]`` with a log y axis.

    Every enabled series plots ``dict_n["fake_rate"]`` against
    ``dict_n["energy_fakes"]``, skipping the first ``idx_start`` entries.

    Args:
        ax: Indexable grid of axes; only ``ax[i, j]`` is drawn on.
        dic1, dic2, dic3: Truthy flags enabling the "HDBSCAN", "GNN" and
            "Pandora" series respectively.
        dict_1, dict_2, dict_3: Mappings holding the data of those series.
        log_scale: When truthy, the x axis is switched to a log scale.
        i, j: Row and column of the target panel.
        idx_start: Index of the first point that is plotted.

    Returns:
        The ``ax`` object that was passed in.
    """
    panel = ax[i, j]

    # (enabled, data, colour index, legend label, series-specific kwargs)
    series = (
        (dic1, dict_1, 0, "HDBSCAN", {"marker": "o", "s": 80}),
        (dic2, dict_2, 1, "GNN", {"s": 70}),
        (dic3, dict_3, 2, "Pandora", {"marker": "^", "s": 70}),
    )
    for enabled, data, color_idx, label, extra in series:
        if not enabled:
            continue
        color = colors_list[color_idx]
        panel.scatter(
            data["energy_fakes"][idx_start:],
            data["fake_rate"][idx_start:],
            label=label,
            facecolors=color,
            edgecolors=color,
            **extra,
        )

    if log_scale:
        panel.set_xlabel("Log Reconstructed Energy [GeV]")
        panel.set_xscale("log")
    else:
        panel.set_xlabel("Reconstructed Energy [GeV]")
    panel.set_ylabel("Fake rate")
    panel.grid()
    panel.set_yscale("log")
    panel.legend(loc="upper right")
    return ax
def plot_response(
    ax, dic1, dict_1, dic2, dict_2, dic3, dict_3, log_scale, i, j, idx_start
):
    """Draw the response-versus-reco-energy scatter panel on ``ax[i, j]``.

    Every enabled series plots ``dict_n["mean_true_rec"]`` against
    ``dict_n["energy_resolutions_reco"]``, skipping the first ``idx_start``
    entries of both.

    Args:
        ax: Indexable grid of axes; only ``ax[i, j]`` is drawn on.
        dic1, dic2, dic3: Truthy flags enabling the "HDBSCAN", "GNN" and
            "Pandora" series respectively.
        dict_1, dict_2, dict_3: Mappings holding the data of those series.
        log_scale: When truthy, the x axis is switched to a log scale.
        i, j: Row and column of the target panel.
        idx_start: Index of the first point that is plotted.

    Returns:
        The ``ax`` object that was passed in.
    """
    panel = ax[i, j]

    # (enabled, data, colour index, legend label, series-specific kwargs)
    series = (
        (dic1, dict_1, 0, "HDBSCAN", {"marker": "o", "s": 80}),
        (dic2, dict_2, 1, "GNN", {"s": 70}),
        (dic3, dict_3, 2, "Pandora", {"marker": "^", "s": 70}),
    )
    for enabled, data, color_idx, label, extra in series:
        if not enabled:
            continue
        color = colors_list[color_idx]
        panel.scatter(
            data["energy_resolutions_reco"][idx_start:],
            data["mean_true_rec"][idx_start:],
            label=label,
            facecolors=color,
            edgecolors=color,
            **extra,
        )

    if log_scale:
        panel.set_xlabel("Log Reco Energy [GeV]")
        panel.set_xscale("log")
    else:
        panel.set_xlabel("Reco Energy [GeV]")
    panel.set_ylabel("Response")
    panel.grid()
    panel.legend(loc="lower right")
    return ax
def plot_fit_energy_resolution(
    ax, dic1, dict_1, dic2, dict_2, dic3, dict_3, log_scale, i, j, reco=False
):
    """Fit ``resolution`` to two data sets and overlay the curves on ``ax[i, j]``.

    For ``dict_1`` and then ``dict_3`` the points with energy above 1.0 and
    spread below 0.38 are fitted with ``scipy.optimize.curve_fit``; the fitted
    curve is drawn for x from 1 up to (but excluding) 51 in steps of 0.1, with
    the fitted parameters in the legend label.

    Args:
        ax: Indexable grid of axes; only ``ax[i, j]`` is drawn on.
        dic1, dic2, dic3, dict_2, log_scale: Accepted for signature parity
            with the other panel helpers; not read by this function.
        dict_1, dict_3: Mappings providing the fitted sequences.
        i, j: Row and column of the target panel.
        reco: When truthy, fit ``"variance_om_true_rec"`` against
            ``"energy_resolutions_reco"``; otherwise ``"variance_om"``
            against ``"energy_resolutions"``.

    Returns:
        The ``ax`` object that was passed in.
    """
    if reco:
        x_key, y_key = "energy_resolutions_reco", "variance_om_true_rec"
    else:
        x_key, y_key = "energy_resolutions", "variance_om"

    # NOTE(review): the curve labelled "fit GNN" is fitted to dict_1, which the
    # scatter helpers label "HDBSCAN" -- confirm which data set is intended.
    fits = (
        (dict_1, 1, "fit GNN: a=%5.3f, b=%5.3f, c=%5.3f"),
        (dict_3, 2, "fit Pandora: a=%5.3f, b=%5.3f, c=%5.3f"),
    )
    for source, color_idx, label_fmt in fits:
        energy = np.array(source[x_key])
        spread = np.array(source[y_key])
        # Keep only the points inside the fit window.
        keep = (energy > 1.0) & (spread < 0.38)
        params, _ = curve_fit(resolution, energy[keep], spread[keep])
        curve_x = np.arange(1, 51, 0.1)
        ax[i, j].plot(
            curve_x,
            resolution(curve_x, *params),
            "-",
            c=colors_list[color_idx],
            label=label_fmt % tuple(params),
        )

    ax[i, j].legend(loc="upper right")
    return ax
def resolution(E, a, b, c):
    """Evaluate ``sqrt(a**2 / E + c**2 + b**2 / E**2)``.

    Used as the model function for ``curve_fit``; works element-wise when
    ``E`` is a NumPy array.

    Args:
        E: Energy value(s); must be non-zero.
        a: Coefficient of the ``1 / E`` term.
        b: Coefficient of the ``1 / E**2`` term.
        c: Energy-independent term.

    Returns:
        The square root of the summed squared terms.
    """
    inv_e_term = a**2 / E
    const_term = c**2
    inv_e2_term = b**2 / E**2
    return (inv_e_term + const_term + inv_e2_term) ** 0.5
def plot_resolution(
    ax, dic1, dict_1, dic2, dict_2, dic3, dict_3, log_scale, i, j, idx_start
):
    """Draw the resolution-versus-reco-energy panel on ``ax[i, j]`` (log y axis).

    Every enabled series plots ``dict_n["variance_om_true_rec"]`` against
    ``dict_n["energy_resolutions_reco"]``, skipping the first ``idx_start``
    entries of both.

    Args:
        ax: Indexable grid of axes; only ``ax[i, j]`` is drawn on.
        dic1, dic2, dic3: Truthy flags enabling the "HDBSCAN", "GNN" and
            "Pandora" series respectively.
        dict_1, dict_2, dict_3: Mappings holding the data of those series.
        log_scale: When truthy, the x axis is switched to a log scale.
        i, j: Row and column of the target panel.
        idx_start: Index of the first point that is plotted.

    Returns:
        The ``ax`` object that was passed in.
    """
    panel = ax[i, j]

    # (enabled, data, colour index, legend label, series-specific kwargs)
    series = (
        (dic1, dict_1, 0, "HDBSCAN", {"marker": "o", "s": 80}),
        (dic2, dict_2, 1, "GNN", {"s": 70}),
        (dic3, dict_3, 2, "Pandora", {"marker": "^", "s": 70}),
    )
    for enabled, data, color_idx, label, extra in series:
        if not enabled:
            continue
        color = colors_list[color_idx]
        panel.scatter(
            data["energy_resolutions_reco"][idx_start:],
            data["variance_om_true_rec"][idx_start:],
            label=label,
            facecolors=color,
            edgecolors=color,
            **extra,
        )

    if log_scale:
        panel.set_xlabel("Log Reco Energy [GeV]")
        panel.set_xscale("log")
    else:
        panel.set_xlabel("Reco Energy [GeV]")
    panel.set_ylabel("Resolution")
    panel.grid()
    panel.set_yscale("log")
    panel.legend(loc="upper right")
    return ax
def plot_response_trueE(
    ax, dic1, dict_1, dic2, dict_2, dic3, dict_3, log_scale, i, j, idx_start
):
    """Draw the response-versus-true-energy scatter panel on ``ax[i, j]``.

    Every enabled series plots ``dict_n["mean"]`` against
    ``dict_n["energy_resolutions"]``, skipping the first ``idx_start``
    entries of both.

    Args:
        ax: Indexable grid of axes; only ``ax[i, j]`` is drawn on.
        dic1, dic2, dic3: Truthy flags enabling the "HDBSCAN", "GNN" and
            "Pandora" series respectively.
        dict_1, dict_2, dict_3: Mappings holding the data of those series.
        log_scale: When truthy, the x axis is switched to a log scale.
        i, j: Row and column of the target panel.
        idx_start: Index of the first point that is plotted.

    Returns:
        The ``ax`` object that was passed in.
    """
    panel = ax[i, j]

    # (enabled, data, colour index, legend label, series-specific kwargs)
    series = (
        (dic1, dict_1, 0, "HDBSCAN", {"marker": "o", "s": 80}),
        (dic2, dict_2, 1, "GNN", {"s": 70}),
        (dic3, dict_3, 2, "Pandora", {"marker": "^", "s": 70}),
    )
    for enabled, data, color_idx, label, extra in series:
        if not enabled:
            continue
        color = colors_list[color_idx]
        panel.scatter(
            data["energy_resolutions"][idx_start:],
            data["mean"][idx_start:],
            label=label,
            facecolors=color,
            edgecolors=color,
            **extra,
        )

    if log_scale:
        panel.set_xlabel("Log True Energy [GeV]")
        panel.set_xscale("log")
    else:
        panel.set_xlabel("True Energy [GeV]")
    # Only one y label is set: this panel shows the response.
    panel.set_ylabel("Response")
    panel.grid()
    panel.legend(loc="lower right")
    return ax
def plot_resolution_trueE(
    ax, dic1, dict_1, dic2, dict_2, dic3, dict_3, log_scale, i, j, idx_start
):
    """Draw the resolution-versus-true-energy panel on ``ax[i, j]`` (log y axis).

    Every enabled series plots ``dict_n["variance_om"]`` against
    ``dict_n["energy_resolutions"]``, skipping the first ``idx_start``
    entries of both.

    Args:
        ax: Indexable grid of axes; only ``ax[i, j]`` is drawn on.
        dic1, dic2, dic3: Truthy flags enabling the "HDBSCAN", "GNN" and
            "Pandora" series respectively.
        dict_1, dict_2, dict_3: Mappings holding the data of those series.
        log_scale: When truthy, the x axis is switched to a log scale.
        i, j: Row and column of the target panel.
        idx_start: Index of the first point that is plotted.

    Returns:
        The ``ax`` object that was passed in.
    """
    panel = ax[i, j]

    # (enabled, data, colour index, legend label, series-specific kwargs)
    series = (
        (dic1, dict_1, 0, "HDBSCAN", {"marker": "o", "s": 80}),
        (dic2, dict_2, 1, "GNN", {"s": 70}),
        (dic3, dict_3, 2, "Pandora", {"marker": "^", "s": 70}),
    )
    for enabled, data, color_idx, label, extra in series:
        if not enabled:
            continue
        color = colors_list[color_idx]
        panel.scatter(
            data["energy_resolutions"][idx_start:],
            data["variance_om"][idx_start:],
            label=label,
            facecolors=color,
            edgecolors=color,
            **extra,
        )

    if log_scale:
        panel.set_xlabel("Log True Energy [GeV]")
        panel.set_xscale("log")
    else:
        panel.set_xlabel("True Energy [GeV]")
    panel.set_ylabel("Resolution")
    panel.set_yscale("log")
    panel.grid()
    panel.legend(loc="upper right")
    return ax
def plot_containment(
    ax, dic1, dict_1, dic2, dict_2, dic3, dict_3, log_scale, i, j, idx_start
):
    """Draw the containment error-bar panel on ``ax[i, j]``.

    Every enabled series plots ``dict_n["fce_energy"]`` against
    ``dict_n["energy_ms"]`` with ``dict_n["fce_var_energy"]`` as the y error,
    skipping the first ``idx_start`` entries. The series are told apart by
    marker and colour only; no legend is drawn.

    Args:
        ax: Indexable grid of axes; only ``ax[i, j]`` is drawn on.
        dic1, dic2, dic3: Truthy flags enabling the first, second and third
            series respectively.
        dict_1, dict_2, dict_3: Mappings holding the data of those series.
        log_scale: When truthy, the x axis is switched to a log scale.
        i, j: Row and column of the target panel.
        idx_start: Index of the first point that is plotted.

    Returns:
        The ``ax`` object that was passed in.
    """
    panel = ax[i, j]

    # (enabled, data, colour index, marker); marker size is the module-level
    # ``marker_size``.
    series = (
        (dic1, dict_1, 0, "o"),
        (dic2, dict_2, 1, "."),
        (dic3, dict_3, 2, "^"),
    )
    for enabled, data, color_idx, marker in series:
        if not enabled:
            continue
        color = colors_list[color_idx]
        energy = np.array(data["energy_ms"])[idx_start:]
        value = np.array(data["fce_energy"])[idx_start:]
        spread = np.array(data["fce_var_energy"])[idx_start:]
        panel.errorbar(
            energy,
            value,
            spread,
            marker=marker,
            mfc=color,
            mec=color,
            ecolor=color,
            ms=marker_size,
            mew=4,
            linestyle="",
        )

    if log_scale:
        panel.set_xlabel("Log Reco Energy [GeV]")
        panel.set_xscale("log")
    else:
        panel.set_xlabel("Reco Energy [GeV]")
    panel.set_ylabel("Containment")
    panel.grid()
    return ax
def plot_purity(
    ax, dic1, dict_1, dic2, dict_2, dic3, dict_3, log_scale, i, j, idx_start
):
    """Draw the purity error-bar panel on ``ax[i, j]``.

    Every enabled series plots ``dict_n["purity_energy"]`` against
    ``dict_n["energy_ms"]`` with ``dict_n["purity_var_energy"]`` as the y
    error, skipping the first ``idx_start`` entries. The series are told apart
    by marker and colour only; no legend is drawn.

    Args:
        ax: Indexable grid of axes; only ``ax[i, j]`` is drawn on.
        dic1, dic2, dic3: Truthy flags enabling the first, second and third
            series respectively.
        dict_1, dict_2, dict_3: Mappings holding the data of those series.
        log_scale: When truthy, the x axis is switched to a log scale.
        i, j: Row and column of the target panel.
        idx_start: Index of the first point that is plotted.

    Returns:
        The ``ax`` object that was passed in.
    """
    panel = ax[i, j]

    # (enabled, data, colour index, marker); marker size is the module-level
    # ``marker_size``.
    series = (
        (dic1, dict_1, 0, "o"),
        (dic2, dict_2, 1, "."),
        (dic3, dict_3, 2, "^"),
    )
    for enabled, data, color_idx, marker in series:
        if not enabled:
            continue
        color = colors_list[color_idx]
        energy = np.array(data["energy_ms"])[idx_start:]
        value = np.array(data["purity_energy"])[idx_start:]
        spread = np.array(data["purity_var_energy"])[idx_start:]
        panel.errorbar(
            energy,
            value,
            spread,
            marker=marker,
            mec=color,
            mfc=color,
            ecolor=color,
            ms=marker_size,
            mew=4,
            linestyle="",
        )

    if log_scale:
        panel.set_xlabel("Log Reco Energy [GeV]")
        panel.set_xscale("log")
    else:
        panel.set_xlabel("Reco Energy [GeV]")
    panel.set_ylabel("Purity")
    panel.grid()
    return ax
def plot_metrics(
    neutrals_only,
    dic1,
    dic2,
    dic3,
    dict_1,
    dict_2,
    dict_3,
    colors_list,
    PATH_store,
    log_scale,
):
    """Build the 4x4 summary figure of all metric panels and save it as a PNG.

    Each metric is drawn twice in its row: with a log x axis in column 0 or 1
    and with a linear x axis two columns to the right. The first five points
    of every series are skipped (``idx_start = 5``).

    Args:
        neutrals_only: Selects which of the two output file names is used.
        dic1, dic2, dic3: Truthy flags enabling the three series in each panel.
        dict_1, dict_2, dict_3: Data mappings forwarded to the panel helpers.
        colors_list: Not read here; the panel helpers use the module-level
            ``colors_list``.
        PATH_store: String prefix the output file name is appended to.
        log_scale: Not read here; both x-axis scalings are always drawn.

    Returns:
        None.
    """
    idx_start = 5
    fig, ax = plt.subplots(4, 4, figsize=(9 * 4, 8 * 4))

    # (panel helper, grid row, column of the log-x variant). The fitted
    # resolution overlays (plot_fit_energy_resolution) are currently disabled.
    layout = (
        (plot_eff, 0, 0),  # efficiency
        (plot_fakes, 0, 1),  # fake rates
        (plot_response, 1, 0),  # response vs reco energy
        (plot_resolution, 1, 1),  # resolution vs reco energy
        (plot_response_trueE, 2, 0),  # response vs true energy
        (plot_resolution_trueE, 2, 1),  # resolution vs true energy
        (plot_containment, 3, 0),  # containment
        (plot_purity, 3, 1),  # purity
    )
    for draw_panel, row, log_col in layout:
        for use_log, col in ((True, log_col), (False, log_col + 2)):
            ax = draw_panel(
                ax,
                dic1,
                dict_1,
                dic2,
                dict_2,
                dic3,
                dict_3,
                use_log,
                row,
                col,
                idx_start,
            )

    if neutrals_only:
        file_name = "testeq_rec_comp_MVP_68_calibrated_neutrals_compare_to_V8.png"
    else:
        file_name = "testeq_rec_comp_MVP_68_calibrated_compare_to_V8.png"
    fig.savefig(PATH_store + file_name, bbox_inches="tight")
def plot_histograms_energy(
    dic1, dic2, dict_1, dict_2, dict_3, neutrals_only=False, PATH_store=None
):
    """Plot per-bin histograms for "MLPF" and "Pandora" and save them.

    One row is drawn for each of the bin indices 5, 6, 10 and 20. The left
    column overlays the ``"<bin>reco"`` histograms of both algorithms; the
    right column overlays the ``"<bin>true"`` histograms plus the MLPF
    ``"<bin>reco_showers"`` histogram. Titles show the edges of the bin taken
    from ``np.arange(0, 51, 2)``.

    Args:
        dic1, dic2: Not read by this function.
        dict_1: Mapping whose ``"dic_histograms"`` entry provides the "MLPF"
            data.
        dict_2: Ignored; it is replaced by ``dict_1`` on entry.
        dict_3: Mapping whose ``"dic_histograms"`` entry provides the
            "Pandora" data.
        neutrals_only: Selects which of the two output file names is used.
        PATH_store: String prefix the output file name is appended to. The
            default ``None`` cannot be concatenated and raises ``TypeError``
            at save time, so callers must supply a value.

    Returns:
        None.
    """
    # NOTE(review): the dict_2 argument is discarded here, so the "MLPF" curves
    # always come from dict_1 -- confirm this override is still intended.
    dict_2 = dict_1
    bins_plot_histogram = [5, 6, 10, 20]
    bins = np.arange(0, 51, 2)
    # Drawing options shared by every histogram in the figure.
    hist_style = {
        "stat": "percent",
        "element": "step",
        "fill": False,
        "binwidth": 0.02,
        "linewidth": 2,
    }

    fig, ax = plt.subplots(4, 2, figsize=(18, 25))
    for row, bin_name in enumerate(bins_plot_histogram):
        prefix = str(bin_name)
        reco_ax = ax[row, 0]
        true_ax = ax[row, 1]

        sns.histplot(
            data=np.array(dict_2["dic_histograms"][prefix + "reco"]),
            label="MLPF",
            color=colors_list[1],
            ax=reco_ax,
            **hist_style,
        )
        sns.histplot(
            dict_3["dic_histograms"][prefix + "reco"],
            label="Pandora",
            color=colors_list[2],
            ax=reco_ax,
            **hist_style,
        )
        sns.histplot(
            dict_2["dic_histograms"][prefix + "true"],
            label="MLPF",
            color=colors_list[1],
            ax=true_ax,
            **hist_style,
        )
        sns.histplot(
            dict_3["dic_histograms"][prefix + "true"],
            label="Pandora",
            color=colors_list[2],
            ax=true_ax,
            **hist_style,
        )
        sns.histplot(
            dict_2["dic_histograms"][prefix + "reco_showers"],
            label="Reco",
            color=colors_list[0],
            ax=true_ax,
            **hist_style,
        )

        # Both panels of a row carry the same bin-range title.
        title = (
            "["
            + str(np.round(bins[bin_name], 2))
            + ", "
            + str(np.round(bins[bin_name + 1], 2))
            + "]"
            + " GeV"
        )

        true_ax.set_xlabel("E pred / True Energy [GeV]")
        true_ax.grid()
        true_ax.legend(loc="upper right")
        true_ax.set_title(title)

        reco_ax.set_xlabel("E pred / Reco Energy [GeV]")
        reco_ax.grid()
        reco_ax.set_xlim([0, 2])
        reco_ax.legend(loc="upper right")
        reco_ax.set_title(title)

    fig.tight_layout(pad=2.0)
    if neutrals_only:
        file_name = "histograms_energy_neutrals_only_iou_th.png"
    else:
        file_name = "histograms_energy_iou_th.png"
    fig.savefig(PATH_store + file_name, bbox_inches="tight")
def plot_correction(
    dic1, dic2, dict_1, dict_2, dict_3, neutrals_only=False, PATH_store=None
):
    """Scatter the per-bin energy pairs for four selected bins and save the figure.

    One row is drawn for each of the bin indices 5, 6, 10 and 20:
    column 0 shows ``"<bin>pred_corr_e"`` against ``"<bin>pred_e"`` from
    ``dict_3`` ("Pandora"), column 1 the same pair from ``dict_2`` ("MLPF"),
    and column 2 ``"<bin>true_baseline"`` against ``"<bin>reco_baseline"``
    from ``dict_2`` ("True"). Titles show the edges of the bin taken from a
    logarithmically spaced binning.

    Args:
        dic1, dic2, dict_1: Not read by this function.
        dict_2, dict_3: Mappings whose ``"dic_histograms"`` entry provides the
            plotted sequences.
        neutrals_only: Selects which of the two output file names is used.
        PATH_store: String prefix the output file name is appended to; the
            default ``None`` cannot be concatenated, so callers must supply
            a value.

    Returns:
        None.
    """
    bins = np.exp(np.arange(np.log(0.1), np.log(80), 0.3))
    selected_bins = [5, 6, 10, 20]

    fig, ax = plt.subplots(4, 3, figsize=(9 * 3, 25))
    for row, bin_name in enumerate(selected_bins):
        prefix = str(bin_name)

        ax[row, 0].scatter(
            dict_3["dic_histograms"][prefix + "pred_e"],
            dict_3["dic_histograms"][prefix + "pred_corr_e"],
            label="Pandora",
            color=colors_list[2],
        )
        ax[row, 1].scatter(
            dict_2["dic_histograms"][prefix + "pred_e"],
            dict_2["dic_histograms"][prefix + "pred_corr_e"],
            label="MLPF",
            c=colors_list[1],
            marker="^",
        )
        # NOTE(review): this panel plots the reco/true baseline pair but gets
        # the same "E pred" / "E calibrated" axis labels as the other two --
        # confirm the labels are intended.
        ax[row, 2].scatter(
            dict_2["dic_histograms"][prefix + "reco_baseline"],
            dict_2["dic_histograms"][prefix + "true_baseline"],
            label="True",
            color=colors_list[0],
        )

        # All three panels of a row share labels, legend position and title.
        title = (
            "["
            + str(np.round(bins[bin_name], 2))
            + ", "
            + str(np.round(bins[bin_name + 1], 2))
            + "]"
            + " GeV"
        )
        for col in range(3):
            panel = ax[row, col]
            panel.set_xlabel("E pred [GeV]")
            panel.set_ylabel("E calibrated[GeV]")
            panel.grid()
            panel.legend(loc="upper right")
            panel.set_title(title)

    fig.tight_layout(pad=2.0)
    if neutrals_only:
        file_name = "correction_energy_neutrals_only.png"
    else:
        file_name = "correction_energy.png"
    fig.savefig(PATH_store + file_name, bbox_inches="tight")