"""Plot example events for evaluated clustering models.

For every evaluation output file ``eval_<ds>.pkl`` of every model listed in
``models``, the first ``n_events_per_file`` events are drawn in several panels
(colored by model clusters, colored by anti-kt jets, and in the model's
clustering space) and the resulting figures are saved as PDFs in
``output_path``.
"""
# Imports are grouped stdlib / third-party / project; within each group the
# order of the original file is kept.
import pickle
import os
from pathlib import Path

import torch
import matplotlib.pyplot as plt
import numpy as np
import fastjet
import wandb

from src.utils.paths import get_path
from src.utils.utils import CPU_Unpickler
from src.dataset.dataset import EventDataset
from src.plotting.plot_event import plot_event
from src.layers.object_cond import calc_eta_phi

#%%


def get_properties(name):
    """Parse (mediator mass, dark quark mass, r_inv) from a dataset path.

    Names containing "qcd" (case-insensitive) yield ``(0, 0, 0)``. Otherwise
    the last path component is split on "_" and three consecutive
    ``key-value`` fields are read, starting at field 1; if that fails, the
    same three fields are read starting at field 2 (another naming
    convention).

    Returns:
        Tuple ``(mMed, mDark, rinv)`` as ``(int, int, float)``.

    Raises:
        IndexError, ValueError: if neither naming convention matches.
    """
    if "qcd" in name.lower():
        return 0, 0, 0  # Standard Model events

    # get mediator mass, dark quark mass, r_inv from the filename
    parts = name.strip().strip("/").split("/")[-1].split("_")

    def _parse(first):
        # Each field looks like "<key>-<value>"; keep the value part.
        return (
            int(parts[first].split("-")[1]),
            int(parts[first + 1].split("-")[1]),
            float(parts[first + 2].split("-")[1]),
        )

    try:
        return _parse(1)
    except (IndexError, ValueError):
        # another convention: the fields are shifted by one
        return _parse(2)


#%%
clist = ['#1f78b4', '#b3df8a', '#33a02c', '#fb9a99', '#e31a1c', '#fdbe6f',
         '#ff7f00', '#cab2d6', '#6a3d9a', '#ffff99', '#b15928']
# Color per cluster/jet index; -1 is drawn in gray.
colors = {
    -1: "gray",
    0: clist[0],
    1: clist[1],
    2: clist[2],
    3: clist[3],
    4: clist[4],
    5: clist[5],
    6: clist[6],
    7: clist[7],
}

#%%
# NOTE: `models` is reassigned several times below; only the LAST assignment
# is used by the plotting loop. The earlier ones are kept as a record of
# previously plotted runs.

# The 'default' models:
models = {
    "GATr_rinv_03_m_900": "train/Test_betaPt_BC_all_datasets_2025_01_07_17_50_45",
    "GATr_rinv_07_m_900": "train/Test_betaPt_BC_all_datasets_2025_01_08_10_54_58",
    #"LGATr_rinv_03_m_900": "train/Test_LGATr_all_datasets_2025_01_08_19_27_54",
    "LGATr_rinv_07_m_900_s31k": "train/Eval_LGATr_SB_spatial_part_only_1_2025_01_13_14_31_58"
}

# Models with the varying R study
models = {
    #"R06": "train/Eval_GT_R_lgatr_R06_2025_01_16_13_41_48",
    #"R07": "train/Eval_GT_R_lgatr_R07_2025_01_16_13_41_41",
    #"R09": "train/Eval_GT_R_lgatr_R09_2025_01_16_13_41_45",
    "R=0.8": "train/Test_LGATr_all_datasets_2025_01_08_19_27_54",
    "R=1.0": "train/Eval_GT_R_lgatr_R10_2025_01_16_13_41_52",
    "R=1.4": "train/Eval_GT_R_lgatr_R14_2025_01_18_13_28_47",
    "R=2.0": "train/Eval_GT_R_lgatr_R20_2025_01_22_10_51_30"
}

## Objectness score models
models = {
    "R=2.0,OS_GT=closest_only": "train/Eval_objectness_score_2025_02_14_11_10_14",
    "R=2.0,GT=all_in_radius": "train/Eval_objectness_score_2025_02_12_15_34_33",
    "R=0.8,GT=all_in_radius": "train/Eval_objectness_score_2025_02_10_14_59_49"
}

# Parton-level, gen-level and scouting PFCands models
models = {
    "parton-level": "train/Eval_no_pid_eval_2025_03_04_15_55_38",
    "gen-level": "train/Eval_no_pid_eval_2025_03_04_15_54_50",
    "scouting": "train/Eval_no_pid_eval_2025_03_04_16_06_57"
}

# Parton-level, gen-level and scouting PFCands models
models = {
    "parton-level": "train/Eval_no_pid_eval_1_2025_03_05_14_41_16",
    "gen-level": "train/Eval_no_pid_eval_1_2025_03_05_14_40_30",
    "scouting": "train/Eval_no_pid_eval_1_2025_03_05_14_41_38"
}

models = {
    "parton-level": "train/Eval_no_pid_eval_full_1_2025_03_18_16_56_02",
    "scouting": "train/Eval_no_pid_eval_full_1_2025_03_17_21_19_22",
    "gen-level": "train/Eval_no_pid_eval_full_1_2025_03_18_16_45_41"
}

# Trained on all data!
models1 = {
    "parton-level": "train/Eval_no_pid_eval_full_1_2025_03_17_23_44_49",
    "scouting PFCands": "train/Eval_no_pid_eval_full_1_2025_03_18_15_31_41",
    "gen-level": "train/Eval_no_pid_eval_full_1_2025_03_18_15_31_58"
}

# Trained on 900_03, but evaluated with eta and pt filters for the particles
models = {
    "parton-level": "train/Eval_eval_19March2025_2025_03_19_22_08_15",
    "scouting PFCands": "train/Eval_eval_19March2025_2025_03_19_22_08_22",
    "gen-level": "train/Eval_eval_19March2025_2025_03_19_22_08_18"
}

api = wandb.Api()


def get_eval_run_names(tag):
    """Return the finished wandb runs of the project that carry `tag`.

    Returns:
        Tuple ``(names, configs)``: two parallel lists with the name and the
        config of every run whose state is "finished".
    """
    # from the api, get all the runs with the tag that are finished
    runs = api.runs(
        path="fcc_ml/svj_clustering",
        filters={"tags": {"$in": [tag.strip()]}}
    )
    finished = [run for run in runs if run.state == "finished"]
    return [run.name for run in finished], [run.config for run in finished]


def get_run_by_name(name):
    """Return the wandb run whose display name equals `name` (stripped).

    Returns:
        The run, or None when the query does not match exactly one run.
    """
    runs = api.runs(
        path="fcc_ml/svj_clustering",
        filters={"display_name": {"$eq": name.strip()}}
    )
    # len() makes the paginator fetch its first page if the count is not
    # known yet, instead of reading a possibly unset `length` attribute.
    if len(runs) != 1:
        return None
    return runs[0]


def get_models_from_tag(tag):
    """Build a ``{label: "train/<run name>"}`` dict for the runs tagged `tag`.

    The label is derived from the run config: the input level (parton / gen /
    otherwise "sc. "), followed by markers for soft-particle augmentation,
    the GT radius, a transformer network config and a few known
    `load_from_run` checkpoints. Runs that produce the same label overwrite
    each other.

    Raises:
        LookupError: if a run name returned for the tag cannot be resolved to
            exactly one run.
    """
    labelled = {}
    for run_name in get_eval_run_names(tag)[0]:
        print("Run:", run_name)
        run = get_run_by_name(run_name)
        if run is None:
            raise LookupError(
                f"Expected exactly one wandb run named {run_name!r}")
        config = run.config

        if config["parton_level"]:
            name = "parton-level"
        elif config["gen_level"]:
            name = "gen-level"
        else:
            name = "sc. "

        # NOTE(review): the nesting of the checks below was reconstructed
        # from a whitespace-mangled source; they are assumed to apply to
        # every input level, not only to the "sc. " branch - confirm.
        if config["augment_soft_particles"]:
            name += " (aug)"
        if config["gt_radius"]:
            name += " GT_R=" + str(config["gt_radius"])
        if "transformer" in config["network_config"]:
            name += " (T)"

        loaded_from = config["load_from_run"]
        if loaded_from == "debug_IRC_loss_weighted100_plus_ghosts_2025_04_09_13_48_55_569":
            name += " IRC"
        elif loaded_from == "LGATr_500part_NOQMin_2025_04_09_21_53_37_210":
            name += " NoIRC"
        elif loaded_from == "IRC_loss_Split_and_Noise_alternate_NoAug_2025_04_11_16_15_48_955":
            name += " IRC S+N"

        labelled[name] = "train/" + run.name
    return labelled


# with pt=1e-2 ghost particles, also trained on this
#models = get_models_from_tag("eval_19March2025_small_aug_vanishing_momentum_Qcap05_p1e-2")
#models = get_models_from_tag("eval_19March2025_small_aug_vanishing_momentum")
#models = get_models_from_tag("SmallDSReprod2")
#models = get_models_from_tag("eval_19March2025_pt1e-2_500particles_NoQMinReprod")

# Disabled reproduction study (was a ''' block string in the original):
#models = {}
##models["PL_aug_working"] = "train/Eval_eval_19March2025_small_aug_FTsoft1_2025_03_27_17_15_24_17" # This one was working ~ok for parton-level, why doesn't it work anymore?
#models["reprod1"] = "train/Eval_eval_19March2025_small_aug_vanishing_momentum_Qcap05_p1e-2_reprod_1_2025_03_30_16_20_37_779"
## reprod1 is using the same model as above, but eval'd on pt=1e-2 particles
## reprod2 has pt uniform 0.01-50 particles
#models["reprod2"] = "train/Eval_eval_19March2025_reprod_2_2025_03_30_17_37_54_193"
## reprod3: hdbscan min_samples set to 0

models = {
    "L-GATr": "train/Eval_DelphesPFfix_2025_05_05_08_21_23_380"
}

models = {
    "L-GATr": "train/Eval_DelphesPFfix_FullDataset_QCD_2025_05_15_17_42_39_541"
}

# This is the assignment that is actually plotted.
models = {
    "LGATrGP": "train/Eval_DelphesPFfix_FullDataset_TrainDSstudy_2025_05_29_09_11_46_534",
    #"LGATr": ""
}

#models = {
#    "QCD": "train/Eval_DelphesPFfix_FullDataset_TrainDSstudy_QCD_2025_05_18_21_54_43_705",
#    "700_07+900_03+QCD": "train/Eval_DelphesPFfix_FullDataset_TrainDSstudy_QCD_2025_05_18_22_18_36_991"
#}

print(models)

# R = 2.0 models
#models = {
#    "parton-level": "train/Eval_eval_19March2025_2025_03_19_22_55_48",
#    "gen-level": "train/Eval_eval_19March2025_2025_03_19_23_20_01",
#    "scouting PFCands": "train/Eval_eval_19March2025_2025_03_19_23_4x3_07"
#}

output_path = get_path("LGATr_model_out_examples_GP", "results")
#output_path=get_path("LGATr_model_output_examples", "results")
Path(output_path).mkdir(parents=True, exist_ok=True)

sz = 3  # size of one panel; multiplied into figsize below
n_events_per_file = 50
jet_pt_min = 30  # pt threshold for labelled/colored jets and the AK pt_cutoff

# The anti-kt jet definition is the same for every event, so build it once.
ak_jet_def = fastjet.JetDefinition(fastjet.antikt_algorithm, 0.8)

# Run configs fetched from wandb, keyed by run name, so that each run is
# queried once instead of once per dataset file.
_run_configs = {}


def _load_pickle(path):
    """Load `path` with CPU_Unpickler and close the file afterwards."""
    with open(path, "rb") as f:
        return CPU_Unpickler(f).load()


def _get_run_config(run_name):
    """Return the (cached) wandb config of the run called `run_name`.

    Raises:
        LookupError: if the name does not resolve to exactly one run.
    """
    if run_name not in _run_configs:
        run = get_run_by_name(run_name)
        if run is None:
            raise LookupError(
                f"Expected exactly one wandb run named {run_name!r}")
        _run_configs[run_name] = run.config
    return _run_configs[run_name]


# len(models) columns, n_events_per_file rows
for ds in range(25):
    print("-------- DS:", ds)
    # Three panels per model: colored by the model clusters, colored by the
    # AK jets, and the model clusters in clustering space.
    # squeeze=False keeps the axes 2-D even for a single row.
    fig, ax = plt.subplots(n_events_per_file, len(models) * 3,
                           figsize=(len(models) * sz * 3, n_events_per_file * sz),
                           squeeze=False)
    # also one only with real coordinates
    fig1, ax1 = plt.subplots(n_events_per_file, len(models) + 1,
                             figsize=(len(models) * sz, n_events_per_file * sz),
                             squeeze=False)

    # Properties of the last dataset plotted for this `ds`; stays None when
    # no model has an output file, in which case nothing is saved.
    properties = None

    for mn, model in enumerate(sorted(models)):
        print(" -------- Model:", model)
        dataset_path = models[model]
        filename = get_path(os.path.join(dataset_path, f"eval_{ds}.pkl"),
                            "results", fallback=1)
        clusters_file = get_path(
            os.path.join(dataset_path, f"clustering_hdbscan_4_05_{ds}.pkl"),
            "results", fallback=1)
        #clusters_file=None
        if not os.path.exists(filename):
            print("File does not exist:", filename)
            continue

        result = _load_pickle(filename)
        print(result["filename"])
        properties = get_properties(result["filename"])

        if os.path.exists(clusters_file):
            clusters = _load_pickle(clusters_file)
        else:
            # No separate clustering file: use the clusters stored with the
            # model output.
            clusters = result["model_cluster"].numpy()
            clusters_file = None

        run_config = _get_run_config(dataset_path.split("/")[-1])
        dataset = EventDataset.from_directory(
            result["filename"],
            mmap=True,
            model_output_file=filename,
            model_clusters_file=clusters_file,
            include_model_jets_unfiltered=True,
            aug_soft=run_config["augment_soft_particles"],
            seed=1000000,
            parton_level=run_config["parton_level"],
            gen_level=run_config["gen_level"],
            fastjet_R=[0.8])

        for e in range(n_events_per_file):
            print(" ----- event:", e)
            # NOTE(review): dataset[e] is re-indexed for every use, exactly
            # as in the original; it could be hoisted into one variable if
            # EventDataset.__getitem__ and the consumers below are free of
            # side effects - confirm before changing.
            uj = dataset[e].model_jets_unfiltered
            fj_jets, assignment = EventDataset.get_fastjet_jets_with_assignment(
                dataset[e], ak_jet_def, "pfcands", pt_cutoff=jet_pt_min)

            event_mask = result["event_idx"] == e
            cl = clusters[event_mask]

            # Clusters (noise label -1 excluded) whose model jet passes the
            # pt threshold; the position in this list picks the color.
            large_pt_clusters = [
                i for i in np.unique(cl)
                if i != -1 and uj.pt[i].item() >= jet_pt_min
            ]
            print("Large pt clusters:", large_pt_clusters)

            # Colors by AK jet assignment; particles without one are purple.
            c_ak = [
                colors.get(assignment[i], "purple") if i in assignment else "purple"
                for i in range(len(cl))
            ]
            # Colors by model cluster; clusters below threshold are purple.
            c = [
                colors.get(large_pt_clusters.index(i), "purple")
                if i in large_pt_clusters else "purple"
                for i in cl
            ]

            model_coords = result["pred"][event_mask]
            if model_coords.shape[1] == 5:
                # Drop the first of five columns before converting.
                model_coords = model_coords[:, 1:]
            model_coords = calc_eta_phi(model_coords, 0)

            plot_event(dataset[e], colors=c, ax=ax[e, 3 * mn],
                       pfcands=dataset.pfcands_key)
            plot_event(dataset[e], colors=c, ax=ax[e, 3 * mn + 2],
                       custom_coords=model_coords, pfcands=dataset.pfcands_key)
            plot_event(dataset[e], colors=c_ak, ax=ax[e, 3 * mn + 1],
                       pfcands=dataset.pfcands_key)
            plot_event(dataset[e], colors=c, ax=ax1[e, mn],
                       pfcands=dataset.pfcands_key)

            # print the pt of each jet above threshold next to the jet
            for j in range(len(fj_jets)):
                ak_pt = fj_jets.pt[j].item()
                if ak_pt >= jet_pt_min:
                    ax[e, 3 * mn + 1].text(
                        fj_jets.eta[j].item() + 0.1, fj_jets.phi[j].item() + 0.1,
                        "AK pt=" + str(round(ak_pt, 1)),
                        color="blue", fontsize=6, alpha=0.5)
            for i in range(len(uj.pt)):
                model_pt = uj.pt[i].item()
                if model_pt >= jet_pt_min:
                    label = "M pt=" + str(round(model_pt, 1))
                    ax[e, 3 * mn].text(uj.eta[i], uj.phi[i], label,
                                       color="black", fontsize=6, alpha=0.5)
                    ax1[e, mn].text(uj.eta[i], uj.phi[i], label,
                                    color="black", fontsize=6, alpha=0.5)
                    #ax[e, 2*mn+1].text(model_coords[0][i], model_coords[1][i], round(uj.pt[i].item(), 1), color="black", fontsize=10, alpha=0.5)

            ax[e, 3 * mn].set_title(model)
            ax1[e, mn].set_title(model)
            ax[e, 3 * mn + 2].set_title(model + " (clust. space)")
            ax[e, 3 * mn + 1].set_title(model + " (colored AK clust.)")

    if properties is None:
        # Without this guard the file name would be built from the previous
        # dataset's properties and its PDF overwritten with empty axes.
        print("No model output found for DS", ds, "- nothing saved")
    else:
        m_med, m_dark, r_inv = properties
        fig.tight_layout()
        fig1.tight_layout()
        # NOTE(review): datasets with identical properties (e.g. every QCD
        # file maps to 0/0/0) write to the same file name - confirm intended.
        stem = f"m_med_{m_med}_m_dark_{m_dark}_r_inv_{str(r_inv).replace('.', '')}"
        fname = os.path.join(output_path, stem + ".pdf")
        fig.savefig(fname)
        fig1.savefig(os.path.join(output_path, stem + "_real_only.pdf"))
        print("Saving to", fname)

    # Release the figures; otherwise all 2 x 25 large figures stay in memory.
    plt.close(fig)
    plt.close(fig1)