# jetclustering/src/evaluation/clustering_metrics.py
# Source: Hugging Face file viewer (author: gregorkrzmanc, commit e75a247, 4.07 kB).
from tqdm import tqdm
import numpy as np
from src.dataset.dataset import EventDataset, get_batch_bounds
def compute_f1_score(dataset, dataset_cap=-1):
    """Compute the F1 score of dark-quark-to-jet matching over a dataset.

    For every evaluated event, a dark quark (an entry of
    ``matrix_element_gen_particles``) counts as matched when at least one
    model jet lies within ``R = 0.8`` of it in the (eta, phi) plane.  The
    counts are accumulated over all events and combined as::

        precision = n_matched_dark_quarks / n_jets
        recall    = n_matched_dark_quarks / n_dark_quarks
        f1        = 2 * precision * recall / (precision + recall)

    Args:
        dataset: Indexable collection supporting ``len()``.  Each item must
            expose ``model_jets`` and ``matrix_element_gen_particles``; both
            must support ``len()`` and carry ``eta`` and ``phi`` attributes.
            NOTE(review): ``eta``/``phi`` are assumed to hold one value per
            object and to be convertible with ``np.asarray`` - confirm.
        dataset_cap: Maximum number of events to evaluate.  ``-1`` (the
            default) evaluates the whole dataset.

    Returns:
        The F1 score as a float, or ``0.0`` when precision or recall is zero
        (including the cases with no jets or no dark quarks at all).
    """
    R = 0.8  # matching radius in the (eta, phi) plane

    # [n_relevant_retrieved, all_retrieved, all_relevant], or in our
    # language: [n_matched_dark_quarks, n_jets, n_dark_quarks].
    n_matched_dark_quarks = 0
    n_jets = 0
    n_dark_quarks = 0

    # Bound the loop up front so that exactly `dataset_cap` events are
    # evaluated (the previous counter-based check stopped one event early and
    # loaded an event it never used).
    n_events = len(dataset)
    if dataset_cap != -1:
        n_events = min(n_events, dataset_cap)

    for x in tqdm(range(n_events)):
        data = dataset[x]
        jets_object = data.model_jets
        dark_quarks = data.matrix_element_gen_particles

        n_jets += len(jets_object)
        n_dark_quarks += len(dark_quarks)
        if len(jets_object) == 0 or len(dark_quarks) == 0:
            # Nothing can be matched; also avoids a min() over an empty axis.
            continue

        jet_eta = np.asarray(jets_object.eta, dtype=float).reshape(-1)
        jet_phi = np.asarray(jets_object.phi, dtype=float).reshape(-1)
        dq_eta = np.asarray(dark_quarks.eta, dtype=float).reshape(-1)
        dq_phi = np.asarray(dark_quarks.phi, dtype=float).reshape(-1)

        # deltaR between each dark quark (rows) and each jet (columns).
        deta = dq_eta[:, None] - jet_eta[None, :]
        dphi = dq_phi[:, None] - jet_phi[None, :]
        # phi is periodic: wrap the difference into [-pi, pi) so that objects
        # on opposite sides of the +-pi seam are recognised as close.
        dphi = (dphi + np.pi) % (2 * np.pi) - np.pi
        distance_matrix = np.sqrt(deta**2 + dphi**2)

        # A dark quark is matched if its nearest jet is within R.
        nearest_jet_distance = distance_matrix.min(axis=1)
        n_matched_dark_quarks += int(np.sum(nearest_jet_distance <= R))

    precision = n_matched_dark_quarks / n_jets if n_jets != 0 else 0
    recall = n_matched_dark_quarks / n_dark_quarks if n_dark_quarks != 0 else 0
    if precision == 0 or recall == 0:
        return 0.0
    return 2 * precision * recall / (precision + recall)
def compute_f1_score_from_result(result, dataset):
    """Compute the dark-quark-to-jet matching F1 score from a model result.

    Jets are rebuilt per event with ``EventDataset.get_model_jets_static``
    from the event's ``pfcands`` and ``result["model_cluster"]``.  A dark
    quark (an entry of ``matrix_element_gen_particles``) counts as matched
    when at least one such jet lies within ``R = 0.8`` of it in the
    (eta, phi) plane.  Counts are accumulated over events and combined as::

        precision = n_matched_dark_quarks / n_jets
        recall    = n_matched_dark_quarks / n_dark_quarks
        f1        = 2 * precision * recall / (precision + recall)

    Args:
        result: Mapping with at least the keys ``"event_idx"`` (must support
            ``.max().int().item()``) and ``"model_cluster"``.  It is modified
            in place: ``result["event_idx_bounds"]`` is set to
            ``get_batch_bounds(result["event_idx"])``.
        dataset: Indexable collection supporting ``len()``.  Each item must
            expose ``pfcands`` and ``matrix_element_gen_particles``; the
            latter must support ``len()`` and carry ``eta`` and ``phi``.
            NOTE(review): ``eta``/``phi`` are assumed to hold one value per
            object and to be convertible with ``np.asarray`` - confirm.

    Returns:
        The F1 score as a float, or ``0.0`` when there are no jets, no dark
        quarks, or no matched dark quarks.
    """
    R = 0.8  # matching radius in the (eta, phi) plane

    # [n_relevant_retrieved, all_retrieved, all_relevant], or in our
    # language: [n_matched_dark_quarks, n_jets, n_dark_quarks].
    n_matched_dark_quarks = 0
    n_jets = 0
    n_dark_quarks = 0

    result["event_idx_bounds"] = get_batch_bounds(result["event_idx"])
    # NOTE(review): only events with index strictly below the largest event
    # index in `result` are evaluated, so the event whose index equals the
    # maximum is skipped - confirm this is intended.
    max_event_idx = result["event_idx"].max().int().item()
    n_events = min(len(dataset), max_event_idx)

    for x in tqdm(range(n_events)):
        data = dataset[x]
        jets_object = EventDataset.get_model_jets_static(
            x, data.pfcands, result, result["model_cluster"]
        )
        if jets_object is None:
            # Event contributes to neither the jet nor the dark-quark totals.
            continue
        dark_quarks = data.matrix_element_gen_particles

        n_jets += len(jets_object)
        n_dark_quarks += len(dark_quarks)
        if len(jets_object) == 0 or len(dark_quarks) == 0:
            # Nothing can be matched; also avoids a min() over an empty axis.
            continue

        jet_eta = np.asarray(jets_object.eta, dtype=float).reshape(-1)
        jet_phi = np.asarray(jets_object.phi, dtype=float).reshape(-1)
        dq_eta = np.asarray(dark_quarks.eta, dtype=float).reshape(-1)
        dq_phi = np.asarray(dark_quarks.phi, dtype=float).reshape(-1)

        # deltaR between each dark quark (rows) and each jet (columns).
        deta = dq_eta[:, None] - jet_eta[None, :]
        dphi = dq_phi[:, None] - jet_phi[None, :]
        # phi is periodic: wrap the difference into [-pi, pi) so that objects
        # on opposite sides of the +-pi seam are recognised as close.
        dphi = (dphi + np.pi) % (2 * np.pi) - np.pi
        distance_matrix = np.sqrt(deta**2 + dphi**2)

        # A dark quark is matched if its nearest jet is within R.
        nearest_jet_distance = distance_matrix.min(axis=1)
        n_matched_dark_quarks += int(np.sum(nearest_jet_distance <= R))

    # With zero matches precision and recall are both zero, which would make
    # the F1 formula 0/0; report 0 like compute_f1_score does.
    if n_jets == 0 or n_dark_quarks == 0 or n_matched_dark_quarks == 0:
        return 0.0
    precision = n_matched_dark_quarks / n_jets
    recall = n_matched_dark_quarks / n_dark_quarks
    return 2 * precision * recall / (precision + recall)