Spaces:
Sleeping
Sleeping
from tqdm import tqdm | |
import numpy as np | |
from src.dataset.dataset import EventDataset, get_batch_bounds | |
def compute_f1_score(dataset, dataset_cap=-1): | |
R = 0.8 | |
counts = { | |
"n_matched_dark_quarks": 0, | |
"n_jets": 0, | |
"n_dark_quarks": 0 | |
} # Array of [n_relevant_retrieved, all_retrieved, all_relevant], or in our language, [n_matched_dark_quarks, n_jets, n_dark_quarks] | |
n = 0 | |
for x in tqdm(range(len(dataset))): | |
data = dataset[x] | |
jets_object = data.model_jets | |
n += 1 | |
if dataset_cap != -1 and n >= dataset_cap: | |
break | |
jets = [jets_object.eta, jets_object.phi] | |
dq = [data.matrix_element_gen_particles.eta, data.matrix_element_gen_particles.phi] | |
# calculate deltaR between each jet and each quark | |
distance_matrix = np.zeros((len(jets_object), len(data.matrix_element_gen_particles))) | |
for i in range(len(jets_object)): | |
for j in range(len(data.matrix_element_gen_particles)): | |
deta = jets[0][i] - dq[0][j] | |
dphi = jets[1][i] - dq[1][j] | |
distance_matrix[i, j] = np.sqrt(deta**2 + dphi**2) | |
# row-wise argmin | |
distance_matrix = distance_matrix.T | |
#min_distance = np.min(distance_matrix, axis=1) | |
n_jets = len(jets_object) | |
counts["n_jets"] += n_jets | |
counts["n_dark_quarks"] += len(data.matrix_element_gen_particles) | |
if len(jets_object): | |
quark_to_jet = np.min(distance_matrix, axis=1) | |
quark_to_jet[quark_to_jet > R] = -1 | |
counts["n_matched_dark_quarks"] += np.sum(quark_to_jet != -1) | |
if counts["n_jets"] != 0: | |
precision = counts["n_matched_dark_quarks"] / counts["n_jets"] | |
else: | |
precision = 0 | |
if counts["n_dark_quarks"] != 0: | |
recall = counts["n_matched_dark_quarks"] / counts["n_dark_quarks"] | |
else: | |
recall = 0 | |
if precision == 0 or recall == 0: | |
return 0 | |
f1_score = 2 * precision * recall / (precision + recall) | |
return f1_score | |
def compute_f1_score_from_result(result, dataset): | |
R = 0.8 | |
counts = { | |
"n_matched_dark_quarks": 0, | |
"n_jets": 0, | |
"n_dark_quarks": 0 | |
} # Array of [n_relevant_retrieved, all_retrieved, all_relevant], or in our language, [n_matched_dark_quarks, n_jets, n_dark_quarks] | |
result["event_idx_bounds"] = get_batch_bounds(result["event_idx"]) | |
l = result["event_idx"].max().int().item() | |
for x in tqdm(range(len(dataset))): | |
if x >= l: | |
break | |
data = dataset[x] | |
jets_object = EventDataset.get_model_jets_static(x, data.pfcands, result, result["model_cluster"]) | |
if jets_object is None: | |
continue | |
jets = [jets_object.eta, jets_object.phi] | |
dq = [data.matrix_element_gen_particles.eta, data.matrix_element_gen_particles.phi] | |
# calculate deltaR between each jet and each quark | |
distance_matrix = np.zeros((len(jets_object), len(data.matrix_element_gen_particles))) | |
for i in range(len(jets_object)): | |
for j in range(len(data.matrix_element_gen_particles)): | |
deta = jets[0][i] - dq[0][j] | |
dphi = jets[1][i] - dq[1][j] | |
distance_matrix[i, j] = np.sqrt(deta**2 + dphi**2) | |
# row-wise argmin | |
distance_matrix = distance_matrix.T | |
#min_distance = np.min(distance_matrix, axis=1) | |
n_jets = len(jets_object) | |
counts["n_jets"] += n_jets | |
counts["n_dark_quarks"] += len(data.matrix_element_gen_particles) | |
if len(jets_object): | |
quark_to_jet = np.min(distance_matrix, axis=1) | |
quark_to_jet[quark_to_jet > R] = -1 | |
counts["n_matched_dark_quarks"] += np.sum(quark_to_jet != -1) | |
if counts["n_jets"] == 0 or counts["n_dark_quarks"] == 0: | |
return 0 | |
precision = counts["n_matched_dark_quarks"] / counts["n_jets"] | |
recall = counts["n_matched_dark_quarks"] / counts["n_dark_quarks"] | |
f1_score = 2 * precision * recall / (precision + recall) | |
return f1_score | |