# Evaluation utilities: streaming metric meters and partition-level result reporting.
import numpy as np
import pandas as pd
from sklearn import metrics

# Silence divide-by-zero / invalid-value warnings from numpy; downstream metric
# code tolerates the resulting nan/inf values.
np.seterr(divide="ignore", invalid="ignore")
class AverageMeter:
    """Track a running average of a scalar value (e.g. a loss per batch).

    Attributes:
        val: most recent value passed to ``update``.
        sum: weighted sum of all values seen since the last ``reset``.
        count: total weight (number of samples) seen since the last ``reset``.
        avg: running weighted average, ``sum / count``.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all accumulated statistics."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` (e.g. batch size) and refresh ``avg``.

        Note: ``n`` must be positive at least once before ``avg`` is valid;
        a zero total count would raise ``ZeroDivisionError``.
        """
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
class F1Meter:
    """Accumulate predictions across batches and expose a running F1 score.

    Args:
        average: averaging mode forwarded to ``sklearn.metrics.f1_score``
            (e.g. "binary", "macro", "micro").

    Attributes:
        avg: F1 score over everything accumulated since the last ``reset``
            (only defined after the first ``update`` call).
    """

    def __init__(self, average="binary"):
        self.average = average
        self.reset()

    def update(self, y_true, y_pred):
        """Append a batch of labels/predictions and recompute the F1 score."""
        self.y_true = np.concatenate([self.y_true, y_true])
        self.y_pred = np.concatenate([self.y_pred, y_pred])
        self.avg = metrics.f1_score(self.y_true, self.y_pred, average=self.average)

    def reset(self):
        """Drop all accumulated labels and predictions."""
        self.y_true = np.array([])
        self.y_pred = np.array([])
class SensitivityMeter:
    """Accumulate predictions and expose running sensitivity (true positive rate).

    Sensitivity is recall with ``pos_label=1``.

    Args:
        average: averaging mode forwarded to ``sklearn.metrics.recall_score``.

    Attributes:
        avg: sensitivity over everything accumulated since the last ``reset``
            (only defined after the first ``update`` call).
    """

    def __init__(self, average="binary"):
        self.average = average
        self.reset()

    def update(self, y_true, y_pred):
        """Append a batch of labels/predictions and recompute sensitivity."""
        self.y_true = np.concatenate([self.y_true, y_true])
        self.y_pred = np.concatenate([self.y_pred, y_pred])
        self.avg = metrics.recall_score(
            self.y_true, self.y_pred, pos_label=1, average=self.average
        )

    def reset(self):
        """Drop all accumulated labels and predictions."""
        self.y_true = np.array([])
        self.y_pred = np.array([])
class SpecificityMeter:
    """Accumulate predictions and expose running specificity (true negative rate).

    Specificity is recall computed on the negative class (``pos_label=0``).

    Args:
        average: averaging mode forwarded to ``sklearn.metrics.recall_score``.

    Attributes:
        avg: specificity over everything accumulated since the last ``reset``
            (only defined after the first ``update`` call).
    """

    def __init__(self, average="binary"):
        self.average = average
        self.reset()

    def update(self, y_true, y_pred):
        """Append a batch of labels/predictions and recompute specificity."""
        self.y_true = np.concatenate([self.y_true, y_true])
        self.y_pred = np.concatenate([self.y_pred, y_pred])
        self.avg = metrics.recall_score(
            self.y_true, self.y_pred, pos_label=0, average=self.average
        )

    def reset(self):
        """Drop all accumulated labels and predictions."""
        self.y_true = np.array([])
        self.y_pred = np.array([])
class AccuracyMeter:
    """Accumulate predictions and expose running balanced accuracy.

    Balanced accuracy (``sklearn.metrics.balanced_accuracy_score``) averages
    per-class recall, so it is robust to class imbalance.

    Attributes:
        avg: balanced accuracy over everything accumulated since the last
            ``reset`` (only defined after the first ``update`` call).
    """

    def __init__(self):
        self.reset()

    def update(self, y_true, y_pred):
        """Append a batch of labels/predictions and recompute balanced accuracy."""
        self.y_true = np.concatenate([self.y_true, y_true])
        self.y_pred = np.concatenate([self.y_pred, y_pred])
        self.avg = metrics.balanced_accuracy_score(self.y_true, self.y_pred)

    def reset(self):
        """Drop all accumulated labels and predictions."""
        self.y_true = np.array([])
        self.y_pred = np.array([])
def get_part_result(test_pred_df):
    """Score test predictions broken down by partition (algorithm, singer, fake type, length).

    Mutates ``test_pred_df`` in place, adding helper columns ``singer``,
    ``fake_type``, ``length`` and ``duration_part``.

    Expected columns on ``test_pred_df``: ``artist_overlap``, ``label``,
    ``duration``, ``target``, ``algorithm``, ``y_true``, ``y_pred``
    (``y_pred`` as probabilities, thresholded at 0.5).

    Returns:
        A ``(part_result_df, result_dict)`` pair where ``part_result_df`` has
        columns ``category``/``partition``/``score``/``size`` and
        ``result_dict`` maps ``"category/partition"`` to its score.
    """
    # `singer` column: whether the singer was seen during training.
    test_pred_df["singer"] = test_pred_df.artist_overlap.map(
        lambda x: "seen" if x else "unseen"
    )
    # `fake_type` column: the kind of fake song (taken directly from `label`).
    test_pred_df["fake_type"] = test_pred_df.label
    # `length` buckets: short <= 60s, long > 120s, medium in between.
    # Also stored under `duration_part` for backward compatibility.
    test_pred_df["length"] = test_pred_df["duration_part"] = test_pred_df[
        "duration"
    ].map(lambda t: "short" if t <= 60 else ("long" if t > 120 else "medium"))

    # Collect one row dict per partition, then build the DataFrame once at the
    # end (avoids the quadratic cost of pd.concat inside the loop).
    rows = []
    for cat in ["algorithm", "singer", "fake_type", "length"]:
        # Restrict to the relevant subset for each category.
        if cat in ["algorithm", "fake_type"]:
            cat_df = test_pred_df.query("target == 1")
        elif cat == "singer":
            cat_df = test_pred_df.query("target == 0")
        else:
            cat_df = test_pred_df.copy()
        for part in cat_df[cat].unique():
            part_df = cat_df[cat_df[cat] == part]
            y_true = part_df.y_true.values.astype(int)
            y_pred = (part_df.y_pred.values > 0.5).astype(int)
            # Macro-F1 for `length`; TPR for `algorithm`/`fake_type`; TNR for `singer`.
            if cat == "length":
                score = metrics.f1_score(y_true, y_pred, average="macro")
            else:
                score = metrics.recall_score(
                    y_true, y_pred, pos_label=0 if cat == "singer" else 1
                )
            rows.append(
                {
                    "category": cat,
                    "partition": part,
                    "score": score,
                    "size": len(part_df),
                }
            )

    part_result_df = pd.DataFrame(
        rows, columns=["category", "partition", "score", "size"]
    )
    result_dict = {f"{r['category']}/{r['partition']}": r["score"] for r in rows}
    return part_result_df, result_dict