fix: make label_columns explicit parameter in label_row_soft
Browse files
- train_abuse_model.py (+1 −1)
- utils.py (+1 −1)
train_abuse_model.py
CHANGED
@@ -176,7 +176,7 @@
 df = df.dropna(subset=[text_column])
 logger.info(np.shape(df))

-df["label_vector"] = df.apply(label_row_soft, axis=1)
+df["label_vector"] = df.apply(lambda row: label_row_soft(row, label_columns), axis=1)
 label_matrix = df["label_vector"].tolist()

 # Proper 3-way split: train / val / test
utils.py
CHANGED
@@ -71,7 +71,7 @@ def tune_thresholds(probs, true_labels, verbose=True):
     return best_low, best_high, best_macro_f1

 # Convert label values to soft scores: "yes" = 1.0, "plausibly" = 0.5, others = 0.0
-def label_row_soft(row):
+def label_row_soft(row, label_columns):
     labels = []
     for col in label_columns:
         val = str(row[col]).strip().lower()