Abhishek Thakur committed
Commit 20116b4 · Parent(s): 2a67404

custom metrics

1 file changed: competitions/compute_metrics.py (+44 −37)

competitions/compute_metrics.py CHANGED
@@ -1,4 +1,6 @@
-from functools import partial
+import importlib
+import os
+import sys
 
 import pandas as pd
 from huggingface_hub import hf_hub_download
@@ -6,53 +8,58 @@ from sklearn import metrics
 
 
 def compute_metrics(params):
-    solution_file = hf_hub_download(
-        repo_id=params.competition_id,
-        filename="solution.csv",
-        token=params.token,
-        repo_type="dataset",
-    )
+    if params.metric == "custom":
+        metric_file = hf_hub_download(
+            repo_id=params.competition_id,
+            filename="metric.py",
+            token=params.token,
+            repo_type="dataset",
+        )
+        sys.path.append(os.path.dirname(metric_file))
+        metric = importlib.import_module("metric")
+        evaluation = metric.compute(params)
+    else:
+        solution_file = hf_hub_download(
+            repo_id=params.competition_id,
+            filename="solution.csv",
+            token=params.token,
+            repo_type="dataset",
+        )
 
-    solution_df = pd.read_csv(solution_file)
+        solution_df = pd.read_csv(solution_file)
 
-    submission_filename = f"submissions/{params.team_id}-{params.submission_id}.csv"
-    submission_file = hf_hub_download(
-        repo_id=params.competition_id,
-        filename=submission_filename,
-        token=params.token,
-        repo_type="dataset",
-    )
-    submission_df = pd.read_csv(submission_file)
+        submission_filename = f"submissions/{params.team_id}-{params.submission_id}.csv"
+        submission_file = hf_hub_download(
+            repo_id=params.competition_id,
+            filename=submission_filename,
+            token=params.token,
+            repo_type="dataset",
+        )
+        submission_df = pd.read_csv(submission_file)
 
-    public_ids = solution_df[solution_df.split == "public"][params.submission_id_col].values
-    private_ids = solution_df[solution_df.split == "private"][params.submission_id_col].values
+        public_ids = solution_df[solution_df.split == "public"][params.submission_id_col].values
+        private_ids = solution_df[solution_df.split == "private"][params.submission_id_col].values
 
-    public_solution_df = solution_df[solution_df[params.submission_id_col].isin(public_ids)]
-    public_submission_df = submission_df[submission_df[params.submission_id_col].isin(public_ids)]
+        public_solution_df = solution_df[solution_df[params.submission_id_col].isin(public_ids)]
+        public_submission_df = submission_df[submission_df[params.submission_id_col].isin(public_ids)]
 
-    private_solution_df = solution_df[solution_df[params.submission_id_col].isin(private_ids)]
-    private_submission_df = submission_df[submission_df[params.submission_id_col].isin(private_ids)]
+        private_solution_df = solution_df[solution_df[params.submission_id_col].isin(private_ids)]
+        private_submission_df = submission_df[submission_df[params.submission_id_col].isin(private_ids)]
 
-    public_solution_df = public_solution_df.sort_values(params.submission_id_col).reset_index(drop=True)
-    public_submission_df = public_submission_df.sort_values(params.submission_id_col).reset_index(drop=True)
+        public_solution_df = public_solution_df.sort_values(params.submission_id_col).reset_index(drop=True)
+        public_submission_df = public_submission_df.sort_values(params.submission_id_col).reset_index(drop=True)
 
-    private_solution_df = private_solution_df.sort_values(params.submission_id_col).reset_index(drop=True)
-    private_submission_df = private_submission_df.sort_values(params.submission_id_col).reset_index(drop=True)
+        private_solution_df = private_solution_df.sort_values(params.submission_id_col).reset_index(drop=True)
+        private_submission_df = private_submission_df.sort_values(params.submission_id_col).reset_index(drop=True)
 
-    if params.metric == "f1-macro":
-        _metric = partial(metrics.f1_score, average="macro")
-        target_cols = [col for col in solution_df.columns if col not in [params.submission_id_col, "split"]]
-        public_score = _metric(public_solution_df[target_cols], public_submission_df[target_cols])
-        private_score = _metric(private_solution_df[target_cols], private_submission_df[target_cols])
-    else:
         _metric = getattr(metrics, params.metric)
         target_cols = [col for col in solution_df.columns if col not in [params.submission_id_col, "split"]]
         public_score = _metric(public_solution_df[target_cols], public_submission_df[target_cols])
         private_score = _metric(private_solution_df[target_cols], private_submission_df[target_cols])
 
-    # scores can also be dictionaries for multiple metrics
-    evaluation = {
-        "public_score": public_score,
-        "private_score": private_score,
-    }
+        # scores can also be dictionaries for multiple metrics
+        evaluation = {
+            "public_score": public_score,
+            "private_score": private_score,
+        }
     return evaluation
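With this change, setting params.metric to "custom" makes compute_metrics download a metric.py from the competition dataset repo, put its directory on sys.path, import it, and delegate scoring to its compute(params) function. The loader fixes only that interface; what compute does internally is up to the competition organizer. Below is a minimal sketch of what such a metric.py could look like. It is an assumption for illustration, not a shipped example: the CSV layout mirrors the built-in else branch shown above, and quadratic-weighted kappa is chosen only because it needs a keyword argument that the getattr(metrics, params.metric) path cannot pass.

# metric.py (hypothetical sketch): a custom competition metric.
# Only the module name ("metric") and the compute(params) entry point
# are fixed by compute_metrics; everything else here is assumed.
import pandas as pd
from huggingface_hub import hf_hub_download
from sklearn import metrics


def compute(params):
    # Fetch solution and submission the same way the built-in branch does.
    solution_file = hf_hub_download(
        repo_id=params.competition_id,
        filename="solution.csv",
        token=params.token,
        repo_type="dataset",
    )
    submission_file = hf_hub_download(
        repo_id=params.competition_id,
        filename=f"submissions/{params.team_id}-{params.submission_id}.csv",
        token=params.token,
        repo_type="dataset",
    )
    solution_df = pd.read_csv(solution_file)
    submission_df = pd.read_csv(submission_file)

    target_cols = [col for col in solution_df.columns if col not in [params.submission_id_col, "split"]]

    scores = {}
    for split in ("public", "private"):
        ids = solution_df[solution_df.split == split][params.submission_id_col].values
        sol = solution_df[solution_df[params.submission_id_col].isin(ids)].sort_values(params.submission_id_col)
        sub = submission_df[submission_df[params.submission_id_col].isin(ids)].sort_values(params.submission_id_col)
        # A metric the built-in branch cannot express: cohen_kappa_score
        # requires the weights="quadratic" keyword argument.
        scores[split] = metrics.cohen_kappa_score(
            sol[target_cols].values.ravel(),
            sub[target_cols].values.ravel(),
            weights="quadratic",
        )

    # Return the same shape compute_metrics produces in its else branch.
    return {"public_score": scores["public"], "private_score": scores["private"]}

Note that because compute_metrics imports the module under the fixed name "metric", the file must be named metric.py at the root of the competition dataset repo, and its return value should match the {"public_score": ..., "private_score": ...} shape (per the comment in the diff, the scores themselves can also be dictionaries holding multiple metrics).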