Abhishek Thakur committed
Commit 20116b4 · 1 Parent(s): 2a67404

custom metrics

Files changed (1)
  1. competitions/compute_metrics.py +44 -37
competitions/compute_metrics.py CHANGED
@@ -1,4 +1,6 @@
-from functools import partial
+import importlib
+import os
+import sys
 
 import pandas as pd
 from huggingface_hub import hf_hub_download
@@ -6,53 +8,58 @@ from sklearn import metrics
 
 
 def compute_metrics(params):
-    solution_file = hf_hub_download(
-        repo_id=params.competition_id,
-        filename="solution.csv",
-        token=params.token,
-        repo_type="dataset",
-    )
+    if params.metric == "custom":
+        metric_file = hf_hub_download(
+            repo_id=params.competition_id,
+            filename="metric.py",
+            token=params.token,
+            repo_type="dataset",
+        )
+        sys.path.append(os.path.dirname(metric_file))
+        metric = importlib.import_module("metric")
+        evaluation = metric.compute(params)
+    else:
+        solution_file = hf_hub_download(
+            repo_id=params.competition_id,
+            filename="solution.csv",
+            token=params.token,
+            repo_type="dataset",
+        )
 
-    solution_df = pd.read_csv(solution_file)
+        solution_df = pd.read_csv(solution_file)
 
-    submission_filename = f"submissions/{params.team_id}-{params.submission_id}.csv"
-    submission_file = hf_hub_download(
-        repo_id=params.competition_id,
-        filename=submission_filename,
-        token=params.token,
-        repo_type="dataset",
-    )
-    submission_df = pd.read_csv(submission_file)
+        submission_filename = f"submissions/{params.team_id}-{params.submission_id}.csv"
+        submission_file = hf_hub_download(
+            repo_id=params.competition_id,
+            filename=submission_filename,
+            token=params.token,
+            repo_type="dataset",
+        )
+        submission_df = pd.read_csv(submission_file)
 
-    public_ids = solution_df[solution_df.split == "public"][params.submission_id_col].values
-    private_ids = solution_df[solution_df.split == "private"][params.submission_id_col].values
+        public_ids = solution_df[solution_df.split == "public"][params.submission_id_col].values
+        private_ids = solution_df[solution_df.split == "private"][params.submission_id_col].values
 
-    public_solution_df = solution_df[solution_df[params.submission_id_col].isin(public_ids)]
-    public_submission_df = submission_df[submission_df[params.submission_id_col].isin(public_ids)]
+        public_solution_df = solution_df[solution_df[params.submission_id_col].isin(public_ids)]
+        public_submission_df = submission_df[submission_df[params.submission_id_col].isin(public_ids)]
 
-    private_solution_df = solution_df[solution_df[params.submission_id_col].isin(private_ids)]
-    private_submission_df = submission_df[submission_df[params.submission_id_col].isin(private_ids)]
+        private_solution_df = solution_df[solution_df[params.submission_id_col].isin(private_ids)]
+        private_submission_df = submission_df[submission_df[params.submission_id_col].isin(private_ids)]
 
-    public_solution_df = public_solution_df.sort_values(params.submission_id_col).reset_index(drop=True)
-    public_submission_df = public_submission_df.sort_values(params.submission_id_col).reset_index(drop=True)
+        public_solution_df = public_solution_df.sort_values(params.submission_id_col).reset_index(drop=True)
+        public_submission_df = public_submission_df.sort_values(params.submission_id_col).reset_index(drop=True)
 
-    private_solution_df = private_solution_df.sort_values(params.submission_id_col).reset_index(drop=True)
-    private_submission_df = private_submission_df.sort_values(params.submission_id_col).reset_index(drop=True)
+        private_solution_df = private_solution_df.sort_values(params.submission_id_col).reset_index(drop=True)
+        private_submission_df = private_submission_df.sort_values(params.submission_id_col).reset_index(drop=True)
 
-    if params.metric == "f1-macro":
-        _metric = partial(metrics.f1_score, average="macro")
-        target_cols = [col for col in solution_df.columns if col not in [params.submission_id_col, "split"]]
-        public_score = _metric(public_solution_df[target_cols], public_submission_df[target_cols])
-        private_score = _metric(private_solution_df[target_cols], private_submission_df[target_cols])
-    else:
         _metric = getattr(metrics, params.metric)
         target_cols = [col for col in solution_df.columns if col not in [params.submission_id_col, "split"]]
        public_score = _metric(public_solution_df[target_cols], public_submission_df[target_cols])
         private_score = _metric(private_solution_df[target_cols], private_submission_df[target_cols])
 
-    # scores can also be dictionaries for multiple metrics
-    evaluation = {
-        "public_score": public_score,
-        "private_score": private_score,
-    }
+        # scores can also be dictionaries for multiple metrics
+        evaluation = {
+            "public_score": public_score,
+            "private_score": private_score,
+        }
     return evaluation
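
With this change, a competition can set its metric to "custom" and ship a metric.py at the root of its dataset repo; compute_metrics downloads that file, imports it, and returns whatever metric.compute(params) returns. For reference, a minimal sketch of such a file is below. It is a hypothetical example, not part of this commit: it assumes the same params attributes the built-in branch uses (competition_id, token, team_id, submission_id, submission_id_col) and picks mean absolute error purely for illustration.

metric.py (hypothetical example):

# metric.py -- hypothetical custom metric file, illustration only.
# Contract implied by the new code path: the competition's dataset repo
# ships a top-level metric.py exposing compute(params), and
# compute_metrics() returns its result unchanged.
import pandas as pd
from huggingface_hub import hf_hub_download
from sklearn import metrics


def compute(params):
    # Fetch the hidden solution and the team's submission, mirroring the
    # built-in branch of compute_metrics.
    solution_file = hf_hub_download(
        repo_id=params.competition_id,
        filename="solution.csv",
        token=params.token,
        repo_type="dataset",
    )
    submission_file = hf_hub_download(
        repo_id=params.competition_id,
        filename=f"submissions/{params.team_id}-{params.submission_id}.csv",
        token=params.token,
        repo_type="dataset",
    )
    solution_df = pd.read_csv(solution_file)
    submission_df = pd.read_csv(submission_file)

    target_cols = [col for col in solution_df.columns if col not in [params.submission_id_col, "split"]]

    scores = {}
    for split in ("public", "private"):
        # Align solution and submission rows on the id column per split.
        sol = solution_df[solution_df.split == split].sort_values(params.submission_id_col)
        sub = submission_df[submission_df[params.submission_id_col].isin(sol[params.submission_id_col])]
        sub = sub.sort_values(params.submission_id_col)
        # Any scoring logic can go here; MAE is used purely as an example.
        scores[f"{split}_score"] = metrics.mean_absolute_error(sol[target_cols], sub[target_cols])
    return scores

Because compute_metrics returns the custom result as-is, compute() should produce the same {"public_score": ..., "private_score": ...} shape the built-in branch does (and, per the comment in the diff, the score values can themselves be dictionaries of multiple metrics).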