Lakoc committed
Commit 034cbdf · Parent: 7da0d34

missing server script

Files changed (1): leaderboard_server.py (+92, −0)
leaderboard_server.py ADDED
@@ -0,0 +1,92 @@
+ import json
+ import os
+
+ import meeteval.io
+ import pandas as pd
+ from utils import calc_wer, aggregate_wer_metrics
+ from txt_norm import get_text_norm
+
+ # Constants
+ REFERENCE_BASE_PATH = "./references"  # e.g. ./references/single_channel/dataset1.json
+ TASKS_METADATA_PATH = "./tasks_metadata.json"
+
+
+ class LeaderboardServer:
+     def __init__(self):
+         self.local_leaderboard = os.path.abspath("submissions")
+         self.submission_id_to_file = {}  # Maps model_id to filepath
+         with open(TASKS_METADATA_PATH) as f:
+             self.tasks_metadata = json.load(f)["tasks"]
+         self.submission_ids = set()
+         self.results_file = os.path.join(self.local_leaderboard, "results.json")
+         os.makedirs(self.local_leaderboard, exist_ok=True)
+         self.fetch_existing_models()
+         self.text_normalizer = get_text_norm("whisper_nsf")
+
+     def fetch_existing_models(self):
+         self.submission_id_to_file.clear()
+         self.submission_ids.clear()
+
+         if not os.path.exists(self.results_file):
+             return
+         with open(self.results_file) as f:
+             results = json.load(f)
+         for model_id in results:
+             self.submission_ids.add(model_id)
+             hyp_path = os.path.join(self.local_leaderboard, f"{model_id}_hyp.json")
+             self.submission_id_to_file[model_id] = hyp_path
+
+     def prepare_model_for_submission(self, file, metadata, task, datasets):
+         submitted_by = metadata["submitted_by"]
+         model_id = metadata["model_id"]
+
+         # Run WER eval
+         results = {}
+         hyp_seglst = meeteval.io.load(file)
+
+         for dataset in datasets:
+             ref_path = os.path.join(REFERENCE_BASE_PATH, task, f"{dataset}.json")
+             ref_seglst = meeteval.io.load(ref_path)
+             sessions = ref_seglst.unique('session_id')
+             local_hyps = hyp_seglst.filter(lambda seg: seg['session_id'] in sessions)
+             # Normalize references and hypotheses identically before scoring
+             ref_seglst = ref_seglst.map(lambda seg: {**seg, "words": self.text_normalizer(seg["words"])})
+             local_hyps = local_hyps.map(lambda seg: {**seg, "words": self.text_normalizer(seg["words"])})
+             per_session_wers = calc_wer(tcp_hyp_seglst=local_hyps, ref_seglst=ref_seglst, collar=5, metrics_list=["tcp_wer"])
+             metrics = aggregate_wer_metrics(per_session_wers, ["tcp_wer"])
+             results[dataset] = metrics
+
+         # Update results file
+         if os.path.exists(self.results_file):
+             with open(self.results_file) as f:
+                 all_results = json.load(f)
+         else:
+             all_results = {}
+
+         all_results[model_id] = {
+             "submitted_by": submitted_by,
+             "results": results
+         }
+
+         with open(self.results_file, "w") as f:
+             json.dump(all_results, f, indent=2)
+
+     def update_leaderboard(self):
+         self.fetch_existing_models()
+
+     def get_leaderboard(self):
+         if not os.path.exists(self.results_file):
+             return pd.DataFrame(columns=["No submissions yet"])
+
+         with open(self.results_file) as f:
+             results = json.load(f)
+
+         # One row per model; one column per dataset holding its tcp_wer
+         rows = []
+         for model_id, content in results.items():
+             row = {"Model ID": model_id, "Submitted by": content["submitted_by"]}
+             for k, v in content["results"].items():
+                 row[k] = v.get("tcp_wer", None)
+             rows.append(row)
+
+         return pd.DataFrame(rows)
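
For context, a minimal sketch of how this server might be driven, assuming the script is imported as `leaderboard_server` and that a `single_channel` task with a `dataset1` reference exists (as in the `REFERENCE_BASE_PATH` comment above). The submission file path, submitter name, and model ID below are hypothetical:

```python
# Usage sketch; paths, task, and dataset names are hypothetical examples.
from leaderboard_server import LeaderboardServer

server = LeaderboardServer()

# Score a submitted hypothesis file against the chosen task's references
# and persist the aggregated tcp_wer numbers into submissions/results.json.
server.prepare_model_for_submission(
    file="my_model_hyp.json",  # hypothetical submission file
    metadata={"submitted_by": "alice", "model_id": "my_model"},
    task="single_channel",     # matches ./references/<task>/
    datasets=["dataset1"],     # matches ./references/<task>/<dataset>.json
)

# Re-read results.json and render the leaderboard: one row per model,
# one column per dataset with its tcp_wer.
server.update_leaderboard()
print(server.get_leaderboard())
```

A successful submission leaves `submissions/results.json` keyed by model ID, e.g. `{"my_model": {"submitted_by": "alice", "results": {"dataset1": {"tcp_wer": ...}}}}`, which `get_leaderboard` flattens into the DataFrame shown above.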