Abhishek Thakur committed
Commit bca2446 · 1 Parent(s): 3ea1b9b

fix private lb logic

competitions/competitions.py CHANGED
@@ -12,6 +12,7 @@ from .text import NO_SUBMISSIONS, SUBMISSION_SELECTION_TEXT, SUBMISSION_TEXT
 leaderboard = Leaderboard(
     end_date=competition_info.end_date,
     eval_higher_is_better=competition_info.eval_higher_is_better,
+    max_selected_submissions=competition_info.selection_limit,
     competition_id=COMPETITION_ID,
     autotrain_token=AUTOTRAIN_TOKEN,
 )
@@ -29,8 +30,23 @@ submissions = Submissions(
 def _my_submissions(user_token):
     df = submissions.my_submissions(user_token)
     if len(df) == 0:
-        return [gr.Markdown.update(visible=True, value=NO_SUBMISSIONS), gr.DataFrame.update(visible=False)]
-    return [gr.Markdown.update(visible=False), gr.DataFrame.update(visible=True, value=df)]
+        return [
+            gr.Markdown.update(visible=True, value=NO_SUBMISSIONS),
+            gr.DataFrame.update(visible=False),
+            gr.TextArea.update(visible=False),
+        ]
+    selected_submission_ids = df[df["selected"] == True]["submission_id"].values.tolist()
+    if len(selected_submission_ids) > 0:
+        return [
+            gr.Markdown.update(visible=True),
+            gr.DataFrame.update(visible=True, value=df),
+            gr.TextArea.update(visible=False, value="\n".join(selected_submission_ids)),
+        ]
+    return [
+        gr.Markdown.update(visible=False),
+        gr.DataFrame.update(visible=True, value=df),
+        gr.TextArea.update(visible=True),
+    ]
 
 
 with gr.Blocks() as demo:
@@ -64,11 +80,12 @@ with gr.Blocks() as demo:
             user_token = gr.Textbox(max_lines=1, value="hf_XXX", label="Please enter your Hugging Face token")
             output_text = gr.Markdown(visible=True, show_label=False)
             output_df = gr.DataFrame(visible=False)
+            selected_submissions = gr.TextArea(visible=False, label="Selected Submissions")
            my_subs_button = gr.Button("Fetch Submissions")
             my_subs_button.click(
                 fn=_my_submissions,
                 inputs=[user_token],
-                outputs=[output_text, output_df],
+                outputs=[output_text, output_df, selected_submissions],
             )
 
             fetch_lb_partial = partial(leaderboard.fetch, private=False)
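
Not part of the commit: a minimal, self-contained sketch of the selected-submission filtering that _my_submissions performs on the dataframe returned by submissions.my_submissions(). Only the column names submission_id and selected are taken from the diff above; the sample rows are invented.

import pandas as pd

# Invented sample data; in the app this dataframe comes from submissions.my_submissions().
df = pd.DataFrame(
    {
        "submission_id": ["sub-1", "sub-2", "sub-3"],
        "selected": [True, False, True],
        "public_score": [0.91, 0.87, 0.89],
    }
)

# Element-wise boolean mask: keep only the rows whose "selected" column is True.
# (Using `is True` here would compare the whole Series by identity and never match.)
selected_submission_ids = df[df["selected"] == True]["submission_id"].values.tolist()

# The ids are joined with newlines, matching the value passed to the
# "Selected Submissions" text area in the UI.
print("\n".join(selected_submission_ids))  # prints sub-1 and sub-3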
competitions/leaderboard.py CHANGED
@@ -12,6 +12,7 @@ from huggingface_hub import snapshot_download
 class Leaderboard:
     end_date: datetime
     eval_higher_is_better: bool
+    max_selected_submissions: int
     competition_id: str
     autotrain_token: str
 
@@ -29,7 +30,7 @@ class Leaderboard:
         "submission_datetime",
     ]
 
-    def _download_submissions(self, private):
+    def _process_public_lb(self):
         submissions_folder = snapshot_download(
             repo_id=self.competition_id,
             allow_patterns="*.json",
@@ -40,14 +41,10 @@
         for submission in glob.glob(os.path.join(submissions_folder, "*.json")):
             with open(submission, "r") as f:
                 submission_info = json.load(f)
-            if self.eval_higher_is_better:
-                submission_info["submissions"].sort(
-                    key=lambda x: x["private_score"] if private else x["public_score"],
-                    reverse=True,
-                )
-            else:
-                submission_info["submissions"].sort(key=lambda x: x["private_score"] if private else x["public_score"])
-
+            submission_info["submissions"].sort(
+                key=lambda x: x["public_score"],
+                reverse=True if self.eval_higher_is_better else False,
+            )
             # select only the best submission
             submission_info["submissions"] = submission_info["submissions"][0]
             temp_info = {
@@ -58,15 +55,84 @@
                 "status": submission_info["submissions"]["status"],
                 "selected": submission_info["submissions"]["selected"],
                 "public_score": submission_info["submissions"]["public_score"],
-                "private_score": submission_info["submissions"]["private_score"],
+                # "private_score": submission_info["submissions"]["private_score"],
                 "submission_date": submission_info["submissions"]["date"],
                 "submission_time": submission_info["submissions"]["time"],
             }
             submissions.append(temp_info)
         return submissions
 
+    def _process_private_lb(self):
+        submissions_folder = snapshot_download(
+            repo_id=self.competition_id,
+            allow_patterns="*.json",
+            use_auth_token=self.autotrain_token,
+            repo_type="dataset",
+        )
+        submissions = []
+        for submission in glob.glob(os.path.join(submissions_folder, "*.json")):
+            with open(submission, "r") as f:
+                submission_info = json.load(f)
+            # count the number of submissions which are selected
+            selected_submissions = 0
+            for sub in submission_info["submissions"]:
+                if sub["selected"]:
+                    selected_submissions += 1
+            if selected_submissions == 0:
+                # select submissions with best public score
+                submission_info["submissions"].sort(
+                    key=lambda x: x["public_score"],
+                    reverse=True if self.eval_higher_is_better else False,
+                )
+                # select only the best submission
+                submission_info["submissions"] = submission_info["submissions"][0]
+            elif selected_submissions == self.max_selected_submissions:
+                # select only the selected submissions
+                submission_info["submissions"] = [sub for sub in submission_info["submissions"] if sub["selected"]]
+                # sort by private score
+                submission_info["submissions"].sort(
+                    key=lambda x: x["private_score"],
+                    reverse=True if self.eval_higher_is_better else False,
+                )
+                # select only the best submission
+                submission_info["submissions"] = submission_info["submissions"][0]
+            else:
+                temp_selected_submissions = [sub for sub in submission_info["submissions"] if sub["selected"]]
+                temp_best_public_submissions = [
+                    sub for sub in submission_info["submissions"] if not sub["selected"]
+                ]
+                temp_best_public_submissions.sort(
+                    key=lambda x: x["public_score"],
+                    reverse=True if self.eval_higher_is_better else False,
+                )
+                missing_candidates = self.max_selected_submissions - len(temp_selected_submissions)
+                temp_best_public_submissions = temp_best_public_submissions[:missing_candidates]
+                submission_info["submissions"] = temp_selected_submissions + temp_best_public_submissions
+                submission_info["submissions"].sort(
+                    key=lambda x: x["private_score"],
+                    reverse=True if self.eval_higher_is_better else False,
+                )
+                submission_info["submissions"] = submission_info["submissions"][0]
+
+            temp_info = {
+                "id": submission_info["id"],
+                "name": submission_info["name"],
+                "submission_id": submission_info["submissions"]["submission_id"],
+                "submission_comment": submission_info["submissions"]["submission_comment"],
+                "status": submission_info["submissions"]["status"],
+                "selected": submission_info["submissions"]["selected"],
+                "private_score": submission_info["submissions"]["private_score"],
+                "submission_date": submission_info["submissions"]["date"],
+                "submission_time": submission_info["submissions"]["time"],
+            }
+            submissions.append(temp_info)
+        return submissions
+
     def fetch(self, private=False):
-        submissions = self._download_submissions(private)
+        if private:
+            submissions = self._process_private_lb()
+        else:
+            submissions = self._process_public_lb()
 
         if len(submissions) == 0:
             return pd.DataFrame()
@@ -108,8 +174,10 @@
         )
 
         # only keep 4 significant digits in the score
-        df["public_score"] = df["public_score"].apply(lambda x: round(x, 4))
-        df["private_score"] = df["private_score"].apply(lambda x: round(x, 4))
+        if private:
+            df["private_score"] = df["private_score"].apply(lambda x: round(x, 4))
+        else:
+            df["public_score"] = df["public_score"].apply(lambda x: round(x, 4))
 
         # reset index
         df = df.reset_index(drop=True)
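
Not part of the commit: the per-team rule that _process_private_lb applies, restated as a standalone sketch. Each submission is assumed to be a dict with selected, public_score and private_score keys; selection_limit stands in for max_selected_submissions, higher_is_better for eval_higher_is_better, and pick_private_lb_submission is an illustrative name rather than a function from the repository.

def pick_private_lb_submission(subs, selection_limit, higher_is_better=True):
    # Submissions the team explicitly marked as counting for the private leaderboard.
    selected = [s for s in subs if s["selected"]]
    if not selected:
        # Nothing selected: fall back to the submission with the best public score.
        candidates = sorted(subs, key=lambda s: s["public_score"], reverse=higher_is_better)
    elif len(selected) == selection_limit:
        # The full quota was used: only the selected submissions count,
        # ranked by private score.
        candidates = sorted(selected, key=lambda s: s["private_score"], reverse=higher_is_better)
    else:
        # Partial selection: top it up with the best unselected submissions
        # by public score, then rank the combined pool by private score.
        unselected = sorted(
            [s for s in subs if not s["selected"]],
            key=lambda s: s["public_score"],
            reverse=higher_is_better,
        )
        missing = selection_limit - len(selected)
        candidates = sorted(
            selected + unselected[:missing],
            key=lambda s: s["private_score"],
            reverse=higher_is_better,
        )
    # Exactly one submission per team ends up on the private leaderboard.
    return candidates[0]

Whichever branch applies, a single submission is kept per participant, which is why fetch(private=True) only needs to round and display the private_score column.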