Abhishek Thakur committed on
Commit
cb7ab1b
·
1 Parent(s): 6a5164b

competition creator

Browse files
Files changed (2) hide show
  1. competitions/__init__.py +4 -1
  2. competitions/create.py +173 -62
competitions/__init__.py CHANGED
@@ -12,4 +12,7 @@ AUTOTRAIN_TOKEN = os.getenv("AUTOTRAIN_TOKEN")
12
  AUTOTRAIN_BACKEND_API = os.getenv("AUTOTRAIN_BACKEND_API", "https://api.autotrain.huggingface.co")
13
  BOT_TOKEN = os.getenv("BOT_TOKEN")
14
 
15
- competition_info = CompetitionInfo(competition_id=COMPETITION_ID, autotrain_token=AUTOTRAIN_TOKEN)
 
 
 
 
12
  AUTOTRAIN_BACKEND_API = os.getenv("AUTOTRAIN_BACKEND_API", "https://api.autotrain.huggingface.co")
13
  BOT_TOKEN = os.getenv("BOT_TOKEN")
14
 
15
+ if COMPETITION_ID is not None:
16
+ competition_info = CompetitionInfo(competition_id=COMPETITION_ID, autotrain_token=AUTOTRAIN_TOKEN)
17
+ else:
18
+ competition_info = None
competitions/create.py CHANGED
@@ -10,20 +10,44 @@ from . import BOT_TOKEN
10
  from .utils import user_authentication
11
 
12
 
13
- """
14
- To create a competition, follow these steps:
15
- 1. create a private dataset which has the following structure:
16
- - conf.json
17
- - solution.csv
18
- - COMPETITION_DESC.md
19
- - DATASET_DESC.md
20
- 2. create a public dataset which consists of the following files:
21
- - sample_submission.csv
22
- - test.csv
23
- - train.csv
24
- - anything else
25
- 3. create a competition space
26
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
 
29
  def create_competition(
@@ -38,20 +62,56 @@ def create_competition(
38
  sample_submission_file,
39
  solution_file,
40
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  # generate a random id
42
  suffix = str(uuid.uuid4())
43
  private_dataset_name = f"{who_pays}/{competition_name}{suffix}"
44
  public_dataset_name = f"{who_pays}/{competition_name}"
45
  space_name = f"competitions/{competition_name}"
46
 
47
- sample_submission_df = pd.read_csv(sample_submission_file.name, nrows=10)
48
  submission_columns = ",".join(sample_submission_df.columns)
49
 
50
  conf = {
 
51
  "SUBMISSION_LIMIT": submission_limit,
52
  "SELECTION_LIMIT": selection_limit,
53
  "END_DATE": end_date,
54
- "EVAL_HIGHER_IS_BETTER": True,
55
  "COMPETITION_NAME": competition_name,
56
  "SUBMISSION_ID_COLUMN": "id",
57
  "SUBMISSION_COLUMNS": submission_columns,
@@ -62,13 +122,24 @@ def create_competition(
62
  api = HfApi()
63
 
64
  # create private dataset repo
65
- create_repo(
66
- repo_id=private_dataset_name,
67
- repo_type="dataset",
68
- private=True,
69
- token=user_token,
70
- exist_ok=False,
71
- )
 
 
 
 
 
 
 
 
 
 
 
72
  competition_desc = f"""
73
  # Welcome to {competition_name}
74
 
@@ -123,39 +194,48 @@ def create_competition(
123
  token=user_token,
124
  )
125
 
126
- if solution_file is not None:
 
 
 
 
 
 
 
 
 
127
 
128
- with open(solution_file.name, "rb") as f:
129
- solution_bytes_data = f.read()
130
- # upload solution file
131
- api.upload_file(
132
- path_or_fileobj=solution_bytes_data,
133
- path_in_repo="solution.csv",
134
- repo_id=private_dataset_name,
135
  repo_type="dataset",
 
136
  token=user_token,
 
 
 
 
 
 
 
 
 
 
 
137
  )
138
 
139
- # create public dataset repo
140
- create_repo(
 
 
 
 
 
141
  repo_id=public_dataset_name,
142
  repo_type="dataset",
143
- private=False,
144
  token=user_token,
145
- exist_ok=False,
146
  )
147
- if sample_submission_file is not None:
148
- # upload sample submission file
149
- with open(sample_submission_file.name, "rb") as f:
150
- sample_submission_bytes_data = f.read()
151
-
152
- api.upload_file(
153
- path_or_fileobj=sample_submission_bytes_data,
154
- path_in_repo="sample_submission.csv",
155
- repo_id=public_dataset_name,
156
- repo_type="dataset",
157
- token=user_token,
158
- )
159
 
160
  dockerfile = """
161
  FROM huggingface/competitions:latest
@@ -198,7 +278,6 @@ def create_competition(
198
  """
199
  space_readme = space_readme.strip()
200
  space_readme = space_readme.replace(" ", "")
201
- print(repr(space_readme))
202
 
203
  # upload space readme
204
  space_readme_bytes = space_readme.encode("utf-8")
@@ -231,6 +310,19 @@ def create_competition(
231
  token=BOT_TOKEN,
232
  )
233
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
 
235
  def check_if_user_can_create_competition(user_token):
236
  """
@@ -239,7 +331,6 @@ def check_if_user_can_create_competition(user_token):
239
  :return: True if the user can create a competition, False otherwise
240
  """
241
  user_info = user_authentication(user_token)
242
- print(user_info)
243
  return_msg = None
244
  if "error" in user_info:
245
  return_msg = "Invalid token. You can find your HF token here: https://huggingface.co/settings/tokens"
@@ -247,9 +338,6 @@ def check_if_user_can_create_competition(user_token):
247
  elif user_info["auth"]["accessToken"]["role"] != "write":
248
  return_msg = "Please provide a token with write access"
249
 
250
- elif user_info["canPay"] is False:
251
- return_msg = "Please add a valid payment method in order to create and manage a competition"
252
-
253
  if return_msg is not None:
254
  return [
255
  gr.Box.update(visible=False),
@@ -257,23 +345,45 @@ def check_if_user_can_create_competition(user_token):
257
  gr.Dropdown.update(visible=False),
258
  ]
259
 
260
- username = user_info["name"]
261
- user_id = user_info["id"]
262
-
263
  orgs = user_info["orgs"]
264
  valid_orgs = [org for org in orgs if org["canPay"] is True]
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  valid_orgs = [org for org in valid_orgs if org["roleInOrg"] in ("admin", "write")]
266
 
267
- valid_entities = {org["id"]: org["name"] for org in valid_orgs}
268
- valid_entities[user_id] = username
 
 
 
 
 
 
 
 
 
269
 
270
- # reverse the dictionary
271
- valid_entities = {v: k for k, v in valid_entities.items()}
272
 
273
  return [
274
  gr.Box.update(visible=True),
275
  gr.Markdown.update(value="", visible=False),
276
- gr.Dropdown.update(choices=list(valid_entities.keys()), visible=True, value=username),
 
 
 
 
277
  ]
278
 
279
 
@@ -361,6 +471,8 @@ with gr.Blocks() as demo:
361
  with gr.Row():
362
  create_button = gr.Button("Create Competition")
363
 
 
 
364
  login_button.click(
365
  check_if_user_can_create_competition, inputs=[user_token], outputs=[create_box, message_box, who_pays]
366
  )
@@ -377,5 +489,4 @@ with gr.Blocks() as demo:
377
  sample_submission_file,
378
  solution_file,
379
  ]
380
- print(create_inputs)
381
- create_button.click(create_competition, inputs=create_inputs, outputs=[message_box])
 
10
  from .utils import user_authentication
11
 
12
 
13
def verify_sample_and_solution(sample_submission, solution) -> bool:
    """Check that the sample submission and solution CSV files are consistent.

    Both arguments are objects exposing a ``.name`` attribute that holds the
    path of a CSV file (presumably uploaded-file handles from the UI; confirm
    against the caller).

    The following rules are enforced, in order:

    1. both files contain an ``id`` column;
    2. both files have the same number of rows;
    3. the ``id`` values are identical, row by row;
    4. the solution contains a ``split`` column whose distinct values are
       exactly ``public`` and ``private``;
    5. apart from ``split``, the solution has the same columns, in the same
       order, as the sample submission.

    :param sample_submission: object whose ``.name`` is the sample submission CSV path
    :param solution: object whose ``.name`` is the solution CSV path
    :return: True when every check passes
    :raises ValueError: with a user-facing message describing the first failed check
    """
    sample_df = pd.read_csv(sample_submission.name)
    solution_df = pd.read_csv(solution.name)

    if "id" not in sample_df.columns:
        raise ValueError("Sample submission should contain an id column")

    if "id" not in solution_df.columns:
        raise ValueError("Solution file should contain an id column")

    # The row count must be checked before the ids are compared: comparing two
    # Series of different lengths raises an unrelated pandas error instead of
    # the message intended for the user.
    if sample_df.shape[0] != solution_df.shape[0]:
        raise ValueError("Sample submission and solution should have the same number of rows")

    # Plain list comparison is positional and never raises on mismatched data.
    if sample_df["id"].tolist() != solution_df["id"].tolist():
        raise ValueError("Sample submission and solution should have the same ids")

    if "split" not in solution_df.columns:
        raise ValueError("Solution file should contain a split column")

    # A single set comparison covers both "exactly two values" and
    # "the values are public and private".
    if set(solution_df["split"].unique()) != {"public", "private"}:
        raise ValueError("Split column should contain only two unique values: public and private")

    # Except for the `split` column, all other columns should match, in order.
    # Lists are compared (rather than an Index against a list) so that a
    # differing number of columns yields the intended message, not a pandas
    # "Lengths must match" error.
    solution_columns = [column for column in solution_df.columns if column != "split"]
    if sample_df.columns.tolist() != solution_columns:
        raise ValueError("Sample submission and solution should have the same columns, except for the split column")

    return True
51
 
52
 
53
  def create_competition(
 
62
  sample_submission_file,
63
  solution_file,
64
  ):
65
+
66
+ # verify sample submission and solution
67
+ try:
68
+ verify_sample_and_solution(sample_submission_file, solution_file)
69
+ except Exception as e:
70
+ return gr.Markdown.update(
71
+ value=f"""
72
+ <div style="text-align: center">
73
+ <h4>Invalid sample submission or solution file</h4>
74
+ <p>{e}</p>
75
+ </div>
76
+ """,
77
+ visible=True,
78
+ )
79
+
80
+ # check if end_date is valid format: YYYY-MM-DD and in the future
81
+ try:
82
+ if len(end_date.split("-")) != 3:
83
+ raise Exception("End date should be in the format YYYY-MM-DD")
84
+ end_date_pd = pd.to_datetime(end_date)
85
+ if end_date_pd == pd.NaT:
86
+ raise Exception("End date should be in the format YYYY-MM-DD")
87
+ if end_date_pd <= pd.to_datetime("today"):
88
+ raise Exception("End date should be in the future")
89
+ except Exception as e:
90
+ return gr.Markdown.update(
91
+ value=f"""
92
+ <div style="text-align: center">
93
+ <h4>Invalid end date</h4>
94
+ <p>{e}</p>
95
+ </div>
96
+ """,
97
+ visible=True,
98
+ )
99
+
100
  # generate a random id
101
  suffix = str(uuid.uuid4())
102
  private_dataset_name = f"{who_pays}/{competition_name}{suffix}"
103
  public_dataset_name = f"{who_pays}/{competition_name}"
104
  space_name = f"competitions/{competition_name}"
105
 
106
+ sample_submission_df = pd.read_csv(sample_submission_file.name)
107
  submission_columns = ",".join(sample_submission_df.columns)
108
 
109
  conf = {
110
+ "COMPETITION_TYPE": competition_type,
111
  "SUBMISSION_LIMIT": submission_limit,
112
  "SELECTION_LIMIT": selection_limit,
113
  "END_DATE": end_date,
114
+ "EVAL_HIGHER_IS_BETTER": 1 if eval_metric != "logloss" else 0,
115
  "COMPETITION_NAME": competition_name,
116
  "SUBMISSION_ID_COLUMN": "id",
117
  "SUBMISSION_COLUMNS": submission_columns,
 
122
  api = HfApi()
123
 
124
  # create private dataset repo
125
+ try:
126
+ create_repo(
127
+ repo_id=private_dataset_name,
128
+ repo_type="dataset",
129
+ private=True,
130
+ token=user_token,
131
+ exist_ok=False,
132
+ )
133
+ except Exception as e:
134
+ return gr.Markdown.update(
135
+ value=f"""
136
+ <div style="text-align: center">
137
+ <h4>Failed to create private dataset repo</h4>
138
+ <p>{e}</p>
139
+ </div>
140
+ """,
141
+ visible=True,
142
+ )
143
  competition_desc = f"""
144
  # Welcome to {competition_name}
145
 
 
194
  token=user_token,
195
  )
196
 
197
+ with open(solution_file.name, "rb") as f:
198
+ solution_bytes_data = f.read()
199
+ # upload solution file
200
+ api.upload_file(
201
+ path_or_fileobj=solution_bytes_data,
202
+ path_in_repo="solution.csv",
203
+ repo_id=private_dataset_name,
204
+ repo_type="dataset",
205
+ token=user_token,
206
+ )
207
 
208
+ # create public dataset repo
209
+ try:
210
+ create_repo(
211
+ repo_id=public_dataset_name,
 
 
 
212
  repo_type="dataset",
213
+ private=False,
214
  token=user_token,
215
+ exist_ok=False,
216
+ )
217
+ except Exception as e:
218
+ return gr.Markdown.update(
219
+ value=f"""
220
+ <div style="text-align: center">
221
+ <h4>Failed to create public dataset repo</h4>
222
+ <p>{e}</p>
223
+ </div>
224
+ """,
225
+ visible=True,
226
  )
227
 
228
+ # upload sample submission file
229
+ with open(sample_submission_file.name, "rb") as f:
230
+ sample_submission_bytes_data = f.read()
231
+
232
+ api.upload_file(
233
+ path_or_fileobj=sample_submission_bytes_data,
234
+ path_in_repo="sample_submission.csv",
235
  repo_id=public_dataset_name,
236
  repo_type="dataset",
 
237
  token=user_token,
 
238
  )
 
 
 
 
 
 
 
 
 
 
 
 
239
 
240
  dockerfile = """
241
  FROM huggingface/competitions:latest
 
278
  """
279
  space_readme = space_readme.strip()
280
  space_readme = space_readme.replace(" ", "")
 
281
 
282
  # upload space readme
283
  space_readme_bytes = space_readme.encode("utf-8")
 
310
  token=BOT_TOKEN,
311
  )
312
 
313
+ return gr.Markdown.update(
314
+ value=f"""
315
+ <div style="text-align: center">
316
+ <h4>Competition created successfully!</h4>
317
+ <p>Private dataset: <a href="https://hf.co/datasets/{private_dataset_name}">{private_dataset_name}</a></p>
318
+ <p>Public dataset: <a href="https://hf.co/datasets/{public_dataset_name}">{public_dataset_name}</a></p>
319
+ <p>Competition space: <a href="https://hf.co/spaces/{space_name}">{space_name}</a></p>
320
+ <p>Note: Do NOT share the private dataset or link with anyone else.</p>
321
+ </div>
322
+ """,
323
+ visible=True,
324
+ )
325
+
326
 
327
  def check_if_user_can_create_competition(user_token):
328
  """
 
331
  :return: True if the user can create a competition, False otherwise
332
  """
333
  user_info = user_authentication(user_token)
 
334
  return_msg = None
335
  if "error" in user_info:
336
  return_msg = "Invalid token. You can find your HF token here: https://huggingface.co/settings/tokens"
 
338
  elif user_info["auth"]["accessToken"]["role"] != "write":
339
  return_msg = "Please provide a token with write access"
340
 
 
 
 
341
  if return_msg is not None:
342
  return [
343
  gr.Box.update(visible=False),
 
345
  gr.Dropdown.update(visible=False),
346
  ]
347
 
 
 
 
348
  orgs = user_info["orgs"]
349
  valid_orgs = [org for org in orgs if org["canPay"] is True]
350
+
351
+ if len(valid_orgs) == 0:
352
+ return_msg = """You are not a member of any organization with a valid payment method.
353
+ Please add a valid payment method for your organization in order to create competitions."""
354
+ return [
355
+ gr.Box.update(visible=False),
356
+ gr.Markdown.update(
357
+ value=return_msg,
358
+ visible=True,
359
+ ),
360
+ gr.Dropdown.update(visible=False),
361
+ ]
362
+
363
  valid_orgs = [org for org in valid_orgs if org["roleInOrg"] in ("admin", "write")]
364
 
365
+ if len(valid_orgs) == 0:
366
+ return_msg = """You dont have write access for any organization.
367
+ Please contact your organization's admin to add you as a member with write privilages."""
368
+ return [
369
+ gr.Box.update(visible=False),
370
+ gr.Markdown.update(
371
+ value=return_msg,
372
+ visible=True,
373
+ ),
374
+ gr.Dropdown.update(visible=False),
375
+ ]
376
 
377
+ valid_entities = {org["name"]: org["id"] for org in valid_orgs}
 
378
 
379
  return [
380
  gr.Box.update(visible=True),
381
  gr.Markdown.update(value="", visible=False),
382
+ gr.Dropdown.update(
383
+ choices=list(valid_entities.keys()),
384
+ visible=True,
385
+ value=list(valid_entities.keys())[0],
386
+ ),
387
  ]
388
 
389
 
 
471
  with gr.Row():
472
  create_button = gr.Button("Create Competition")
473
 
474
+ final_output = gr.Markdown(visible=True)
475
+
476
  login_button.click(
477
  check_if_user_can_create_competition, inputs=[user_token], outputs=[create_box, message_box, who_pays]
478
  )
 
489
  sample_submission_file,
490
  solution_file,
491
  ]
492
+ create_button.click(create_competition, inputs=create_inputs, outputs=[final_output])