Abhishek Thakur committed on
Commit
bcf3c90
·
1 Parent(s): 6655abf

update docker

Browse files
Files changed (5) hide show
  1. .env.example +1 -2
  2. Dockerfile +1 -24
  3. Makefile +1 -1
  4. competitions/create.py +0 -506
  5. requirements.txt +1 -3
.env.example CHANGED
@@ -1,3 +1,2 @@
1
- AUTOTRAIN_USERNAME=autoevaluator
2
- AUTOTRAIN_TOKEN=hf_XXX
3
  COMPETITION_ID=zzzz/abc
 
1
+ TOKEN=hf_XXX
 
2
  COMPETITION_ID=zzzz/abc
Dockerfile CHANGED
@@ -1,17 +1,8 @@
1
- FROM ubuntu:22.04
2
 
3
  ENV DEBIAN_FRONTEND=noninteractive \
4
  TZ=UTC
5
 
6
- RUN apt-get update && \
7
- apt-get upgrade -y && \
8
- apt-get install -y \
9
- build-essential \
10
- cmake \
11
- wget \
12
- && rm -rf /var/lib/apt/lists/* && \
13
- apt-get clean
14
-
15
  WORKDIR /app
16
  RUN mkdir -p /app/.cache
17
  ENV HF_HOME="/app/.cache"
@@ -19,20 +10,6 @@ RUN chown -R 1000:1000 /app
19
  USER 1000
20
  ENV HOME=/app
21
 
22
- ENV PYTHONPATH=$HOME/app \
23
- PYTHONUNBUFFERED=1 \
24
- SYSTEM=spaces
25
-
26
-
27
- RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
28
- && sh Miniconda3-latest-Linux-x86_64.sh -b -p /app/miniconda \
29
- && rm -f Miniconda3-latest-Linux-x86_64.sh
30
- ENV PATH /app/miniconda/bin:$PATH
31
-
32
- RUN conda create -p /app/env -y python=3.10 \
33
- && conda clean -ya
34
-
35
-
36
  SHELL ["conda", "run","--no-capture-output", "-p","/app/env", "/bin/bash", "-c"]
37
 
38
  COPY --chown=1000:1000 . /app/
 
1
+ FROM huggingface/autotrain-advanced:latest
2
 
3
  ENV DEBIAN_FRONTEND=noninteractive \
4
  TZ=UTC
5
 
 
 
 
 
 
 
 
 
 
6
  WORKDIR /app
7
  RUN mkdir -p /app/.cache
8
  ENV HF_HOME="/app/.cache"
 
10
  USER 1000
11
  ENV HOME=/app
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  SHELL ["conda", "run","--no-capture-output", "-p","/app/env", "/bin/bash", "-c"]
14
 
15
  COPY --chown=1000:1000 . /app/
Makefile CHANGED
@@ -16,7 +16,7 @@ docker:
16
  docker push huggingface/competitions:latest
17
 
18
  test:
19
- pytest -sv ./competitions/
20
 
21
  socket-kit.so: socket-kit.c
22
  gcc $(CFLAGS) -shared -fPIC $^ -o $@ -ldl
 
16
  docker push huggingface/competitions:latest
17
 
18
  test:
19
+ pytest -sv .
20
 
21
  socket-kit.so: socket-kit.c
22
  gcc $(CFLAGS) -shared -fPIC $^ -o $@ -ldl
competitions/create.py DELETED
@@ -1,506 +0,0 @@
1
- import io
2
- import json
3
- import uuid
4
-
5
- import gradio as gr
6
- import pandas as pd
7
- from huggingface_hub import HfApi, create_repo
8
-
9
- from . import BOT_TOKEN
10
- from .utils import user_authentication
11
-
12
-
13
- def verify_sample_and_solution(sample_submission, solution, eval_metric):
14
- sample_submission = pd.read_csv(sample_submission.name)
15
- solution = pd.read_csv(solution.name)
16
-
17
- # check if both contain an id column
18
- if "id" not in sample_submission.columns:
19
- raise Exception("Sample submission should contain an id column")
20
-
21
- if "id" not in solution.columns:
22
- raise Exception("Solution file should contain an id column")
23
-
24
- if eval_metric != "map-iou":
25
- # check if both files have the same ids
26
- if not (sample_submission["id"] == solution["id"]).all():
27
- raise Exception("Sample submission and solution should have the same ids")
28
-
29
- # check if both files have the same number of rows
30
- if sample_submission.shape[0] != solution.shape[0]:
31
- raise Exception("Sample submission and solution should have the same number of rows")
32
-
33
- # check if solution contains a split column
34
- if "split" not in solution.columns:
35
- raise Exception("Solution file should contain a split column")
36
-
37
- # check if split column contains only two unique values
38
- if len(solution["split"].unique()) != 2:
39
- raise Exception("Split column should contain only two unique values: public and private")
40
-
41
- # check if unique values are public and private
42
- if not set(solution["split"].unique()) == set(["public", "private"]):
43
- raise Exception("Split column should contain only two unique values: public and private")
44
-
45
- if eval_metric != "map-iou":
46
- # except the `split` column, all other columns should be the same
47
- solution_columns = solution.columns.tolist()
48
- solution_columns.remove("split")
49
- if not (sample_submission.columns == solution_columns).all():
50
- raise Exception("Sample submission and solution should have the same columns, except for the split column")
51
-
52
- return True
53
-
54
-
55
- def create_competition(
56
- user_token,
57
- who_pays,
58
- competition_type,
59
- competition_name,
60
- eval_metric,
61
- submission_limit,
62
- selection_limit,
63
- end_date,
64
- sample_submission_file,
65
- solution_file,
66
- is_public,
67
- ):
68
- # verify sample submission and solution
69
- try:
70
- verify_sample_and_solution(sample_submission_file, solution_file, eval_metric)
71
- except Exception as e:
72
- return gr.Markdown.update(
73
- value=f"""
74
- <div style="text-align: center">
75
- <h4>Invalid sample submission or solution file</h4>
76
- <p>{e}</p>
77
- </div>
78
- """,
79
- visible=True,
80
- )
81
-
82
- # check if end_date is valid format: YYYY-MM-DD and in the future
83
- try:
84
- if len(end_date.split("-")) != 3:
85
- raise Exception("End date should be in the format YYYY-MM-DD")
86
- end_date_pd = pd.to_datetime(end_date)
87
- if end_date_pd == pd.NaT:
88
- raise Exception("End date should be in the format YYYY-MM-DD")
89
- if end_date_pd <= pd.to_datetime("today"):
90
- raise Exception("End date should be in the future")
91
- except Exception as e:
92
- return gr.Markdown.update(
93
- value=f"""
94
- <div style="text-align: center">
95
- <h4>Invalid end date</h4>
96
- <p>{e}</p>
97
- </div>
98
- """,
99
- visible=True,
100
- )
101
-
102
- is_public = is_public == "Public"
103
- suffix = str(uuid.uuid4())
104
- private_dataset_name = f"{who_pays}/{competition_name}{suffix}"
105
- public_dataset_name = f"{who_pays}/{competition_name}"
106
- if is_public:
107
- space_name = f"competitions/{competition_name}"
108
- else:
109
- space_name = f"{who_pays}/{competition_name}"
110
-
111
- sample_submission_df = pd.read_csv(sample_submission_file.name)
112
- submission_columns = ",".join(sample_submission_df.columns)
113
-
114
- conf = {
115
- "COMPETITION_TYPE": competition_type,
116
- "SUBMISSION_LIMIT": submission_limit,
117
- "SELECTION_LIMIT": selection_limit,
118
- "END_DATE": end_date,
119
- "EVAL_HIGHER_IS_BETTER": 1 if eval_metric != "logloss" else 0,
120
- "COMPETITION_NAME": competition_name,
121
- "SUBMISSION_ID_COLUMN": "id",
122
- "SUBMISSION_COLUMNS": submission_columns,
123
- "SUBMISSION_ROWS": len(sample_submission_df),
124
- "EVAL_METRIC": eval_metric,
125
- }
126
- if eval_metric == "map-iou":
127
- conf["IOU_THRESHOLD"] = 0.5
128
-
129
- api = HfApi()
130
-
131
- # create private dataset repo
132
- try:
133
- create_repo(
134
- repo_id=private_dataset_name,
135
- repo_type="dataset",
136
- private=True,
137
- token=user_token,
138
- exist_ok=False,
139
- )
140
- except Exception as e:
141
- return gr.Markdown.update(
142
- value=f"""
143
- <div style="text-align: center">
144
- <h4>Failed to create private dataset repo</h4>
145
- <p>{e}</p>
146
- </div>
147
- """,
148
- visible=True,
149
- )
150
- competition_desc = f"""
151
- # Welcome to {competition_name}
152
-
153
- This is a competition description.
154
-
155
- You can use markdown to format your description.
156
- """
157
-
158
- dataset_desc = f"""
159
- # Dataset Description
160
-
161
- This is a dataset description.
162
-
163
- You can use markdown to format your description.
164
-
165
- Dataset can be downloaded from [here](https://hf.co/datasets/{public_dataset_name})
166
- """
167
-
168
- conf_json = json.dumps(conf)
169
- conf_bytes = conf_json.encode("utf-8")
170
- conf_buffer = io.BytesIO(conf_bytes)
171
-
172
- api.upload_file(
173
- path_or_fileobj=conf_buffer,
174
- path_in_repo="conf.json",
175
- repo_id=private_dataset_name,
176
- repo_type="dataset",
177
- token=user_token,
178
- )
179
-
180
- # convert competition description to bytes
181
- competition_desc_bytes = competition_desc.encode("utf-8")
182
- competition_desc_buffer = io.BytesIO(competition_desc_bytes)
183
-
184
- api.upload_file(
185
- path_or_fileobj=competition_desc_buffer,
186
- path_in_repo="COMPETITION_DESC.md",
187
- repo_id=private_dataset_name,
188
- repo_type="dataset",
189
- token=user_token,
190
- )
191
-
192
- # convert dataset description to bytes
193
- dataset_desc_bytes = dataset_desc.encode("utf-8")
194
- dataset_desc_buffer = io.BytesIO(dataset_desc_bytes)
195
-
196
- api.upload_file(
197
- path_or_fileobj=dataset_desc_buffer,
198
- path_in_repo="DATASET_DESC.md",
199
- repo_id=private_dataset_name,
200
- repo_type="dataset",
201
- token=user_token,
202
- )
203
-
204
- with open(solution_file.name, "rb") as f:
205
- solution_bytes_data = f.read()
206
- # upload solution file
207
- api.upload_file(
208
- path_or_fileobj=solution_bytes_data,
209
- path_in_repo="solution.csv",
210
- repo_id=private_dataset_name,
211
- repo_type="dataset",
212
- token=user_token,
213
- )
214
-
215
- # create public dataset repo
216
- try:
217
- create_repo(
218
- repo_id=public_dataset_name,
219
- repo_type="dataset",
220
- private=False,
221
- token=user_token,
222
- exist_ok=False,
223
- )
224
- except Exception as e:
225
- return gr.Markdown.update(
226
- value=f"""
227
- <div style="text-align: center">
228
- <h4>Failed to create public dataset repo</h4>
229
- <p>{e}</p>
230
- </div>
231
- """,
232
- visible=True,
233
- )
234
-
235
- # upload sample submission file
236
- with open(sample_submission_file.name, "rb") as f:
237
- sample_submission_bytes_data = f.read()
238
-
239
- api.upload_file(
240
- path_or_fileobj=sample_submission_bytes_data,
241
- path_in_repo="sample_submission.csv",
242
- repo_id=public_dataset_name,
243
- repo_type="dataset",
244
- token=user_token,
245
- )
246
-
247
- dockerfile = """
248
- FROM huggingface/competitions:latest
249
- CMD competitions run
250
- """
251
- dockerfile = dockerfile.strip()
252
- dockerfile = dockerfile.replace(" ", "")
253
-
254
- # create competition space
255
- create_repo(
256
- repo_id=space_name,
257
- repo_type="space",
258
- private=False,
259
- token=BOT_TOKEN if is_public else user_token,
260
- space_sdk="docker",
261
- exist_ok=False,
262
- )
263
-
264
- # upload dockerfile
265
- dockerfile_bytes = dockerfile.encode("utf-8")
266
- dockerfile_buffer = io.BytesIO(dockerfile_bytes)
267
-
268
- api.upload_file(
269
- path_or_fileobj=dockerfile_buffer,
270
- path_in_repo="Dockerfile",
271
- repo_id=space_name,
272
- repo_type="space",
273
- token=BOT_TOKEN if is_public else user_token,
274
- )
275
-
276
- space_readme = f"""
277
- ---
278
- title: {competition_name}
279
- emoji: 🏆
280
- colorFrom: blue
281
- colorTo: gray
282
- sdk: docker
283
- pinned: false
284
- ---
285
- """
286
- space_readme = space_readme.strip()
287
- space_readme = space_readme.replace(" ", "")
288
-
289
- # upload space readme
290
- space_readme_bytes = space_readme.encode("utf-8")
291
- space_readme_buffer = io.BytesIO(space_readme_bytes)
292
-
293
- api.upload_file(
294
- path_or_fileobj=space_readme_buffer,
295
- path_in_repo="README.md",
296
- repo_id=space_name,
297
- repo_type="space",
298
- token=BOT_TOKEN if is_public else user_token,
299
- )
300
-
301
- api.add_space_secret(
302
- repo_id=space_name,
303
- key="COMPETITION_ID",
304
- value=private_dataset_name,
305
- token=BOT_TOKEN if is_public else user_token,
306
- )
307
- api.add_space_secret(
308
- repo_id=space_name,
309
- key="AUTOTRAIN_USERNAME",
310
- value=who_pays,
311
- token=BOT_TOKEN if is_public else user_token,
312
- )
313
- api.add_space_secret(
314
- repo_id=space_name,
315
- key="AUTOTRAIN_TOKEN",
316
- value=user_token,
317
- token=BOT_TOKEN if is_public else user_token,
318
- )
319
-
320
- return gr.Markdown.update(
321
- value=f"""
322
- <div style="text-align: center">
323
- <h4>Competition created successfully!</h4>
324
- <p>Private dataset: <a href="https://hf.co/datasets/{private_dataset_name}">{private_dataset_name}</a></p>
325
- <p>Public dataset: <a href="https://hf.co/datasets/{public_dataset_name}">{public_dataset_name}</a></p>
326
- <p>Competition space: <a href="https://hf.co/spaces/{space_name}">{space_name}</a></p>
327
- <p>NOTE: for private competitions, please add `autoevaluator` user to your org: {who_pays}.</p>
328
- <p>NOTE: Do NOT share the private dataset or link with anyone else.</p>
329
- </div>
330
- """,
331
- visible=True,
332
- )
333
-
334
-
335
- def check_if_user_can_create_competition(user_token):
336
- """
337
- Check if the user can create a competition
338
- :param user_token: the user's token
339
- :return: True if the user can create a competition, False otherwise
340
- """
341
- user_info = user_authentication(user_token)
342
- return_msg = None
343
- if "error" in user_info:
344
- return_msg = "Invalid token. You can find your HF token here: https://huggingface.co/settings/tokens"
345
-
346
- elif user_info["auth"]["accessToken"]["role"] != "write":
347
- return_msg = "Please provide a token with write access"
348
-
349
- if return_msg is not None:
350
- return [
351
- gr.Box.update(visible=False),
352
- gr.Markdown.update(value=return_msg, visible=True),
353
- gr.Dropdown.update(visible=False),
354
- ]
355
-
356
- orgs = user_info["orgs"]
357
- valid_orgs = [org for org in orgs if org["canPay"] is True]
358
-
359
- if len(valid_orgs) == 0:
360
- return_msg = """You are not a member of any organization with a valid payment method.
361
- Please add a valid payment method for your organization in order to create competitions."""
362
- return [
363
- gr.Box.update(visible=False),
364
- gr.Markdown.update(
365
- value=return_msg,
366
- visible=True,
367
- ),
368
- gr.Dropdown.update(visible=False),
369
- ]
370
-
371
- valid_orgs = [org for org in valid_orgs if org["roleInOrg"] in ("admin", "write")]
372
-
373
- if len(valid_orgs) == 0:
374
- return_msg = """You dont have write access for any organization.
375
- Please contact your organization's admin to add you as a member with write privilages."""
376
- return [
377
- gr.Box.update(visible=False),
378
- gr.Markdown.update(
379
- value=return_msg,
380
- visible=True,
381
- ),
382
- gr.Dropdown.update(visible=False),
383
- ]
384
-
385
- valid_entities = {org["name"]: org["id"] for org in valid_orgs}
386
-
387
- return [
388
- gr.Box.update(visible=True),
389
- gr.Markdown.update(value="", visible=False),
390
- gr.Dropdown.update(
391
- choices=list(valid_entities.keys()),
392
- visible=True,
393
- value=list(valid_entities.keys())[0],
394
- ),
395
- ]
396
-
397
-
398
- with gr.Blocks() as demo:
399
- gr.Markdown(
400
- """
401
- <div style="text-align: center">
402
- <h1>Hugging Face Competition Creator</h1>
403
- """
404
- )
405
- user_token = gr.Textbox(
406
- max_lines=1,
407
- value="",
408
- label="Please enter your Hugging Face token (write access needed)",
409
- type="password",
410
- )
411
- login_button = gr.Button("Login")
412
-
413
- message_box = gr.Markdown(visible=False)
414
-
415
- with gr.Box(visible=False) as create_box:
416
- who_pays = gr.Dropdown(
417
- ["Me", "My Organization"],
418
- label="Who Pays",
419
- value="Me",
420
- visible=False,
421
- interactive=True,
422
- )
423
- competition_type = gr.Radio(
424
- ["Generic"],
425
- label="Competition Type",
426
- value="Generic",
427
- )
428
-
429
- with gr.Row():
430
- competition_name = gr.Textbox(
431
- max_lines=1,
432
- value="",
433
- label="Competition Name",
434
- placeholder="my-awesome-competition",
435
- )
436
- eval_metric = gr.Dropdown(
437
- ["accuracy", "auc", "f1", "logloss", "map-iou", "precision", "recall"],
438
- label="Evaluation Metric",
439
- value="accuracy",
440
- )
441
- with gr.Row():
442
- submission_limit = gr.Slider(
443
- minimum=1,
444
- maximum=100,
445
- value=5,
446
- step=1,
447
- label="Submission Limit Per Day",
448
- )
449
- selection_limit = gr.Slider(
450
- minimum=1,
451
- maximum=100,
452
- value=2,
453
- step=1,
454
- label="Selection Limit For Final Leaderboard",
455
- )
456
- end_date = gr.Textbox(
457
- max_lines=1,
458
- value="",
459
- label="End Date (YYYY-MM-DD), Private LB will be visible on this date",
460
- )
461
- with gr.Box():
462
- with gr.Row():
463
- with gr.Column():
464
- sample_submission_file = gr.File(
465
- label="sample_submission.csv",
466
- )
467
- with gr.Column():
468
- solution_file = gr.File(
469
- label="solution.csv",
470
- )
471
- gr.Markdown(
472
- """Please note that you will need to upload training and test
473
- data separately to the public repository that will be created.
474
- You can also change sample_submission and solution files later.
475
- """
476
- )
477
- with gr.Box():
478
- with gr.Row():
479
- is_public = gr.Dropdown(
480
- ["Public", "Private"],
481
- label="Competition Visibility. Private competitions are only visible to you and your organization members and are created inside your organization. Public competitions are available at hf.co/competitions.",
482
- value="Public",
483
- )
484
- with gr.Row():
485
- create_button = gr.Button("Create Competition")
486
-
487
- final_output = gr.Markdown(visible=True)
488
-
489
- login_button.click(
490
- check_if_user_can_create_competition, inputs=[user_token], outputs=[create_box, message_box, who_pays]
491
- )
492
-
493
- create_inputs = [
494
- user_token,
495
- who_pays,
496
- competition_type,
497
- competition_name,
498
- eval_metric,
499
- submission_limit,
500
- selection_limit,
501
- end_date,
502
- sample_submission_file,
503
- solution_file,
504
- is_public,
505
- ]
506
- create_button.click(create_competition, inputs=create_inputs, outputs=[final_output])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,4 +1,3 @@
1
- gradio==4.12.0
2
  joblib==1.3.2
3
  loguru==0.7.2
4
  pandas==2.1.4
@@ -6,5 +5,4 @@ huggingface_hub==0.20.1
6
  tabulate==0.9.0
7
  markdown==3.5.1
8
  psutil==5.9.0
9
- scikit-learn==1.3.2
10
- transformers
 
 
1
  joblib==1.3.2
2
  loguru==0.7.2
3
  pandas==2.1.4
 
5
  tabulate==0.9.0
6
  markdown==3.5.1
7
  psutil==5.9.0
8
+ scikit-learn==1.3.2