Abhishek Thakur
committed on
Commit
·
34121ca
1
Parent(s):
b65b2da
add socket kit
Browse files
- Dockerfile +12 -1
- Makefile +14 -1
- competitions/evaluate.py +13 -4
- competitions/runner.py +39 -26
- competitions/submissions.py +48 -21
- requirements.txt +4 -1
- socket-kit.c +8 -0
Dockerfile
CHANGED
@@ -3,7 +3,14 @@ FROM ubuntu:22.04
|
|
3 |
ENV DEBIAN_FRONTEND=noninteractive \
|
4 |
TZ=UTC
|
5 |
|
6 |
-
RUN
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
WORKDIR /app
|
9 |
RUN mkdir -p /app/.cache
|
@@ -29,4 +36,8 @@ RUN conda create -p /app/env -y python=3.10 \
|
|
29 |
SHELL ["conda", "run","--no-capture-output", "-p","/app/env", "/bin/bash", "-c"]
|
30 |
|
31 |
COPY --chown=1000:1000 . /app/
|
|
|
|
|
|
|
|
|
32 |
RUN pip install -e .
|
|
|
3 |
ENV DEBIAN_FRONTEND=noninteractive \
|
4 |
TZ=UTC
|
5 |
|
6 |
+
RUN apt-get update && \
|
7 |
+
apt-get upgrade -y && \
|
8 |
+
apt-get install -y \
|
9 |
+
build-essential \
|
10 |
+
cmake \
|
11 |
+
wget \
|
12 |
+
&& rm -rf /var/lib/apt/lists/* && \
|
13 |
+
apt-get clean
|
14 |
|
15 |
WORKDIR /app
|
16 |
RUN mkdir -p /app/.cache
|
|
|
36 |
SHELL ["conda", "run","--no-capture-output", "-p","/app/env", "/bin/bash", "-c"]
|
37 |
|
38 |
COPY --chown=1000:1000 . /app/
|
39 |
+
RUN make socket-kit.so
|
40 |
+
|
41 |
+
ENV PATH="/app:${PATH}"
|
42 |
+
|
43 |
RUN pip install -e .
|
Makefile
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
.PHONY: quality style
|
2 |
|
3 |
quality:
|
@@ -7,4 +8,16 @@ quality:
|
|
7 |
|
8 |
style:
|
9 |
python -m black --line-length 119 --target-version py38 .
|
10 |
-
python -m isort .
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
CFLAGS += -std=c99 -Wall
|
2 |
.PHONY: quality style
|
3 |
|
4 |
quality:
|
|
|
8 |
|
9 |
style:
|
10 |
python -m black --line-length 119 --target-version py38 .
|
11 |
+
python -m isort .
|
12 |
+
|
13 |
+
docker:
|
14 |
+
docker build -t competitions:latest .
|
15 |
+
docker tag competitions:latest huggingface/competitions:latest
|
16 |
+
docker push huggingface/competitions:latest
|
17 |
+
|
18 |
+
|
19 |
+
socket-kit.so: socket-kit.c
|
20 |
+
gcc $(CFLAGS) -shared -fPIC $^ -o $@ -ldl
|
21 |
+
|
22 |
+
clean:
|
23 |
+
rm *.so
|
competitions/evaluate.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
import argparse
|
2 |
import json
|
|
|
|
|
3 |
import subprocess
|
4 |
|
5 |
from huggingface_hub import HfApi, snapshot_download
|
@@ -22,12 +24,11 @@ def upload_submission_file(params, file_path):
|
|
22 |
|
23 |
|
24 |
def generate_submission_file(params):
|
25 |
-
base_user = params.competition_id.split("/")[0]
|
26 |
logger.info("Downloading submission dataset")
|
27 |
submission_dir = snapshot_download(
|
28 |
-
repo_id=
|
29 |
local_dir=params.output_path,
|
30 |
-
token=
|
31 |
repo_type="model",
|
32 |
)
|
33 |
# submission_dir has a script.py file
|
@@ -35,7 +36,13 @@ def generate_submission_file(params):
|
|
35 |
# the script.py will generate a submission.csv file in the submission_dir
|
36 |
# push the submission.csv file to the repo using upload_submission_file
|
37 |
logger.info("Generating submission file")
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
api = HfApi(token=params.token)
|
41 |
api.upload_file(
|
@@ -48,6 +55,8 @@ def generate_submission_file(params):
|
|
48 |
|
49 |
@utils.monitor
|
50 |
def run(params):
|
|
|
|
|
51 |
if isinstance(params, dict):
|
52 |
params = EvalParams(**params)
|
53 |
|
|
|
1 |
import argparse
|
2 |
import json
|
3 |
+
import os
|
4 |
+
import shutil
|
5 |
import subprocess
|
6 |
|
7 |
from huggingface_hub import HfApi, snapshot_download
|
|
|
24 |
|
25 |
|
26 |
def generate_submission_file(params):
|
|
|
27 |
logger.info("Downloading submission dataset")
|
28 |
submission_dir = snapshot_download(
|
29 |
+
repo_id=params.submission_repo,
|
30 |
local_dir=params.output_path,
|
31 |
+
token=os.environ.get("USER_TOKEN"),
|
32 |
repo_type="model",
|
33 |
)
|
34 |
# submission_dir has a script.py file
|
|
|
36 |
# the script.py will generate a submission.csv file in the submission_dir
|
37 |
# push the submission.csv file to the repo using upload_submission_file
|
38 |
logger.info("Generating submission file")
|
39 |
+
# copy socket-kit.so to submission_dir
|
40 |
+
shutil.copyfile("socket-kit.so", f"{submission_dir}/socket-kit.so")
|
41 |
+
cmd = "python script.py"
|
42 |
+
socket_kit_path = os.path.abspath(f"{submission_dir}/socket-kit.so")
|
43 |
+
env = os.environ.copy()
|
44 |
+
env["LD_PRELOAD"] = socket_kit_path
|
45 |
+
subprocess.run(cmd, cwd=submission_dir, shell=True, check=True, env=env)
|
46 |
|
47 |
api = HfApi(token=params.token)
|
48 |
api.upload_file(
|
|
|
55 |
|
56 |
@utils.monitor
|
57 |
def run(params):
|
58 |
+
logger.info(params)
|
59 |
+
logger.info(f"User token: {os.environ.get('USER_TOKEN')}")
|
60 |
if isinstance(params, dict):
|
61 |
params = EvalParams(**params)
|
62 |
|
competitions/runner.py
CHANGED
@@ -2,13 +2,11 @@ import glob
|
|
2 |
import io
|
3 |
import json
|
4 |
import os
|
5 |
-
import random
|
6 |
-
import string
|
7 |
import time
|
8 |
from dataclasses import dataclass
|
9 |
|
10 |
import pandas as pd
|
11 |
-
from huggingface_hub import HfApi, snapshot_download
|
12 |
from loguru import logger
|
13 |
|
14 |
from competitions.info import CompetitionInfo
|
@@ -18,7 +16,7 @@ from competitions.utils import run_evaluation
|
|
18 |
_DOCKERFILE = """
|
19 |
FROM huggingface/competitions:latest
|
20 |
|
21 |
-
CMD uvicorn competitions.
|
22 |
"""
|
23 |
|
24 |
# format _DOCKERFILE
|
@@ -59,6 +57,8 @@ class JobRunner:
|
|
59 |
"submission_id": sub["submission_id"],
|
60 |
"datetime": sub["datetime"],
|
61 |
"submission_repo": sub["submission_repo"],
|
|
|
|
|
62 |
}
|
63 |
)
|
64 |
if len(pending_submissions) == 0:
|
@@ -105,23 +105,8 @@ class JobRunner:
|
|
105 |
_readme = io.BytesIO(_readme.encode())
|
106 |
return _readme
|
107 |
|
108 |
-
def create_space(self, team_id, submission_id, submission_repo):
|
109 |
-
project_name = "".join(
|
110 |
-
random.choices(
|
111 |
-
string.ascii_lowercase + string.digits,
|
112 |
-
k=10,
|
113 |
-
)
|
114 |
-
)
|
115 |
api = HfApi(token=self.token)
|
116 |
-
username = self.competition_id.split("/")[0]
|
117 |
-
repo_id = f"{username}/competitions-{project_name}"
|
118 |
-
api.create_repo(
|
119 |
-
repo_id=repo_id,
|
120 |
-
repo_type="space",
|
121 |
-
space_sdk="docker",
|
122 |
-
space_hardware="cpu-basic",
|
123 |
-
private=True,
|
124 |
-
)
|
125 |
params = {
|
126 |
"competition_id": self.competition_id,
|
127 |
"competition_type": self.competition_type,
|
@@ -136,13 +121,13 @@ class JobRunner:
|
|
136 |
"submission_repo": submission_repo,
|
137 |
}
|
138 |
|
139 |
-
api.add_space_secret(repo_id=
|
140 |
|
141 |
-
readme = self._create_readme(
|
142 |
api.upload_file(
|
143 |
path_or_fileobj=readme,
|
144 |
path_in_repo="README.md",
|
145 |
-
repo_id=
|
146 |
repo_type="space",
|
147 |
)
|
148 |
|
@@ -150,10 +135,35 @@ class JobRunner:
|
|
150 |
api.upload_file(
|
151 |
path_or_fileobj=_dockerfile,
|
152 |
path_in_repo="Dockerfile",
|
153 |
-
repo_id=
|
154 |
repo_type="space",
|
155 |
)
|
156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
|
158 |
def run(self):
|
159 |
while True:
|
@@ -168,5 +178,8 @@ class JobRunner:
|
|
168 |
team_id = row["team_id"]
|
169 |
submission_id = row["submission_id"]
|
170 |
submission_repo = row["submission_repo"]
|
171 |
-
|
|
|
|
|
|
|
172 |
time.sleep(5)
|
|
|
2 |
import io
|
3 |
import json
|
4 |
import os
|
|
|
|
|
5 |
import time
|
6 |
from dataclasses import dataclass
|
7 |
|
8 |
import pandas as pd
|
9 |
+
from huggingface_hub import HfApi, hf_hub_download, snapshot_download
|
10 |
from loguru import logger
|
11 |
|
12 |
from competitions.info import CompetitionInfo
|
|
|
16 |
_DOCKERFILE = """
|
17 |
FROM huggingface/competitions:latest
|
18 |
|
19 |
+
CMD uvicorn competitions.api:api --port 7860 --host 0.0.0.0
|
20 |
"""
|
21 |
|
22 |
# format _DOCKERFILE
|
|
|
57 |
"submission_id": sub["submission_id"],
|
58 |
"datetime": sub["datetime"],
|
59 |
"submission_repo": sub["submission_repo"],
|
60 |
+
"space_id": sub["space_id"],
|
61 |
+
"space_status": sub["space_status"],
|
62 |
}
|
63 |
)
|
64 |
if len(pending_submissions) == 0:
|
|
|
105 |
_readme = io.BytesIO(_readme.encode())
|
106 |
return _readme
|
107 |
|
108 |
+
def create_space(self, team_id, submission_id, submission_repo, space_id):
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
api = HfApi(token=self.token)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
params = {
|
111 |
"competition_id": self.competition_id,
|
112 |
"competition_type": self.competition_type,
|
|
|
121 |
"submission_repo": submission_repo,
|
122 |
}
|
123 |
|
124 |
+
api.add_space_secret(repo_id=space_id, key="PARAMS", value=json.dumps(params))
|
125 |
|
126 |
+
readme = self._create_readme(space_id.split("/")[-1])
|
127 |
api.upload_file(
|
128 |
path_or_fileobj=readme,
|
129 |
path_in_repo="README.md",
|
130 |
+
repo_id=space_id,
|
131 |
repo_type="space",
|
132 |
)
|
133 |
|
|
|
135 |
api.upload_file(
|
136 |
path_or_fileobj=_dockerfile,
|
137 |
path_in_repo="Dockerfile",
|
138 |
+
repo_id=space_id,
|
139 |
repo_type="space",
|
140 |
)
|
141 |
+
|
142 |
+
# update space_status in submission_info
|
143 |
+
team_fname = hf_hub_download(
|
144 |
+
repo_id=self.competition_id,
|
145 |
+
filename=f"submission_info/{team_id}.json",
|
146 |
+
token=self.token,
|
147 |
+
repo_type="dataset",
|
148 |
+
)
|
149 |
+
with open(team_fname, "r", encoding="utf-8") as f:
|
150 |
+
team_submission_info = json.load(f)
|
151 |
+
|
152 |
+
for submission in team_submission_info["submissions"]:
|
153 |
+
if submission["submission_id"] == submission_id:
|
154 |
+
submission["space_status"] = 1
|
155 |
+
break
|
156 |
+
|
157 |
+
team_submission_info_json = json.dumps(team_submission_info, indent=4)
|
158 |
+
team_submission_info_json_bytes = team_submission_info_json.encode("utf-8")
|
159 |
+
team_submission_info_json_buffer = io.BytesIO(team_submission_info_json_bytes)
|
160 |
+
api = HfApi(token=self.token)
|
161 |
+
api.upload_file(
|
162 |
+
path_or_fileobj=team_submission_info_json_buffer,
|
163 |
+
path_in_repo=f"submission_info/{team_id}.json",
|
164 |
+
repo_id=self.competition_id,
|
165 |
+
repo_type="dataset",
|
166 |
+
)
|
167 |
|
168 |
def run(self):
|
169 |
while True:
|
|
|
178 |
team_id = row["team_id"]
|
179 |
submission_id = row["submission_id"]
|
180 |
submission_repo = row["submission_repo"]
|
181 |
+
space_id = row["space_id"]
|
182 |
+
space_status = row["space_status"]
|
183 |
+
if space_status == 0:
|
184 |
+
self.create_space(team_id, submission_id, submission_repo, space_id)
|
185 |
time.sleep(5)
|
competitions/submissions.py
CHANGED
@@ -5,7 +5,7 @@ from dataclasses import dataclass
|
|
5 |
from datetime import datetime
|
6 |
|
7 |
import pandas as pd
|
8 |
-
from huggingface_hub import HfApi, hf_hub_download
|
9 |
from huggingface_hub.utils._errors import EntryNotFoundError
|
10 |
from loguru import logger
|
11 |
|
@@ -133,9 +133,20 @@ class Submissions:
|
|
133 |
todays_submissions += 1
|
134 |
return todays_submissions
|
135 |
|
136 |
-
def _increment_submissions(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
if submission_repo is None:
|
138 |
submission_repo = ""
|
|
|
|
|
139 |
team_fname = hf_hub_download(
|
140 |
repo_id=self.competition_id,
|
141 |
filename=f"submission_info/{team_id}.json",
|
@@ -153,11 +164,13 @@ class Submissions:
|
|
153 |
"submission_id": submission_id,
|
154 |
"submission_comment": submission_comment,
|
155 |
"submission_repo": submission_repo,
|
|
|
156 |
"submitted_by": user_id,
|
157 |
"status": "pending",
|
158 |
"selected": False,
|
159 |
"public_score": -1,
|
160 |
"private_score": -1,
|
|
|
161 |
}
|
162 |
)
|
163 |
# count the number of times user has submitted today
|
@@ -227,9 +240,7 @@ class Submissions:
|
|
227 |
repo_type="dataset",
|
228 |
)
|
229 |
|
230 |
-
def _get_team_subs(self,
|
231 |
-
user_id = user_info["id"]
|
232 |
-
team_id = self._get_team_id(user_id)
|
233 |
try:
|
234 |
team_submissions = self._download_team_subs(team_id)
|
235 |
except EntryNotFoundError:
|
@@ -319,7 +330,8 @@ class Submissions:
|
|
319 |
private = False
|
320 |
if current_date_time >= self.end_date:
|
321 |
private = True
|
322 |
-
|
|
|
323 |
return success_subs, failed_subs
|
324 |
|
325 |
def _get_team_id(self, user_id):
|
@@ -381,33 +393,48 @@ class Submissions:
|
|
381 |
user_id=user_id,
|
382 |
submission_id=submission_id,
|
383 |
submission_comment=submission_comment,
|
384 |
-
submission_repo="",
|
385 |
)
|
386 |
else:
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
393 |
api = HfApi(token=self.token)
|
394 |
-
competition_user = self.competition_id.split("/")[0]
|
395 |
api.create_repo(
|
396 |
-
repo_id=
|
397 |
-
repo_type="
|
|
|
|
|
398 |
private=True,
|
399 |
)
|
400 |
-
|
401 |
-
|
402 |
-
repo_id=f"{competition_user}/{submission_id}",
|
403 |
-
repo_type="model",
|
404 |
-
)
|
405 |
submissions_made = self._increment_submissions(
|
406 |
team_id=team_id,
|
407 |
user_id=user_id,
|
408 |
submission_id=submission_id,
|
409 |
submission_comment=submission_comment,
|
410 |
submission_repo=uploaded_file,
|
|
|
|
|
411 |
)
|
412 |
remaining_submissions = self.submission_limit - submissions_made
|
413 |
return remaining_submissions
|
|
|
5 |
from datetime import datetime
|
6 |
|
7 |
import pandas as pd
|
8 |
+
from huggingface_hub import HfApi, hf_hub_download
|
9 |
from huggingface_hub.utils._errors import EntryNotFoundError
|
10 |
from loguru import logger
|
11 |
|
|
|
133 |
todays_submissions += 1
|
134 |
return todays_submissions
|
135 |
|
136 |
+
def _increment_submissions(
|
137 |
+
self,
|
138 |
+
team_id,
|
139 |
+
user_id,
|
140 |
+
submission_id,
|
141 |
+
submission_comment,
|
142 |
+
submission_repo=None,
|
143 |
+
space_id=None,
|
144 |
+
space_status=0,
|
145 |
+
):
|
146 |
if submission_repo is None:
|
147 |
submission_repo = ""
|
148 |
+
if space_id is None:
|
149 |
+
space_id = ""
|
150 |
team_fname = hf_hub_download(
|
151 |
repo_id=self.competition_id,
|
152 |
filename=f"submission_info/{team_id}.json",
|
|
|
164 |
"submission_id": submission_id,
|
165 |
"submission_comment": submission_comment,
|
166 |
"submission_repo": submission_repo,
|
167 |
+
"space_id": space_id,
|
168 |
"submitted_by": user_id,
|
169 |
"status": "pending",
|
170 |
"selected": False,
|
171 |
"public_score": -1,
|
172 |
"private_score": -1,
|
173 |
+
"space_status": space_status,
|
174 |
}
|
175 |
)
|
176 |
# count the number of times user has submitted today
|
|
|
240 |
repo_type="dataset",
|
241 |
)
|
242 |
|
243 |
+
def _get_team_subs(self, team_id, private=False):
|
|
|
|
|
244 |
try:
|
245 |
team_submissions = self._download_team_subs(team_id)
|
246 |
except EntryNotFoundError:
|
|
|
330 |
private = False
|
331 |
if current_date_time >= self.end_date:
|
332 |
private = True
|
333 |
+
team_id = self._get_team_id(user_info["id"])
|
334 |
+
success_subs, failed_subs = self._get_team_subs(team_id, private=private)
|
335 |
return success_subs, failed_subs
|
336 |
|
337 |
def _get_team_id(self, user_id):
|
|
|
393 |
user_id=user_id,
|
394 |
submission_id=submission_id,
|
395 |
submission_comment=submission_comment,
|
|
|
396 |
)
|
397 |
else:
|
398 |
+
# Download the submission repo and upload it to the competition repo
|
399 |
+
# submission_repo = snapshot_download(
|
400 |
+
# repo_id=uploaded_file,
|
401 |
+
# local_dir=submission_id,
|
402 |
+
# token=user_token,
|
403 |
+
# repo_type="model",
|
404 |
+
# )
|
405 |
+
# api = HfApi(token=self.token)
|
406 |
+
# competition_user = self.competition_id.split("/")[0]
|
407 |
+
# api.create_repo(
|
408 |
+
# repo_id=f"{competition_user}/{submission_id}",
|
409 |
+
# repo_type="model",
|
410 |
+
# private=True,
|
411 |
+
# )
|
412 |
+
# api.upload_folder(
|
413 |
+
# folder_path=submission_repo,
|
414 |
+
# repo_id=f"{competition_user}/{submission_id}",
|
415 |
+
# repo_type="model",
|
416 |
+
# )
|
417 |
+
# create barebones submission runner space
|
418 |
+
competition_organizer = self.competition_id.split("/")[0]
|
419 |
+
space_id = f"{competition_organizer}/comp-{submission_id}"
|
420 |
api = HfApi(token=self.token)
|
|
|
421 |
api.create_repo(
|
422 |
+
repo_id=space_id,
|
423 |
+
repo_type="space",
|
424 |
+
space_sdk="docker",
|
425 |
+
space_hardware="cpu-basic",
|
426 |
private=True,
|
427 |
)
|
428 |
+
|
429 |
+
api.add_space_secret(repo_id=space_id, key="USER_TOKEN", value=user_token)
|
|
|
|
|
|
|
430 |
submissions_made = self._increment_submissions(
|
431 |
team_id=team_id,
|
432 |
user_id=user_id,
|
433 |
submission_id=submission_id,
|
434 |
submission_comment=submission_comment,
|
435 |
submission_repo=uploaded_file,
|
436 |
+
space_id=space_id,
|
437 |
+
space_status=0,
|
438 |
)
|
439 |
remaining_submissions = self.submission_limit - submissions_made
|
440 |
return remaining_submissions
|
requirements.txt
CHANGED
@@ -4,4 +4,7 @@ loguru==0.7.2
|
|
4 |
pandas==2.1.4
|
5 |
huggingface_hub==0.20.1
|
6 |
tabulate==0.9.0
|
7 |
-
markdown==3.5.1
|
|
|
|
|
|
|
|
4 |
pandas==2.1.4
|
5 |
huggingface_hub==0.20.1
|
6 |
tabulate==0.9.0
|
7 |
+
markdown==3.5.1
|
8 |
+
psutil==5.9.0
|
9 |
+
scikit-learn==1.3.2
|
10 |
+
transformers
|
socket-kit.c
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#include <errno.h>
|
2 |
+
#include <sys/socket.h>
|
3 |
+
|
4 |
+
int connect(int fd, const struct sockaddr *addr, socklen_t len)
|
5 |
+
{
|
6 |
+
errno = ENETDOWN;
|
7 |
+
return -1;
|
8 |
+
}
|