Abhishek Thakur commited on
Commit
a2fa160
·
1 Parent(s): 1094cbb

migrate to teams

Browse files
.dockerignore CHANGED
@@ -4,6 +4,7 @@
4
  .vim/
5
  flagged/
6
  *.csv
 
7
 
8
  # Byte-compiled / optimized / DLL files
9
  __pycache__/
 
4
  .vim/
5
  flagged/
6
  *.csv
7
+ *.db
8
 
9
  # Byte-compiled / optimized / DLL files
10
  __pycache__/
.gitignore CHANGED
@@ -4,6 +4,7 @@
4
  .vim/
5
  flagged/
6
  *.csv
 
7
 
8
  # Byte-compiled / optimized / DLL files
9
  __pycache__/
 
4
  .vim/
5
  flagged/
6
  *.csv
7
+ *.db
8
 
9
  # Byte-compiled / optimized / DLL files
10
  __pycache__/
16337e22-7815-4ebd-a6c4-7a58dc46e214/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
16337e22-7815-4ebd-a6c4-7a58dc46e214/script.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
+
4
+ sub = []
5
+ for i in range(10000):
6
+ sub.append((i, 0.5))
7
+
8
+ sub = pd.DataFrame(sub, columns=["id", "pred"])
9
+ sub.to_csv("submission.csv", index=False)
competitions/api.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import os
3
+ import signal
4
+ import sqlite3
5
+ from contextlib import asynccontextmanager
6
+
7
+ import psutil
8
+ from fastapi import FastAPI
9
+ from loguru import logger
10
+
11
+ from competitions.utils import run_evaluation
12
+
13
+
14
+ def get_process_status(pid):
15
+ try:
16
+ process = psutil.Process(pid)
17
+ proc_status = process.status()
18
+ return proc_status
19
+ except psutil.NoSuchProcess:
20
+ logger.info(f"No process found with PID: {pid}")
21
+ return "Completed"
22
+
23
+
24
+ def kill_process_by_pid(pid):
25
+ """Kill process by PID."""
26
+ os.kill(pid, signal.SIGTERM)
27
+
28
+
29
+ class JobDB:
30
+ def __init__(self, db_path):
31
+ self.db_path = db_path
32
+ self.conn = sqlite3.connect(db_path)
33
+ self.c = self.conn.cursor()
34
+ self.create_jobs_table()
35
+
36
+ def create_jobs_table(self):
37
+ self.c.execute(
38
+ """CREATE TABLE IF NOT EXISTS jobs
39
+ (id INTEGER PRIMARY KEY, pid INTEGER)"""
40
+ )
41
+ self.conn.commit()
42
+
43
+ def add_job(self, pid):
44
+ sql = f"INSERT INTO jobs (pid) VALUES ({pid})"
45
+ self.c.execute(sql)
46
+ self.conn.commit()
47
+
48
+ def get_running_jobs(self):
49
+ self.c.execute("""SELECT pid FROM jobs""")
50
+ running_pids = self.c.fetchall()
51
+ running_pids = [pid[0] for pid in running_pids]
52
+ return running_pids
53
+
54
+ def delete_job(self, pid):
55
+ sql = f"DELETE FROM jobs WHERE pid={pid}"
56
+ self.c.execute(sql)
57
+ self.conn.commit()
58
+
59
+
60
+ PARAMS = os.environ.get("PARAMS")
61
+ DB = JobDB("job.db")
62
+
63
+
64
+ class BackgroundRunner:
65
+ async def run_main(self):
66
+ while True:
67
+ running_jobs = DB.get_running_jobs()
68
+ if running_jobs:
69
+ for _pid in running_jobs:
70
+ proc_status = get_process_status(_pid)
71
+ proc_status = proc_status.strip().lower()
72
+ if proc_status in ("completed", "error", "zombie"):
73
+ logger.info(f"Process {_pid} is already completed. Skipping...")
74
+ try:
75
+ kill_process_by_pid(_pid)
76
+ except Exception as e:
77
+ logger.info(f"Error while killing process: {e}")
78
+ DB.delete_job(_pid)
79
+
80
+ running_jobs = DB.get_running_jobs()
81
+ if not running_jobs:
82
+ logger.info("No running jobs found. Shutting down the server.")
83
+ os.kill(os.getpid(), signal.SIGINT)
84
+ await asyncio.sleep(30)
85
+
86
+
87
+ runner = BackgroundRunner()
88
+
89
+
90
+ @asynccontextmanager
91
+ async def lifespan(app: FastAPI):
92
+ process_pid = run_evaluation(params=PARAMS)
93
+ logger.info(f"Started training with PID {process_pid}")
94
+ DB.add_job(process_pid)
95
+ asyncio.create_task(runner.run_main())
96
+ yield
97
+
98
+
99
+ api = FastAPI(lifespan=lifespan)
100
+
101
+
102
+ @api.get("/")
103
+ async def root():
104
+ return "Your model is being evaluated..."
105
+
106
+
107
+ @api.get("/health")
108
+ async def health():
109
+ return "OK"
competitions/app.py CHANGED
@@ -5,6 +5,7 @@ from fastapi import FastAPI, File, Form, Request, UploadFile
5
  from fastapi.responses import HTMLResponse, JSONResponse
6
  from fastapi.staticfiles import StaticFiles
7
  from fastapi.templating import Jinja2Templates
 
8
  from pydantic import BaseModel
9
 
10
  from competitions.info import CompetitionInfo
@@ -83,6 +84,7 @@ async def get_leaderboard(request: Request, lb: str):
83
  autotrain_token=HF_TOKEN,
84
  )
85
  df = leaderboard.fetch(private=lb == "private")
 
86
  resp = {"response": df.to_markdown(index=False)}
87
  return resp
88
 
@@ -94,6 +96,7 @@ async def my_submissions(request: Request, user: User):
94
  submission_limit=COMP_INFO.submission_limit,
95
  competition_id=COMPETITION_ID,
96
  token=HF_TOKEN,
 
97
  )
98
  success_subs, failed_subs = sub.my_submissions(user.user_token)
99
  success_subs = success_subs.to_markdown(index=False)
@@ -107,7 +110,7 @@ async def my_submissions(request: Request, user: User):
107
 
108
  @app.post("/new_submission", response_class=JSONResponse)
109
  async def new_submission(
110
- submission_file: UploadFile = File(...),
111
  hub_model: str = Form(...),
112
  token: str = Form(...),
113
  submission_comment: str = Form(...),
@@ -117,6 +120,7 @@ async def new_submission(
117
  submission_limit=COMP_INFO.submission_limit,
118
  competition_id=COMPETITION_ID,
119
  token=HF_TOKEN,
 
120
  )
121
  if COMP_INFO.competition_type == "generic":
122
  resp = sub.new_submission(token, submission_file, submission_comment)
 
5
  from fastapi.responses import HTMLResponse, JSONResponse
6
  from fastapi.staticfiles import StaticFiles
7
  from fastapi.templating import Jinja2Templates
8
+ from loguru import logger
9
  from pydantic import BaseModel
10
 
11
  from competitions.info import CompetitionInfo
 
84
  autotrain_token=HF_TOKEN,
85
  )
86
  df = leaderboard.fetch(private=lb == "private")
87
+ logger.info(df)
88
  resp = {"response": df.to_markdown(index=False)}
89
  return resp
90
 
 
96
  submission_limit=COMP_INFO.submission_limit,
97
  competition_id=COMPETITION_ID,
98
  token=HF_TOKEN,
99
+ competition_type=COMP_INFO.competition_type,
100
  )
101
  success_subs, failed_subs = sub.my_submissions(user.user_token)
102
  success_subs = success_subs.to_markdown(index=False)
 
110
 
111
  @app.post("/new_submission", response_class=JSONResponse)
112
  async def new_submission(
113
+ submission_file: UploadFile = File(None),
114
  hub_model: str = Form(...),
115
  token: str = Form(...),
116
  submission_comment: str = Form(...),
 
120
  submission_limit=COMP_INFO.submission_limit,
121
  competition_id=COMPETITION_ID,
122
  token=HF_TOKEN,
123
+ competition_type=COMP_INFO.competition_type,
124
  )
125
  if COMP_INFO.competition_type == "generic":
126
  resp = sub.new_submission(token, submission_file, submission_comment)
competitions/compute_metrics.py CHANGED
@@ -15,7 +15,7 @@ def compute_metrics(params):
15
 
16
  solution_df = pd.read_csv(solution_file)
17
 
18
- submission_filename = f"submissions/{params.user_id}-{params.submission_id}.csv"
19
  submission_file = hf_hub_download(
20
  repo_id=params.competition_id,
21
  filename=submission_filename,
@@ -47,7 +47,7 @@ def compute_metrics(params):
47
  else:
48
  _metric = getattr(metrics, params.metric)
49
  target_cols = [col for col in solution_df.columns if col not in [params.submission_id_col, "split"]]
50
- public_score = _metric(private_solution_df[target_cols], public_submission_df[target_cols])
51
  private_score = _metric(private_solution_df[target_cols], private_submission_df[target_cols])
52
 
53
  # scores can also be dictionaries for multiple metrics
 
15
 
16
  solution_df = pd.read_csv(solution_file)
17
 
18
+ submission_filename = f"submissions/{params.team_id}-{params.submission_id}.csv"
19
  submission_file = hf_hub_download(
20
  repo_id=params.competition_id,
21
  filename=submission_filename,
 
47
  else:
48
  _metric = getattr(metrics, params.metric)
49
  target_cols = [col for col in solution_df.columns if col not in [params.submission_id_col, "split"]]
50
+ public_score = _metric(public_solution_df[target_cols], public_submission_df[target_cols])
51
  private_score = _metric(private_solution_df[target_cols], private_submission_df[target_cols])
52
 
53
  # scores can also be dictionaries for multiple metrics
competitions/evaluate.py CHANGED
@@ -1,7 +1,8 @@
1
  import argparse
2
  import json
 
3
 
4
- from huggingface_hub import snapshot_download
5
  from loguru import logger
6
 
7
  from competitions import utils
@@ -15,12 +16,32 @@ def parse_args():
15
  return parser.parse_args()
16
 
17
 
 
 
 
 
 
18
  def generate_submission_file(params):
 
19
  logger.info("Downloading submission dataset")
20
- snapshot_download(
21
- repo_id=params.data_path,
22
  local_dir=params.output_path,
23
  token=params.token,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  repo_type="dataset",
25
  )
26
 
@@ -35,9 +56,9 @@ def run(params):
35
  if params.competition_type == "code":
36
  generate_submission_file(params)
37
 
38
- public_score, private_score = compute_metrics(params)
39
 
40
- utils.update_submission_score(params, public_score, private_score)
41
  utils.update_submission_status(params, "success")
42
  utils.pause_space(params)
43
 
 
1
  import argparse
2
  import json
3
+ import subprocess
4
 
5
+ from huggingface_hub import HfApi, snapshot_download
6
  from loguru import logger
7
 
8
  from competitions import utils
 
16
  return parser.parse_args()
17
 
18
 
19
+ def upload_submission_file(params, file_path):
20
+ logger.info("Uploading submission file")
21
+ pass
22
+
23
+
24
  def generate_submission_file(params):
25
+ base_user = params.competition_id.split("/")[0]
26
  logger.info("Downloading submission dataset")
27
+ submission_dir = snapshot_download(
28
+ repo_id=f"{base_user}/{params.submission_id}",
29
  local_dir=params.output_path,
30
  token=params.token,
31
+ repo_type="model",
32
+ )
33
+ # submission_dir has a script.py file
34
+ # start a subprocess to run the script.py
35
+ # the script.py will generate a submission.csv file in the submission_dir
36
+ # push the submission.csv file to the repo using upload_submission_file
37
+ logger.info("Generating submission file")
38
+ subprocess.run(["python", "script.py"], cwd=submission_dir)
39
+
40
+ api = HfApi(token=params.token)
41
+ api.upload_file(
42
+ path_or_fileobj=f"{submission_dir}/submission.csv",
43
+ path_in_repo=f"submissions/{params.team_id}-{params.submission_id}.csv",
44
+ repo_id=params.competition_id,
45
  repo_type="dataset",
46
  )
47
 
 
56
  if params.competition_type == "code":
57
  generate_submission_file(params)
58
 
59
+ evaluation = compute_metrics(params)
60
 
61
+ utils.update_submission_score(params, evaluation["public_score"], evaluation["private_score"])
62
  utils.update_submission_status(params, "success")
63
  utils.pause_space(params)
64
 
competitions/info.py CHANGED
@@ -84,10 +84,6 @@ class CompetitionInfo:
84
  def competition_description(self):
85
  return self.competition_desc
86
 
87
- @property
88
- def competition_name(self):
89
- return self.config["COMPETITION_NAME"]
90
-
91
  @property
92
  def submission_columns(self):
93
  return self.config["SUBMISSION_COLUMNS"].split(",")
 
84
  def competition_description(self):
85
  return self.competition_desc
86
 
 
 
 
 
87
  @property
88
  def submission_columns(self):
89
  return self.config["SUBMISSION_COLUMNS"].split(",")
competitions/leaderboard.py CHANGED
@@ -25,13 +25,13 @@ class Leaderboard:
25
  def _refresh_columns(self):
26
  self.private_columns = [
27
  "rank",
28
- "name",
29
  "private_score",
30
  "submission_datetime",
31
  ]
32
  self.public_columns = [
33
  "rank",
34
- "name",
35
  "public_score",
36
  "submission_datetime",
37
  ]
@@ -49,14 +49,16 @@ class Leaderboard:
49
  start_time = time.time()
50
  submissions = []
51
  for submission in glob.glob(os.path.join(submissions_folder, "submission_info", "*.json")):
52
- with open(submission, "r") as f:
53
  submission_info = json.load(f)
54
  # only select submissions that are done
55
- submission_info["submissions"] = [sub for sub in submission_info["submissions"] if sub["status"] == "done"]
 
 
56
  submission_info["submissions"] = [
57
  sub
58
  for sub in submission_info["submissions"]
59
- if datetime.strptime(sub["date"], "%Y-%m-%d") < self.end_date
60
  ]
61
  if len(submission_info["submissions"]) == 0:
62
  continue
@@ -83,15 +85,13 @@ class Leaderboard:
83
  submission_info["submissions"] = submission_info["submissions"][0]
84
  temp_info = {
85
  "id": submission_info["id"],
86
- "name": submission_info["name"],
87
  "submission_id": submission_info["submissions"]["submission_id"],
88
  "submission_comment": submission_info["submissions"]["submission_comment"],
89
  "status": submission_info["submissions"]["status"],
90
  "selected": submission_info["submissions"]["selected"],
91
  "public_score": submission_info["submissions"]["public_score"],
92
  # "private_score": submission_info["submissions"]["private_score"],
93
- "submission_date": submission_info["submissions"]["date"],
94
- "submission_time": submission_info["submissions"]["time"],
95
  }
96
  for score in other_scores:
97
  temp_info[score] = submission_info["submissions"][score]
@@ -112,10 +112,10 @@ class Leaderboard:
112
  start_time = time.time()
113
  submissions = []
114
  for submission in glob.glob(os.path.join(submissions_folder, "submission_info", "*.json")):
115
- with open(submission, "r") as f:
116
  submission_info = json.load(f)
117
  submission_info["submissions"] = [
118
- sub for sub in submission_info["submissions"] if sub["status"] == "done"
119
  ]
120
  if len(submission_info["submissions"]) == 0:
121
  continue
@@ -146,8 +146,7 @@ class Leaderboard:
146
  if selected_submissions == 0:
147
  # select submissions with best public score
148
  submission_info["submissions"].sort(
149
- key=lambda x: x["public_score"],
150
- reverse=True if self.eval_higher_is_better else False,
151
  )
152
  # select only the best submission
153
  submission_info["submissions"] = submission_info["submissions"][0]
@@ -156,8 +155,7 @@ class Leaderboard:
156
  submission_info["submissions"] = [sub for sub in submission_info["submissions"] if sub["selected"]]
157
  # sort by private score
158
  submission_info["submissions"].sort(
159
- key=lambda x: x["private_score"],
160
- reverse=True if self.eval_higher_is_better else False,
161
  )
162
  # select only the best submission
163
  submission_info["submissions"] = submission_info["submissions"][0]
@@ -167,28 +165,24 @@ class Leaderboard:
167
  sub for sub in submission_info["submissions"] if not sub["selected"]
168
  ]
169
  temp_best_public_submissions.sort(
170
- key=lambda x: x["public_score"],
171
- reverse=True if self.eval_higher_is_better else False,
172
  )
173
  missing_candidates = self.max_selected_submissions - len(temp_selected_submissions)
174
  temp_best_public_submissions = temp_best_public_submissions[:missing_candidates]
175
  submission_info["submissions"] = temp_selected_submissions + temp_best_public_submissions
176
  submission_info["submissions"].sort(
177
- key=lambda x: x["private_score"],
178
- reverse=True if self.eval_higher_is_better else False,
179
  )
180
  submission_info["submissions"] = submission_info["submissions"][0]
181
 
182
  temp_info = {
183
  "id": submission_info["id"],
184
- "name": submission_info["name"],
185
  "submission_id": submission_info["submissions"]["submission_id"],
186
  "submission_comment": submission_info["submissions"]["submission_comment"],
187
  "status": submission_info["submissions"]["status"],
188
  "selected": submission_info["submissions"]["selected"],
189
  "private_score": submission_info["submissions"]["private_score"],
190
- "submission_date": submission_info["submissions"]["date"],
191
- "submission_time": submission_info["submissions"]["time"],
192
  }
193
  for score in other_scores:
194
  temp_info[score] = submission_info["submissions"][score]
@@ -206,10 +200,10 @@ class Leaderboard:
206
  return pd.DataFrame()
207
 
208
  df = pd.DataFrame(submissions)
209
- # convert submission date and time to datetime
210
- df["submission_datetime"] = pd.to_datetime(
211
- df["submission_date"] + " " + df["submission_time"], format="%Y-%m-%d %H:%M:%S"
212
- )
213
  # only keep submissions before the end date
214
  df = df[df["submission_datetime"] < self.end_date].reset_index(drop=True)
215
 
@@ -254,7 +248,6 @@ class Leaderboard:
254
  columns = self.public_columns if not private else self.private_columns
255
  logger.info(columns)
256
  # remove duplicate columns
257
- # ['rank', 'name', 'public_score', 'submission_datetime', 'public_score_track1', 'public_score_track1', 'public_score_track1', 'public_score_track1']
258
  columns = list(dict.fromkeys(columns))
259
 
260
  # send submission_datetime to the end
 
25
  def _refresh_columns(self):
26
  self.private_columns = [
27
  "rank",
28
+ "id",
29
  "private_score",
30
  "submission_datetime",
31
  ]
32
  self.public_columns = [
33
  "rank",
34
+ "id",
35
  "public_score",
36
  "submission_datetime",
37
  ]
 
49
  start_time = time.time()
50
  submissions = []
51
  for submission in glob.glob(os.path.join(submissions_folder, "submission_info", "*.json")):
52
+ with open(submission, "r", encoding="utf-8") as f:
53
  submission_info = json.load(f)
54
  # only select submissions that are done
55
+ submission_info["submissions"] = [
56
+ sub for sub in submission_info["submissions"] if sub["status"] == "success"
57
+ ]
58
  submission_info["submissions"] = [
59
  sub
60
  for sub in submission_info["submissions"]
61
+ if datetime.strptime(sub["datetime"], "%Y-%m-%d %H:%M:%S") < self.end_date
62
  ]
63
  if len(submission_info["submissions"]) == 0:
64
  continue
 
85
  submission_info["submissions"] = submission_info["submissions"][0]
86
  temp_info = {
87
  "id": submission_info["id"],
 
88
  "submission_id": submission_info["submissions"]["submission_id"],
89
  "submission_comment": submission_info["submissions"]["submission_comment"],
90
  "status": submission_info["submissions"]["status"],
91
  "selected": submission_info["submissions"]["selected"],
92
  "public_score": submission_info["submissions"]["public_score"],
93
  # "private_score": submission_info["submissions"]["private_score"],
94
+ "submission_datetime": submission_info["submissions"]["datetime"],
 
95
  }
96
  for score in other_scores:
97
  temp_info[score] = submission_info["submissions"][score]
 
112
  start_time = time.time()
113
  submissions = []
114
  for submission in glob.glob(os.path.join(submissions_folder, "submission_info", "*.json")):
115
+ with open(submission, "r", encoding="utf-8") as f:
116
  submission_info = json.load(f)
117
  submission_info["submissions"] = [
118
+ sub for sub in submission_info["submissions"] if sub["status"] == "success"
119
  ]
120
  if len(submission_info["submissions"]) == 0:
121
  continue
 
146
  if selected_submissions == 0:
147
  # select submissions with best public score
148
  submission_info["submissions"].sort(
149
+ key=lambda x: x["public_score"], reverse=self.eval_higher_is_better
 
150
  )
151
  # select only the best submission
152
  submission_info["submissions"] = submission_info["submissions"][0]
 
155
  submission_info["submissions"] = [sub for sub in submission_info["submissions"] if sub["selected"]]
156
  # sort by private score
157
  submission_info["submissions"].sort(
158
+ key=lambda x: x["private_score"], reverse=self.eval_higher_is_better
 
159
  )
160
  # select only the best submission
161
  submission_info["submissions"] = submission_info["submissions"][0]
 
165
  sub for sub in submission_info["submissions"] if not sub["selected"]
166
  ]
167
  temp_best_public_submissions.sort(
168
+ key=lambda x: x["public_score"], reverse=self.eval_higher_is_better
 
169
  )
170
  missing_candidates = self.max_selected_submissions - len(temp_selected_submissions)
171
  temp_best_public_submissions = temp_best_public_submissions[:missing_candidates]
172
  submission_info["submissions"] = temp_selected_submissions + temp_best_public_submissions
173
  submission_info["submissions"].sort(
174
+ key=lambda x: x["private_score"], reverse=self.eval_higher_is_better
 
175
  )
176
  submission_info["submissions"] = submission_info["submissions"][0]
177
 
178
  temp_info = {
179
  "id": submission_info["id"],
 
180
  "submission_id": submission_info["submissions"]["submission_id"],
181
  "submission_comment": submission_info["submissions"]["submission_comment"],
182
  "status": submission_info["submissions"]["status"],
183
  "selected": submission_info["submissions"]["selected"],
184
  "private_score": submission_info["submissions"]["private_score"],
185
+ "submission_datetime": submission_info["submissions"]["datetime"],
 
186
  }
187
  for score in other_scores:
188
  temp_info[score] = submission_info["submissions"][score]
 
200
  return pd.DataFrame()
201
 
202
  df = pd.DataFrame(submissions)
203
+
204
+ # convert submission datetime to pandas datetime
205
+ df["submission_datetime"] = pd.to_datetime(df["submission_datetime"], format="%Y-%m-%d %H:%M:%S")
206
+
207
  # only keep submissions before the end date
208
  df = df[df["submission_datetime"] < self.end_date].reset_index(drop=True)
209
 
 
248
  columns = self.public_columns if not private else self.private_columns
249
  logger.info(columns)
250
  # remove duplicate columns
 
251
  columns = list(dict.fromkeys(columns))
252
 
253
  # send submission_datetime to the end
competitions/params.py CHANGED
@@ -9,12 +9,13 @@ class EvalParams(BaseModel):
9
  competition_type: str
10
  metric: str
11
  token: str
12
- user_id: str
13
  submission_id: str
14
  submission_id_col: str
15
  submission_cols: List[str]
16
  submission_rows: int
17
  output_path: str
 
18
 
19
  class Config:
20
  protected_namespaces = ()
 
9
  competition_type: str
10
  metric: str
11
  token: str
12
+ team_id: str
13
  submission_id: str
14
  submission_id_col: str
15
  submission_cols: List[str]
16
  submission_rows: int
17
  output_path: str
18
+ submission_repo: str
19
 
20
  class Config:
21
  protected_namespaces = ()
competitions/runner.py CHANGED
@@ -1,17 +1,30 @@
1
  import glob
 
2
  import json
3
  import os
 
 
4
  import time
5
  from dataclasses import dataclass
6
 
7
  import pandas as pd
8
- from huggingface_hub import snapshot_download
9
  from loguru import logger
10
 
11
  from competitions.info import CompetitionInfo
12
  from competitions.utils import run_evaluation
13
 
14
 
 
 
 
 
 
 
 
 
 
 
15
  @dataclass
16
  class JobRunner:
17
  competition_info: CompetitionInfo
@@ -27,56 +40,121 @@ class JobRunner:
27
  self.submission_rows = self.competition_info.submission_rows
28
 
29
  def get_pending_subs(self):
30
- user_jsons = snapshot_download(
31
  repo_id=self.competition_id,
32
  allow_patterns="submission_info/*.json",
33
  token=self.token,
34
  repo_type="dataset",
35
  )
36
- user_jsons = glob.glob(os.path.join(user_jsons, "submission_info/*.json"))
37
  pending_submissions = []
38
- for _json in user_jsons:
39
  _json = json.load(open(_json, "r", encoding="utf-8"))
40
- user_id = _json["id"]
41
  for sub in _json["submissions"]:
42
- # if sub["status"] == "pending":
43
- pending_submissions.append(
44
- {
45
- "user_id": user_id,
46
- "submission_id": sub["submission_id"],
47
- "date": sub["date"],
48
- "time": sub["time"],
49
- }
50
- )
51
  if len(pending_submissions) == 0:
52
  logger.info("No pending submissions.")
53
  return None
54
  logger.info(f"Found {len(pending_submissions)} pending submissions.")
55
  pending_submissions = pd.DataFrame(pending_submissions)
56
- pending_submissions = pending_submissions.sort_values(by=["date", "time"])
 
57
  pending_submissions = pending_submissions.reset_index(drop=True)
58
  return pending_submissions
59
 
60
  def run_local(self, pending_submissions):
61
  for _, row in pending_submissions.iterrows():
62
- user_id = row["user_id"]
63
  submission_id = row["submission_id"]
64
  eval_params = {
65
  "competition_id": self.competition_id,
66
  "competition_type": self.competition_type,
67
  "metric": self.metric,
68
  "token": self.token,
69
- "user_id": user_id,
70
  "submission_id": submission_id,
71
  "submission_id_col": self.submission_id_col,
72
  "submission_cols": self.submission_cols,
73
  "submission_rows": self.submission_rows,
74
  "output_path": self.output_path,
 
75
  }
76
  eval_params = json.dumps(eval_params)
77
  eval_pid = run_evaluation(eval_params, local=True, wait=True)
78
  logger.info(f"New evaluation process started with pid {eval_pid}.")
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  def run(self):
81
  while True:
82
  pending_submissions = self.get_pending_subs()
@@ -85,4 +163,10 @@ class JobRunner:
85
  continue
86
  if self.competition_type == "generic":
87
  self.run_local(pending_submissions)
 
 
 
 
 
 
88
  time.sleep(5)
 
1
  import glob
2
+ import io
3
  import json
4
  import os
5
+ import random
6
+ import string
7
  import time
8
  from dataclasses import dataclass
9
 
10
  import pandas as pd
11
+ from huggingface_hub import HfApi, snapshot_download
12
  from loguru import logger
13
 
14
  from competitions.info import CompetitionInfo
15
  from competitions.utils import run_evaluation
16
 
17
 
18
+ _DOCKERFILE = """
19
+ FROM huggingface/competitions:latest
20
+
21
+ CMD uvicorn competitions.app:app --port 7860 --host 0.0.0.0
22
+ """
23
+
24
+ # format _DOCKERFILE
25
+ _DOCKERFILE = _DOCKERFILE.replace("\n", " ").replace(" ", "\n").strip()
26
+
27
+
28
  @dataclass
29
  class JobRunner:
30
  competition_info: CompetitionInfo
 
40
  self.submission_rows = self.competition_info.submission_rows
41
 
42
  def get_pending_subs(self):
43
+ submission_jsons = snapshot_download(
44
  repo_id=self.competition_id,
45
  allow_patterns="submission_info/*.json",
46
  token=self.token,
47
  repo_type="dataset",
48
  )
49
+ submission_jsons = glob.glob(os.path.join(submission_jsons, "submission_info/*.json"))
50
  pending_submissions = []
51
+ for _json in submission_jsons:
52
  _json = json.load(open(_json, "r", encoding="utf-8"))
53
+ team_id = _json["id"]
54
  for sub in _json["submissions"]:
55
+ if sub["status"] == "pending":
56
+ pending_submissions.append(
57
+ {
58
+ "team_id": team_id,
59
+ "submission_id": sub["submission_id"],
60
+ "datetime": sub["datetime"],
61
+ "submission_repo": sub["submission_repo"],
62
+ }
63
+ )
64
  if len(pending_submissions) == 0:
65
  logger.info("No pending submissions.")
66
  return None
67
  logger.info(f"Found {len(pending_submissions)} pending submissions.")
68
  pending_submissions = pd.DataFrame(pending_submissions)
69
+ pending_submissions["datetime"] = pd.to_datetime(pending_submissions["datetime"])
70
+ pending_submissions = pending_submissions.sort_values("datetime")
71
  pending_submissions = pending_submissions.reset_index(drop=True)
72
  return pending_submissions
73
 
74
  def run_local(self, pending_submissions):
75
  for _, row in pending_submissions.iterrows():
76
+ team_id = row["team_id"]
77
  submission_id = row["submission_id"]
78
  eval_params = {
79
  "competition_id": self.competition_id,
80
  "competition_type": self.competition_type,
81
  "metric": self.metric,
82
  "token": self.token,
83
+ "team_id": team_id,
84
  "submission_id": submission_id,
85
  "submission_id_col": self.submission_id_col,
86
  "submission_cols": self.submission_cols,
87
  "submission_rows": self.submission_rows,
88
  "output_path": self.output_path,
89
+ "submission_repo": row["submission_repo"],
90
  }
91
  eval_params = json.dumps(eval_params)
92
  eval_pid = run_evaluation(eval_params, local=True, wait=True)
93
  logger.info(f"New evaluation process started with pid {eval_pid}.")
94
 
95
+ def _create_readme(self, project_name):
96
+ _readme = "---\n"
97
+ _readme += f"title: {project_name}\n"
98
+ _readme += "emoji: 🚀\n"
99
+ _readme += "colorFrom: green\n"
100
+ _readme += "colorTo: indigo\n"
101
+ _readme += "sdk: docker\n"
102
+ _readme += "pinned: false\n"
103
+ _readme += "duplicated_from: autotrain-projects/autotrain-advanced\n"
104
+ _readme += "---\n"
105
+ _readme = io.BytesIO(_readme.encode())
106
+ return _readme
107
+
108
+ def create_space(self, team_id, submission_id, submission_repo):
109
+ project_name = "".join(
110
+ random.choices(
111
+ string.ascii_lowercase + string.digits,
112
+ k=10,
113
+ )
114
+ )
115
+ api = HfApi(token=self.token)
116
+ username = self.competition_id.split("/")[0]
117
+ repo_id = f"{username}/competitions-{project_name}"
118
+ api.create_repo(
119
+ repo_id=repo_id,
120
+ repo_type="space",
121
+ space_sdk="docker",
122
+ space_hardware="cpu-basic",
123
+ private=True,
124
+ )
125
+ params = {
126
+ "competition_id": self.competition_id,
127
+ "competition_type": self.competition_type,
128
+ "metric": self.metric,
129
+ "token": self.token,
130
+ "team_id": team_id,
131
+ "submission_id": submission_id,
132
+ "submission_id_col": self.submission_id_col,
133
+ "submission_cols": self.submission_cols,
134
+ "submission_rows": self.submission_rows,
135
+ "output_path": self.output_path,
136
+ "submission_repo": submission_repo,
137
+ }
138
+
139
+ api.add_space_secret(repo_id=repo_id, key="PARAMS", value=json.dumps(params))
140
+
141
+ readme = self._create_readme(project_name)
142
+ api.upload_file(
143
+ path_or_fileobj=readme,
144
+ path_in_repo="README.md",
145
+ repo_id=repo_id,
146
+ repo_type="space",
147
+ )
148
+
149
+ _dockerfile = io.BytesIO(_DOCKERFILE.encode())
150
+ api.upload_file(
151
+ path_or_fileobj=_dockerfile,
152
+ path_in_repo="Dockerfile",
153
+ repo_id=repo_id,
154
+ repo_type="space",
155
+ )
156
+ return repo_id
157
+
158
  def run(self):
159
  while True:
160
  pending_submissions = self.get_pending_subs()
 
163
  continue
164
  if self.competition_type == "generic":
165
  self.run_local(pending_submissions)
166
+ elif self.competition_type == "code":
167
+ for _, row in pending_submissions.iterrows():
168
+ team_id = row["team_id"]
169
+ submission_id = row["submission_id"]
170
+ submission_repo = row["submission_repo"]
171
+ self.create_space(team_id, submission_id, submission_repo)
172
  time.sleep(5)
competitions/submissions.py CHANGED
@@ -5,7 +5,7 @@ from dataclasses import dataclass
5
  from datetime import datetime
6
 
7
  import pandas as pd
8
- from huggingface_hub import HfApi, hf_hub_download
9
  from huggingface_hub.utils._errors import EntryNotFoundError
10
  from loguru import logger
11
 
@@ -16,6 +16,7 @@ from .utils import user_authentication
16
  @dataclass
17
  class Submissions:
18
  competition_id: str
 
19
  submission_limit: str
20
  end_date: datetime
21
  token: str
@@ -42,38 +43,35 @@ class Submissions:
42
  def _verify_submission(self, bytes_data):
43
  return True
44
 
45
- def _add_new_user(self, user_info):
46
  api = HfApi(token=self.token)
47
- user_submission_info = {}
48
- user_submission_info["name"] = user_info["name"]
49
- user_submission_info["id"] = user_info["id"]
50
- user_submission_info["submissions"] = []
51
- # convert user_submission_info to BufferedIOBase file object
52
- user_submission_info_json = json.dumps(user_submission_info, indent=4)
53
- user_submission_info_json_bytes = user_submission_info_json.encode("utf-8")
54
- user_submission_info_json_buffer = io.BytesIO(user_submission_info_json_bytes)
55
 
56
  api.upload_file(
57
- path_or_fileobj=user_submission_info_json_buffer,
58
- path_in_repo=f"submission_info/{user_info['id']}.json",
59
  repo_id=self.competition_id,
60
  repo_type="dataset",
61
  )
62
 
63
- def _check_user_submission_limit(self, user_info):
64
- user_id = user_info["id"]
65
  try:
66
- user_fname = hf_hub_download(
67
  repo_id=self.competition_id,
68
- filename=f"submission_info/{user_id}.json",
69
  token=self.token,
70
  repo_type="dataset",
71
  )
72
  except EntryNotFoundError:
73
- self._add_new_user(user_info)
74
- user_fname = hf_hub_download(
75
  repo_id=self.competition_id,
76
- filename=f"submission_info/{user_id}.json",
77
  token=self.token,
78
  repo_type="dataset",
79
  )
@@ -81,36 +79,37 @@ class Submissions:
81
  logger.error(e)
82
  raise Exception("Hugging Face Hub is unreachable, please try again later.")
83
 
84
- with open(user_fname, "r", encoding="utf-8") as f:
85
- user_submission_info = json.load(f)
86
 
87
  todays_date = datetime.now().strftime("%Y-%m-%d")
88
- if len(user_submission_info["submissions"]) == 0:
89
- user_submission_info["submissions"] = []
90
 
91
  # count the number of times user has submitted today
92
  todays_submissions = 0
93
- for sub in user_submission_info["submissions"]:
94
- if sub["date"] == todays_date:
 
 
95
  todays_submissions += 1
96
  if todays_submissions >= self.submission_limit:
97
  return False
98
  return True
99
 
100
- def _submissions_today(self, user_info):
101
- user_id = user_info["id"]
102
  try:
103
- user_fname = hf_hub_download(
104
  repo_id=self.competition_id,
105
- filename=f"submission_info/{user_id}.json",
106
  token=self.token,
107
  repo_type="dataset",
108
  )
109
  except EntryNotFoundError:
110
- self._add_new_user(user_info)
111
- user_fname = hf_hub_download(
112
  repo_id=self.competition_id,
113
- filename=f"submission_info/{user_id}.json",
114
  token=self.token,
115
  repo_type="dataset",
116
  )
@@ -118,39 +117,43 @@ class Submissions:
118
  logger.error(e)
119
  raise Exception("Hugging Face Hub is unreachable, please try again later.")
120
 
121
- with open(user_fname, "r", encoding="utf-8") as f:
122
- user_submission_info = json.load(f)
123
 
124
  todays_date = datetime.now().strftime("%Y-%m-%d")
125
- if len(user_submission_info["submissions"]) == 0:
126
- user_submission_info["submissions"] = []
127
 
128
  # count the number of times user has submitted today
129
  todays_submissions = 0
130
- for sub in user_submission_info["submissions"]:
131
- if sub["date"] == todays_date:
 
 
132
  todays_submissions += 1
133
  return todays_submissions
134
 
135
- def _increment_submissions(self, user_id, submission_id, submission_comment):
136
- user_fname = hf_hub_download(
 
 
137
  repo_id=self.competition_id,
138
- filename=f"submission_info/{user_id}.json",
139
  token=self.token,
140
  repo_type="dataset",
141
  )
142
- with open(user_fname, "r", encoding="utf-8") as f:
143
- user_submission_info = json.load(f)
144
- todays_date = datetime.now().strftime("%Y-%m-%d")
145
- current_time = datetime.now().strftime("%H:%M:%S")
146
 
147
  # here goes all the default stuff for submission
148
- user_submission_info["submissions"].append(
149
  {
150
- "date": todays_date,
151
- "time": current_time,
152
  "submission_id": submission_id,
153
  "submission_comment": submission_comment,
 
 
154
  "status": "pending",
155
  "selected": False,
156
  "public_score": -1,
@@ -159,33 +162,35 @@ class Submissions:
159
  )
160
  # count the number of times user has submitted today
161
  todays_submissions = 0
162
- for sub in user_submission_info["submissions"]:
163
- if sub["date"] == todays_date:
 
 
 
164
  todays_submissions += 1
165
 
166
- # convert user_submission_info to BufferedIOBase file object
167
- user_submission_info_json = json.dumps(user_submission_info, indent=4)
168
- user_submission_info_json_bytes = user_submission_info_json.encode("utf-8")
169
- user_submission_info_json_buffer = io.BytesIO(user_submission_info_json_bytes)
170
  api = HfApi(token=self.token)
171
  api.upload_file(
172
- path_or_fileobj=user_submission_info_json_buffer,
173
- path_in_repo=f"submission_info/{user_id}.json",
174
  repo_id=self.competition_id,
175
  repo_type="dataset",
176
  )
177
  return todays_submissions
178
 
179
- def _download_user_subs(self, user_id):
180
- user_fname = hf_hub_download(
181
  repo_id=self.competition_id,
182
- filename=f"submission_info/{user_id}.json",
183
  token=self.token,
184
  repo_type="dataset",
185
  )
186
- with open(user_fname, "r", encoding="utf-8") as f:
187
- user_submission_info = json.load(f)
188
- return user_submission_info["submissions"]
189
 
190
  def update_selected_submissions(self, user_token, selected_submission_ids):
191
  current_datetime = datetime.now()
@@ -194,44 +199,44 @@ class Submissions:
194
 
195
  user_info = self._get_user_info(user_token)
196
  user_id = user_info["id"]
 
197
 
198
- user_fname = hf_hub_download(
199
  repo_id=self.competition_id,
200
- filename=f"submission_info/{user_id}.json",
201
  token=self.token,
202
  repo_type="dataset",
203
  )
204
- with open(user_fname, "r", encoding="utf-8") as f:
205
- user_submission_info = json.load(f)
206
 
207
- for sub in user_submission_info["submissions"]:
208
  if sub["submission_id"] in selected_submission_ids:
209
  sub["selected"] = True
210
  else:
211
  sub["selected"] = False
212
 
213
- # convert user_submission_info to BufferedIOBase file object
214
- user_submission_info_json = json.dumps(user_submission_info, indent=4)
215
- user_submission_info_json_bytes = user_submission_info_json.encode("utf-8")
216
- user_submission_info_json_buffer = io.BytesIO(user_submission_info_json_bytes)
217
  api = HfApi(token=self.token)
218
  api.upload_file(
219
- path_or_fileobj=user_submission_info_json_buffer,
220
- path_in_repo=f"submission_info/{user_id}.json",
221
  repo_id=self.competition_id,
222
  repo_type="dataset",
223
  )
224
 
225
- def _get_user_subs(self, user_info, private=False):
226
- # get user submissions
227
  user_id = user_info["id"]
 
228
  try:
229
- user_submissions = self._download_user_subs(user_id)
230
  except EntryNotFoundError:
231
  logger.warning("No submissions found for user")
232
  return pd.DataFrame(), pd.DataFrame()
233
 
234
- submissions_df = pd.DataFrame(user_submissions)
235
 
236
  if not private:
237
  submissions_df = submissions_df.drop(columns=["private_score"])
@@ -314,47 +319,95 @@ class Submissions:
314
  private = False
315
  if current_date_time >= self.end_date:
316
  private = True
317
- success_subs, failed_subs = self._get_user_subs(user_info, private=private)
318
  return success_subs, failed_subs
319
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
320
  def new_submission(self, user_token, uploaded_file, submission_comment):
321
  # verify token
322
  user_info = self._get_user_info(user_token)
 
 
 
323
 
324
- # check if user can submit to the competition
325
- if self._check_user_submission_limit(user_info) is False:
326
  raise SubmissionLimitError("Submission limit reached")
327
 
328
- logger.info(type(uploaded_file))
329
- bytes_data = uploaded_file.file.read()
 
 
 
330
 
331
- # verify file is valid
332
- if not self._verify_submission(bytes_data):
333
- raise SubmissionError("Invalid submission file")
334
- else:
335
- user_id = user_info["id"]
336
- submission_id = str(uuid.uuid4())
337
  file_extension = uploaded_file.filename.split(".")[-1]
338
  # upload file to hf hub
339
  api = HfApi(token=self.token)
340
  api.upload_file(
341
  path_or_fileobj=bytes_data,
342
- path_in_repo=f"submissions/{user_id}-{submission_id}.{file_extension}",
343
  repo_id=self.competition_id,
344
  repo_type="dataset",
345
  )
346
- # update submission limit
347
  submissions_made = self._increment_submissions(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
  user_id=user_id,
349
  submission_id=submission_id,
350
- submission_comment="",
 
351
  )
352
- # TODO: schedule submission for evaluation
353
- # self._create_autotrain_project(
354
- # submission_id=f"{submission_id}",
355
- # competition_id=f"{self.competition_id}",
356
- # user_id=user_id,
357
- # competition_type="generic",
358
- # )
359
  remaining_submissions = self.submission_limit - submissions_made
360
  return remaining_submissions
 
5
  from datetime import datetime
6
 
7
  import pandas as pd
8
+ from huggingface_hub import HfApi, hf_hub_download, snapshot_download
9
  from huggingface_hub.utils._errors import EntryNotFoundError
10
  from loguru import logger
11
 
 
16
  @dataclass
17
  class Submissions:
18
  competition_id: str
19
+ competition_type: str
20
  submission_limit: str
21
  end_date: datetime
22
  token: str
 
43
  def _verify_submission(self, bytes_data):
44
  return True
45
 
46
def _add_new_team(self, team_id):
    """Create an empty submission-info record for a new team.

    Uploads ``submission_info/<team_id>.json`` (with an empty
    ``submissions`` list) to the competition dataset repo.
    """
    record = {"id": team_id, "submissions": []}
    payload = io.BytesIO(json.dumps(record, indent=4).encode("utf-8"))

    api = HfApi(token=self.token)
    api.upload_file(
        path_or_fileobj=payload,
        path_in_repo=f"submission_info/{team_id}.json",
        repo_id=self.competition_id,
        repo_type="dataset",
    )
61
 
62
+ def _check_team_submission_limit(self, team_id):
 
63
  try:
64
+ team_fname = hf_hub_download(
65
  repo_id=self.competition_id,
66
+ filename=f"submission_info/{team_id}.json",
67
  token=self.token,
68
  repo_type="dataset",
69
  )
70
  except EntryNotFoundError:
71
+ self._add_new_team(team_id)
72
+ team_fname = hf_hub_download(
73
  repo_id=self.competition_id,
74
+ filename=f"submission_info/{team_id}.json",
75
  token=self.token,
76
  repo_type="dataset",
77
  )
 
79
  logger.error(e)
80
  raise Exception("Hugging Face Hub is unreachable, please try again later.")
81
 
82
+ with open(team_fname, "r", encoding="utf-8") as f:
83
+ team_submission_info = json.load(f)
84
 
85
  todays_date = datetime.now().strftime("%Y-%m-%d")
86
+ if len(team_submission_info["submissions"]) == 0:
87
+ team_submission_info["submissions"] = []
88
 
89
  # count the number of times user has submitted today
90
  todays_submissions = 0
91
+ for sub in team_submission_info["submissions"]:
92
+ submission_datetime = sub["datetime"]
93
+ submission_date = submission_datetime.split(" ")[0]
94
+ if submission_date == todays_date:
95
  todays_submissions += 1
96
  if todays_submissions >= self.submission_limit:
97
  return False
98
  return True
99
 
100
+ def _submissions_today(self, team_id):
 
101
  try:
102
+ team_fname = hf_hub_download(
103
  repo_id=self.competition_id,
104
+ filename=f"submission_info/{team_id}.json",
105
  token=self.token,
106
  repo_type="dataset",
107
  )
108
  except EntryNotFoundError:
109
+ self._add_new_team(team_id)
110
+ team_fname = hf_hub_download(
111
  repo_id=self.competition_id,
112
+ filename=f"submission_info/{team_id}.json",
113
  token=self.token,
114
  repo_type="dataset",
115
  )
 
117
  logger.error(e)
118
  raise Exception("Hugging Face Hub is unreachable, please try again later.")
119
 
120
+ with open(team_fname, "r", encoding="utf-8") as f:
121
+ team_submission_info = json.load(f)
122
 
123
  todays_date = datetime.now().strftime("%Y-%m-%d")
124
+ if len(team_submission_info["submissions"]) == 0:
125
+ team_submission_info["submissions"] = []
126
 
127
  # count the number of times user has submitted today
128
  todays_submissions = 0
129
+ for sub in team_submission_info["submissions"]:
130
+ submission_datetime = sub["datetime"]
131
+ submission_date = submission_datetime.split(" ")[0]
132
+ if submission_date == todays_date:
133
  todays_submissions += 1
134
  return todays_submissions
135
 
136
+ def _increment_submissions(self, team_id, user_id, submission_id, submission_comment, submission_repo=None):
137
+ if submission_repo is None:
138
+ submission_repo = ""
139
+ team_fname = hf_hub_download(
140
  repo_id=self.competition_id,
141
+ filename=f"submission_info/{team_id}.json",
142
  token=self.token,
143
  repo_type="dataset",
144
  )
145
+ with open(team_fname, "r", encoding="utf-8") as f:
146
+ team_submission_info = json.load(f)
147
+ datetime_now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
 
148
 
149
  # here goes all the default stuff for submission
150
+ team_submission_info["submissions"].append(
151
  {
152
+ "datetime": datetime_now,
 
153
  "submission_id": submission_id,
154
  "submission_comment": submission_comment,
155
+ "submission_repo": submission_repo,
156
+ "submitted_by": user_id,
157
  "status": "pending",
158
  "selected": False,
159
  "public_score": -1,
 
162
  )
163
  # count the number of times user has submitted today
164
  todays_submissions = 0
165
+ todays_date = datetime.now().strftime("%Y-%m-%d")
166
+ for sub in team_submission_info["submissions"]:
167
+ submission_datetime = sub["datetime"]
168
+ submission_date = submission_datetime.split(" ")[0]
169
+ if submission_date == todays_date:
170
  todays_submissions += 1
171
 
172
+ team_submission_info_json = json.dumps(team_submission_info, indent=4)
173
+ team_submission_info_json_bytes = team_submission_info_json.encode("utf-8")
174
+ team_submission_info_json_buffer = io.BytesIO(team_submission_info_json_bytes)
 
175
  api = HfApi(token=self.token)
176
  api.upload_file(
177
+ path_or_fileobj=team_submission_info_json_buffer,
178
+ path_in_repo=f"submission_info/{team_id}.json",
179
  repo_id=self.competition_id,
180
  repo_type="dataset",
181
  )
182
  return todays_submissions
183
 
184
def _download_team_subs(self, team_id):
    """Return the list of a team's submissions from the dataset repo.

    :raises EntryNotFoundError: if the team has no submission file yet
        (propagated from ``hf_hub_download``)
    """
    info_path = hf_hub_download(
        repo_id=self.competition_id,
        filename=f"submission_info/{team_id}.json",
        token=self.token,
        repo_type="dataset",
    )
    with open(info_path, "r", encoding="utf-8") as f:
        return json.load(f)["submissions"]
194
 
195
  def update_selected_submissions(self, user_token, selected_submission_ids):
196
  current_datetime = datetime.now()
 
199
 
200
  user_info = self._get_user_info(user_token)
201
  user_id = user_info["id"]
202
+ team_id = self._get_team_id(user_id)
203
 
204
+ team_fname = hf_hub_download(
205
  repo_id=self.competition_id,
206
+ filename=f"submission_info/{team_id}.json",
207
  token=self.token,
208
  repo_type="dataset",
209
  )
210
+ with open(team_fname, "r", encoding="utf-8") as f:
211
+ team_submission_info = json.load(f)
212
 
213
+ for sub in team_submission_info["submissions"]:
214
  if sub["submission_id"] in selected_submission_ids:
215
  sub["selected"] = True
216
  else:
217
  sub["selected"] = False
218
 
219
+ team_submission_info_json = json.dumps(team_submission_info, indent=4)
220
+ team_submission_info_json_bytes = team_submission_info_json.encode("utf-8")
221
+ team_submission_info_json_buffer = io.BytesIO(team_submission_info_json_bytes)
 
222
  api = HfApi(token=self.token)
223
  api.upload_file(
224
+ path_or_fileobj=team_submission_info_json_buffer,
225
+ path_in_repo=f"submission_info/{team_id}.json",
226
  repo_id=self.competition_id,
227
  repo_type="dataset",
228
  )
229
 
230
+ def _get_team_subs(self, user_info, private=False):
 
231
  user_id = user_info["id"]
232
+ team_id = self._get_team_id(user_id)
233
  try:
234
+ team_submissions = self._download_team_subs(team_id)
235
  except EntryNotFoundError:
236
  logger.warning("No submissions found for user")
237
  return pd.DataFrame(), pd.DataFrame()
238
 
239
+ submissions_df = pd.DataFrame(team_submissions)
240
 
241
  if not private:
242
  submissions_df = submissions_df.drop(columns=["private_score"])
 
319
  private = False
320
  if current_date_time >= self.end_date:
321
  private = True
322
+ success_subs, failed_subs = self._get_team_subs(user_info, private=private)
323
  return success_subs, failed_subs
324
 
325
def _get_team_id(self, user_id):
    """Return the team id for ``user_id``, creating a solo team if needed.

    The user→team mapping lives in ``user_team.json`` in the competition
    dataset repo. If the user has no mapping yet, a fresh team id is
    generated, the mapping is uploaded, and the new id is returned.

    NOTE(review): the read-modify-write of user_team.json is not atomic;
    two first-time submitters racing could clobber each other's entry.
    """
    mapping_path = hf_hub_download(
        repo_id=self.competition_id,
        filename="user_team.json",
        token=self.token,
        repo_type="dataset",
    )
    with open(mapping_path, "r", encoding="utf-8") as f:
        user_team = json.load(f)

    # JSON object keys are always strings; normalise the id so a
    # non-string user id does not silently miss its existing mapping
    # (which would create a brand-new team on every call).
    user_id = str(user_id)
    if user_id in user_team:
        return user_team[user_id]

    # Create a new team, since the user is not in any team yet.
    team_id = str(uuid.uuid4())
    user_team[user_id] = team_id
    payload = io.BytesIO(json.dumps(user_team, indent=4).encode("utf-8"))
    api = HfApi(token=self.token)
    api.upload_file(
        path_or_fileobj=payload,
        path_in_repo="user_team.json",
        repo_id=self.competition_id,
        repo_type="dataset",
    )
    return team_id
353
def new_submission(self, user_token, uploaded_file, submission_comment):
    """Register a new submission for the caller's team.

    For ``generic`` competitions ``uploaded_file`` is an uploaded file
    object whose raw bytes are stored in the competition dataset repo.
    Otherwise (code competitions) it is the id of the participant's
    model repo, which is snapshotted and re-uploaded to a private repo
    owned by the competition organisation.

    :return: number of submissions the team has left today
    :raises SubmissionLimitError: if the team hit its daily limit
    :raises SubmissionError: if an uploaded file fails verification
    """
    # Verify the token and resolve the caller's team.
    user_info = self._get_user_info(user_token)
    submission_id = str(uuid.uuid4())
    user_id = user_info["id"]
    team_id = self._get_team_id(user_id)

    # Check if the team can still submit to the competition today.
    if self._check_team_submission_limit(team_id) is False:
        raise SubmissionLimitError("Submission limit reached")

    if self.competition_type == "generic":
        bytes_data = uploaded_file.file.read()
        # Verify the file is valid before storing it.
        if not self._verify_submission(bytes_data):
            raise SubmissionError("Invalid submission file")
        file_extension = uploaded_file.filename.split(".")[-1]
        # Upload the raw submission to the competition dataset repo.
        api = HfApi(token=self.token)
        api.upload_file(
            path_or_fileobj=bytes_data,
            path_in_repo=f"submissions/{team_id}-{submission_id}.{file_extension}",
            repo_id=self.competition_id,
            repo_type="dataset",
        )
        submissions_made = self._increment_submissions(
            team_id=team_id,
            user_id=user_id,
            submission_id=submission_id,
            submission_comment=submission_comment,
            submission_repo="",
        )
    else:
        # Code competition: snapshot the participant's repo using *their*
        # token, then mirror it into a private repo we own so the
        # evaluation Space can access it with the competition token.
        # NOTE(review): the local snapshot directory is never removed —
        # repeated submissions leak disk space; consider TemporaryDirectory.
        submission_repo = snapshot_download(
            repo_id=uploaded_file,
            local_dir=submission_id,
            token=user_token,
            repo_type="model",
        )
        api = HfApi(token=self.token)
        competition_user = self.competition_id.split("/")[0]
        api.create_repo(
            repo_id=f"{competition_user}/{submission_id}",
            repo_type="model",
            private=True,
        )
        api.upload_folder(
            folder_path=submission_repo,
            repo_id=f"{competition_user}/{submission_id}",
            repo_type="model",
        )
        submissions_made = self._increment_submissions(
            team_id=team_id,
            user_id=user_id,
            submission_id=submission_id,
            submission_comment=submission_comment,
            submission_repo=uploaded_file,
        )
    remaining_submissions = self.submission_limit - submissions_made
    return remaining_submissions
competitions/utils.py CHANGED
@@ -73,7 +73,7 @@ def pause_space(params):
73
  def download_submission_info(params):
74
  user_fname = hf_hub_download(
75
  repo_id=params.competition_id,
76
- filename=f"submission_info/{params.user_id}.json",
77
  token=params.token,
78
  repo_type="dataset",
79
  )
@@ -90,7 +90,7 @@ def upload_submission_info(params, user_submission_info):
90
  api = HfApi(token=params.token)
91
  api.upload_file(
92
  path_or_fileobj=user_submission_info_json_buffer,
93
- path_in_repo=f"submission_info/{params.user_id}.json",
94
  repo_id=params.competition_id,
95
  repo_type="dataset",
96
  )
 
73
  def download_submission_info(params):
74
  user_fname = hf_hub_download(
75
  repo_id=params.competition_id,
76
+ filename=f"submission_info/{params.team_id}.json",
77
  token=params.token,
78
  repo_type="dataset",
79
  )
 
90
  api = HfApi(token=params.token)
91
  api.upload_file(
92
  path_or_fileobj=user_submission_info_json_buffer,
93
+ path_in_repo=f"submission_info/{params.team_id}.json",
94
  repo_id=params.competition_id,
95
  repo_type="dataset",
96
  )