Abhishek Thakur
committed on
Commit
·
a2fa160
1
Parent(s):
1094cbb
migrate to teams
Browse files
- .dockerignore +1 -0
- .gitignore +1 -0
- 16337e22-7815-4ebd-a6c4-7a58dc46e214/.gitattributes +35 -0
- 16337e22-7815-4ebd-a6c4-7a58dc46e214/script.py +9 -0
- competitions/api.py +109 -0
- competitions/app.py +5 -1
- competitions/compute_metrics.py +2 -2
- competitions/evaluate.py +26 -5
- competitions/info.py +0 -4
- competitions/leaderboard.py +19 -26
- competitions/params.py +2 -1
- competitions/runner.py +101 -17
- competitions/submissions.py +151 -98
- competitions/utils.py +2 -2
.dockerignore
CHANGED
@@ -4,6 +4,7 @@
|
|
4 |
.vim/
|
5 |
flagged/
|
6 |
*.csv
|
|
|
7 |
|
8 |
# Byte-compiled / optimized / DLL files
|
9 |
__pycache__/
|
|
|
4 |
.vim/
|
5 |
flagged/
|
6 |
*.csv
|
7 |
+
*.db
|
8 |
|
9 |
# Byte-compiled / optimized / DLL files
|
10 |
__pycache__/
|
.gitignore
CHANGED
@@ -4,6 +4,7 @@
|
|
4 |
.vim/
|
5 |
flagged/
|
6 |
*.csv
|
|
|
7 |
|
8 |
# Byte-compiled / optimized / DLL files
|
9 |
__pycache__/
|
|
|
4 |
.vim/
|
5 |
flagged/
|
6 |
*.csv
|
7 |
+
*.db
|
8 |
|
9 |
# Byte-compiled / optimized / DLL files
|
10 |
__pycache__/
|
16337e22-7815-4ebd-a6c4-7a58dc46e214/.gitattributes
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
16337e22-7815-4ebd-a6c4-7a58dc46e214/script.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
|
4 |
+
# Baseline submission: a constant prediction of 0.5 for ids 0..9999.
# The rows are built in a single comprehension instead of an append loop.
sub = pd.DataFrame(
    [(row_id, 0.5) for row_id in range(10000)],
    columns=["id", "pred"],
)
# Written without the index so the CSV holds exactly the "id" and "pred" columns.
sub.to_csv("submission.csv", index=False)
|
competitions/api.py
ADDED
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
import os
|
3 |
+
import signal
|
4 |
+
import sqlite3
|
5 |
+
from contextlib import asynccontextmanager
|
6 |
+
|
7 |
+
import psutil
|
8 |
+
from fastapi import FastAPI
|
9 |
+
from loguru import logger
|
10 |
+
|
11 |
+
from competitions.utils import run_evaluation
|
12 |
+
|
13 |
+
|
14 |
+
def get_process_status(pid):
    """Return the psutil status string for the process with the given PID.

    Args:
        pid: Process id to inspect.

    Returns:
        Whatever ``psutil.Process(pid).status()`` reports, or the literal
        string ``"Completed"`` when psutil raises ``NoSuchProcess``.
    """
    try:
        return psutil.Process(pid).status()
    except psutil.NoSuchProcess:
        # A PID that no longer exists is reported as a finished job.
        logger.info(f"No process found with PID: {pid}")
    return "Completed"
|
22 |
+
|
23 |
+
|
24 |
+
def kill_process_by_pid(pid):
    """Send SIGTERM to the process identified by ``pid``.

    Errors raised by ``os.kill`` (for example when the PID does not exist)
    are not handled here and propagate to the caller.
    """
    termination_signal = signal.SIGTERM
    os.kill(pid, termination_signal)
|
27 |
+
|
28 |
+
|
29 |
+
class JobDB:
    """Minimal SQLite-backed registry of job process ids.

    A single connection and cursor are opened in ``__init__`` and reused by
    every method. All statements that take a value use ``?`` placeholders so
    the value is bound as data and never spliced into the SQL text.
    """

    def __init__(self, db_path):
        """Open (or create) the database at ``db_path`` and ensure the table exists."""
        self.db_path = db_path
        self.conn = sqlite3.connect(db_path)
        self.c = self.conn.cursor()
        self.create_jobs_table()

    def create_jobs_table(self):
        """Create the ``jobs`` table (``id``, ``pid``) if it is not already present."""
        self.c.execute(
            """CREATE TABLE IF NOT EXISTS jobs
            (id INTEGER PRIMARY KEY, pid INTEGER)"""
        )
        self.conn.commit()

    def add_job(self, pid):
        """Insert one row recording ``pid`` and commit."""
        self.c.execute("INSERT INTO jobs (pid) VALUES (?)", (pid,))
        self.conn.commit()

    def get_running_jobs(self):
        """Return the ``pid`` column of every row in ``jobs`` as a list."""
        self.c.execute("""SELECT pid FROM jobs""")
        return [row[0] for row in self.c.fetchall()]

    def delete_job(self, pid):
        """Delete every row whose ``pid`` equals the given value and commit."""
        self.c.execute("DELETE FROM jobs WHERE pid=?", (pid,))
        self.conn.commit()
|
58 |
+
|
59 |
+
|
60 |
+
# Raw value of the PARAMS environment variable (None when it is not set).
PARAMS = os.environ.get("PARAMS")
|
61 |
+
# Module-level job registry stored in the SQLite file "job.db"; created at import time.
DB = JobDB("job.db")
|
62 |
+
|
63 |
+
|
64 |
+
class BackgroundRunner:
    """Periodic monitor that prunes finished jobs and stops the server when none remain."""

    async def run_main(self):
        """Poll the job registry forever, sleeping 30 seconds between passes.

        On each pass, every registered PID whose status is "completed",
        "error" or "zombie" is sent a termination signal (failures are only
        logged) and removed from the registry. If the registry is empty
        afterwards, SIGINT is sent to the current process.
        """
        finished_states = ("completed", "error", "zombie")
        while True:
            for job_pid in DB.get_running_jobs():
                state = get_process_status(job_pid).strip().lower()
                if state not in finished_states:
                    continue
                logger.info(f"Process {job_pid} is already completed. Skipping...")
                try:
                    kill_process_by_pid(job_pid)
                except Exception as e:
                    logger.info(f"Error while killing process: {e}")
                DB.delete_job(job_pid)

            # Re-read the registry: the pass above may have emptied it.
            if not DB.get_running_jobs():
                logger.info("No running jobs found. Shutting down the server.")
                os.kill(os.getpid(), signal.SIGINT)
            await asyncio.sleep(30)
|
85 |
+
|
86 |
+
|
87 |
+
# Single shared monitor instance; its run_main() coroutine is scheduled from lifespan().
runner = BackgroundRunner()
|
88 |
+
|
89 |
+
|
90 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Start the evaluation and its monitor for the lifetime of the app.

    On entry: launches the evaluation via ``run_evaluation``, records the
    returned PID in the job registry, and schedules the background monitor.
    On exit: cancels the monitor task.
    """
    process_pid = run_evaluation(params=PARAMS)
    logger.info(f"Started training with PID {process_pid}")
    DB.add_job(process_pid)
    # Hold a strong reference: the event loop keeps only weak references to
    # tasks, so a task whose handle is discarded can be garbage-collected
    # before it finishes.
    monitor_task = asyncio.create_task(runner.run_main())
    try:
        yield
    finally:
        # Stop the polling loop when the application shuts down.
        monitor_task.cancel()
|
97 |
+
|
98 |
+
|
99 |
+
# FastAPI application; startup and shutdown work is delegated to lifespan().
api = FastAPI(lifespan=lifespan)
|
100 |
+
|
101 |
+
|
102 |
+
@api.get("/")
async def root():
    # Landing route: a fixed message saying that evaluation is in progress.
    message = "Your model is being evaluated..."
    return message
|
105 |
+
|
106 |
+
|
107 |
+
@api.get("/health")
async def health():
    # Liveness probe: always answers with the fixed string "OK".
    status = "OK"
    return status
|
competitions/app.py
CHANGED
@@ -5,6 +5,7 @@ from fastapi import FastAPI, File, Form, Request, UploadFile
|
|
5 |
from fastapi.responses import HTMLResponse, JSONResponse
|
6 |
from fastapi.staticfiles import StaticFiles
|
7 |
from fastapi.templating import Jinja2Templates
|
|
|
8 |
from pydantic import BaseModel
|
9 |
|
10 |
from competitions.info import CompetitionInfo
|
@@ -83,6 +84,7 @@ async def get_leaderboard(request: Request, lb: str):
|
|
83 |
autotrain_token=HF_TOKEN,
|
84 |
)
|
85 |
df = leaderboard.fetch(private=lb == "private")
|
|
|
86 |
resp = {"response": df.to_markdown(index=False)}
|
87 |
return resp
|
88 |
|
@@ -94,6 +96,7 @@ async def my_submissions(request: Request, user: User):
|
|
94 |
submission_limit=COMP_INFO.submission_limit,
|
95 |
competition_id=COMPETITION_ID,
|
96 |
token=HF_TOKEN,
|
|
|
97 |
)
|
98 |
success_subs, failed_subs = sub.my_submissions(user.user_token)
|
99 |
success_subs = success_subs.to_markdown(index=False)
|
@@ -107,7 +110,7 @@ async def my_submissions(request: Request, user: User):
|
|
107 |
|
108 |
@app.post("/new_submission", response_class=JSONResponse)
|
109 |
async def new_submission(
|
110 |
-
submission_file: UploadFile = File(
|
111 |
hub_model: str = Form(...),
|
112 |
token: str = Form(...),
|
113 |
submission_comment: str = Form(...),
|
@@ -117,6 +120,7 @@ async def new_submission(
|
|
117 |
submission_limit=COMP_INFO.submission_limit,
|
118 |
competition_id=COMPETITION_ID,
|
119 |
token=HF_TOKEN,
|
|
|
120 |
)
|
121 |
if COMP_INFO.competition_type == "generic":
|
122 |
resp = sub.new_submission(token, submission_file, submission_comment)
|
|
|
5 |
from fastapi.responses import HTMLResponse, JSONResponse
|
6 |
from fastapi.staticfiles import StaticFiles
|
7 |
from fastapi.templating import Jinja2Templates
|
8 |
+
from loguru import logger
|
9 |
from pydantic import BaseModel
|
10 |
|
11 |
from competitions.info import CompetitionInfo
|
|
|
84 |
autotrain_token=HF_TOKEN,
|
85 |
)
|
86 |
df = leaderboard.fetch(private=lb == "private")
|
87 |
+
logger.info(df)
|
88 |
resp = {"response": df.to_markdown(index=False)}
|
89 |
return resp
|
90 |
|
|
|
96 |
submission_limit=COMP_INFO.submission_limit,
|
97 |
competition_id=COMPETITION_ID,
|
98 |
token=HF_TOKEN,
|
99 |
+
competition_type=COMP_INFO.competition_type,
|
100 |
)
|
101 |
success_subs, failed_subs = sub.my_submissions(user.user_token)
|
102 |
success_subs = success_subs.to_markdown(index=False)
|
|
|
110 |
|
111 |
@app.post("/new_submission", response_class=JSONResponse)
|
112 |
async def new_submission(
|
113 |
+
submission_file: UploadFile = File(None),
|
114 |
hub_model: str = Form(...),
|
115 |
token: str = Form(...),
|
116 |
submission_comment: str = Form(...),
|
|
|
120 |
submission_limit=COMP_INFO.submission_limit,
|
121 |
competition_id=COMPETITION_ID,
|
122 |
token=HF_TOKEN,
|
123 |
+
competition_type=COMP_INFO.competition_type,
|
124 |
)
|
125 |
if COMP_INFO.competition_type == "generic":
|
126 |
resp = sub.new_submission(token, submission_file, submission_comment)
|
competitions/compute_metrics.py
CHANGED
@@ -15,7 +15,7 @@ def compute_metrics(params):
|
|
15 |
|
16 |
solution_df = pd.read_csv(solution_file)
|
17 |
|
18 |
-
submission_filename = f"submissions/{params.
|
19 |
submission_file = hf_hub_download(
|
20 |
repo_id=params.competition_id,
|
21 |
filename=submission_filename,
|
@@ -47,7 +47,7 @@ def compute_metrics(params):
|
|
47 |
else:
|
48 |
_metric = getattr(metrics, params.metric)
|
49 |
target_cols = [col for col in solution_df.columns if col not in [params.submission_id_col, "split"]]
|
50 |
-
public_score = _metric(
|
51 |
private_score = _metric(private_solution_df[target_cols], private_submission_df[target_cols])
|
52 |
|
53 |
# scores can also be dictionaries for multiple metrics
|
|
|
15 |
|
16 |
solution_df = pd.read_csv(solution_file)
|
17 |
|
18 |
+
submission_filename = f"submissions/{params.team_id}-{params.submission_id}.csv"
|
19 |
submission_file = hf_hub_download(
|
20 |
repo_id=params.competition_id,
|
21 |
filename=submission_filename,
|
|
|
47 |
else:
|
48 |
_metric = getattr(metrics, params.metric)
|
49 |
target_cols = [col for col in solution_df.columns if col not in [params.submission_id_col, "split"]]
|
50 |
+
public_score = _metric(public_solution_df[target_cols], public_submission_df[target_cols])
|
51 |
private_score = _metric(private_solution_df[target_cols], private_submission_df[target_cols])
|
52 |
|
53 |
# scores can also be dictionaries for multiple metrics
|
competitions/evaluate.py
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
import argparse
|
2 |
import json
|
|
|
3 |
|
4 |
-
from huggingface_hub import snapshot_download
|
5 |
from loguru import logger
|
6 |
|
7 |
from competitions import utils
|
@@ -15,12 +16,32 @@ def parse_args():
|
|
15 |
return parser.parse_args()
|
16 |
|
17 |
|
|
|
|
|
|
|
|
|
|
|
18 |
def generate_submission_file(params):
|
|
|
19 |
logger.info("Downloading submission dataset")
|
20 |
-
snapshot_download(
|
21 |
-
repo_id=params.
|
22 |
local_dir=params.output_path,
|
23 |
token=params.token,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
repo_type="dataset",
|
25 |
)
|
26 |
|
@@ -35,9 +56,9 @@ def run(params):
|
|
35 |
if params.competition_type == "code":
|
36 |
generate_submission_file(params)
|
37 |
|
38 |
-
|
39 |
|
40 |
-
utils.update_submission_score(params, public_score, private_score)
|
41 |
utils.update_submission_status(params, "success")
|
42 |
utils.pause_space(params)
|
43 |
|
|
|
1 |
import argparse
|
2 |
import json
|
3 |
+
import subprocess
|
4 |
|
5 |
+
from huggingface_hub import HfApi, snapshot_download
|
6 |
from loguru import logger
|
7 |
|
8 |
from competitions import utils
|
|
|
16 |
return parser.parse_args()
|
17 |
|
18 |
|
19 |
+
def upload_submission_file(params, file_path):
    """Placeholder for uploading a submission file.

    Currently this only emits a log line; neither ``params`` nor
    ``file_path`` is used and nothing is uploaded.
    """
    logger.info("Uploading submission file")
|
22 |
+
|
23 |
+
|
24 |
def generate_submission_file(params):
    """Run a code submission's ``script.py`` and upload the CSV it produces.

    Steps:
      1. Download the model repo ``<owner>/<submission_id>`` into
         ``params.output_path``, where ``<owner>`` is the namespace part of
         ``params.competition_id``.
      2. Run ``python script.py`` with the download directory as the working
         directory; the script is expected to write ``submission.csv`` there.
      3. Upload that file to the competition dataset repo as
         ``submissions/<team_id>-<submission_id>.csv``.

    Raises:
        subprocess.CalledProcessError: if ``script.py`` exits with a non-zero
            status.
    """
    base_user = params.competition_id.split("/")[0]
    logger.info("Downloading submission dataset")
    submission_dir = snapshot_download(
        repo_id=f"{base_user}/{params.submission_id}",
        local_dir=params.output_path,
        token=params.token,
        repo_type="model",
    )

    logger.info("Generating submission file")
    # check=True stops here when the script fails, instead of going on to
    # upload a submission.csv that is missing or left over from the repo.
    subprocess.run(["python", "script.py"], cwd=submission_dir, check=True)

    api = HfApi(token=params.token)
    api.upload_file(
        path_or_fileobj=f"{submission_dir}/submission.csv",
        path_in_repo=f"submissions/{params.team_id}-{params.submission_id}.csv",
        repo_id=params.competition_id,
        repo_type="dataset",
    )
|
47 |
|
|
|
56 |
if params.competition_type == "code":
|
57 |
generate_submission_file(params)
|
58 |
|
59 |
+
evaluation = compute_metrics(params)
|
60 |
|
61 |
+
utils.update_submission_score(params, evaluation["public_score"], evaluation["private_score"])
|
62 |
utils.update_submission_status(params, "success")
|
63 |
utils.pause_space(params)
|
64 |
|
competitions/info.py
CHANGED
@@ -84,10 +84,6 @@ class CompetitionInfo:
|
|
84 |
def competition_description(self):
|
85 |
return self.competition_desc
|
86 |
|
87 |
-
@property
|
88 |
-
def competition_name(self):
|
89 |
-
return self.config["COMPETITION_NAME"]
|
90 |
-
|
91 |
@property
|
92 |
def submission_columns(self):
|
93 |
return self.config["SUBMISSION_COLUMNS"].split(",")
|
|
|
84 |
def competition_description(self):
|
85 |
return self.competition_desc
|
86 |
|
|
|
|
|
|
|
|
|
87 |
@property
|
88 |
def submission_columns(self):
|
89 |
return self.config["SUBMISSION_COLUMNS"].split(",")
|
competitions/leaderboard.py
CHANGED
@@ -25,13 +25,13 @@ class Leaderboard:
|
|
25 |
def _refresh_columns(self):
|
26 |
self.private_columns = [
|
27 |
"rank",
|
28 |
-
"
|
29 |
"private_score",
|
30 |
"submission_datetime",
|
31 |
]
|
32 |
self.public_columns = [
|
33 |
"rank",
|
34 |
-
"
|
35 |
"public_score",
|
36 |
"submission_datetime",
|
37 |
]
|
@@ -49,14 +49,16 @@ class Leaderboard:
|
|
49 |
start_time = time.time()
|
50 |
submissions = []
|
51 |
for submission in glob.glob(os.path.join(submissions_folder, "submission_info", "*.json")):
|
52 |
-
with open(submission, "r") as f:
|
53 |
submission_info = json.load(f)
|
54 |
# only select submissions that are done
|
55 |
-
submission_info["submissions"] = [
|
|
|
|
|
56 |
submission_info["submissions"] = [
|
57 |
sub
|
58 |
for sub in submission_info["submissions"]
|
59 |
-
if datetime.strptime(sub["
|
60 |
]
|
61 |
if len(submission_info["submissions"]) == 0:
|
62 |
continue
|
@@ -83,15 +85,13 @@ class Leaderboard:
|
|
83 |
submission_info["submissions"] = submission_info["submissions"][0]
|
84 |
temp_info = {
|
85 |
"id": submission_info["id"],
|
86 |
-
"name": submission_info["name"],
|
87 |
"submission_id": submission_info["submissions"]["submission_id"],
|
88 |
"submission_comment": submission_info["submissions"]["submission_comment"],
|
89 |
"status": submission_info["submissions"]["status"],
|
90 |
"selected": submission_info["submissions"]["selected"],
|
91 |
"public_score": submission_info["submissions"]["public_score"],
|
92 |
# "private_score": submission_info["submissions"]["private_score"],
|
93 |
-
"
|
94 |
-
"submission_time": submission_info["submissions"]["time"],
|
95 |
}
|
96 |
for score in other_scores:
|
97 |
temp_info[score] = submission_info["submissions"][score]
|
@@ -112,10 +112,10 @@ class Leaderboard:
|
|
112 |
start_time = time.time()
|
113 |
submissions = []
|
114 |
for submission in glob.glob(os.path.join(submissions_folder, "submission_info", "*.json")):
|
115 |
-
with open(submission, "r") as f:
|
116 |
submission_info = json.load(f)
|
117 |
submission_info["submissions"] = [
|
118 |
-
sub for sub in submission_info["submissions"] if sub["status"] == "
|
119 |
]
|
120 |
if len(submission_info["submissions"]) == 0:
|
121 |
continue
|
@@ -146,8 +146,7 @@ class Leaderboard:
|
|
146 |
if selected_submissions == 0:
|
147 |
# select submissions with best public score
|
148 |
submission_info["submissions"].sort(
|
149 |
-
key=lambda x: x["public_score"],
|
150 |
-
reverse=True if self.eval_higher_is_better else False,
|
151 |
)
|
152 |
# select only the best submission
|
153 |
submission_info["submissions"] = submission_info["submissions"][0]
|
@@ -156,8 +155,7 @@ class Leaderboard:
|
|
156 |
submission_info["submissions"] = [sub for sub in submission_info["submissions"] if sub["selected"]]
|
157 |
# sort by private score
|
158 |
submission_info["submissions"].sort(
|
159 |
-
key=lambda x: x["private_score"],
|
160 |
-
reverse=True if self.eval_higher_is_better else False,
|
161 |
)
|
162 |
# select only the best submission
|
163 |
submission_info["submissions"] = submission_info["submissions"][0]
|
@@ -167,28 +165,24 @@ class Leaderboard:
|
|
167 |
sub for sub in submission_info["submissions"] if not sub["selected"]
|
168 |
]
|
169 |
temp_best_public_submissions.sort(
|
170 |
-
key=lambda x: x["public_score"],
|
171 |
-
reverse=True if self.eval_higher_is_better else False,
|
172 |
)
|
173 |
missing_candidates = self.max_selected_submissions - len(temp_selected_submissions)
|
174 |
temp_best_public_submissions = temp_best_public_submissions[:missing_candidates]
|
175 |
submission_info["submissions"] = temp_selected_submissions + temp_best_public_submissions
|
176 |
submission_info["submissions"].sort(
|
177 |
-
key=lambda x: x["private_score"],
|
178 |
-
reverse=True if self.eval_higher_is_better else False,
|
179 |
)
|
180 |
submission_info["submissions"] = submission_info["submissions"][0]
|
181 |
|
182 |
temp_info = {
|
183 |
"id": submission_info["id"],
|
184 |
-
"name": submission_info["name"],
|
185 |
"submission_id": submission_info["submissions"]["submission_id"],
|
186 |
"submission_comment": submission_info["submissions"]["submission_comment"],
|
187 |
"status": submission_info["submissions"]["status"],
|
188 |
"selected": submission_info["submissions"]["selected"],
|
189 |
"private_score": submission_info["submissions"]["private_score"],
|
190 |
-
"
|
191 |
-
"submission_time": submission_info["submissions"]["time"],
|
192 |
}
|
193 |
for score in other_scores:
|
194 |
temp_info[score] = submission_info["submissions"][score]
|
@@ -206,10 +200,10 @@ class Leaderboard:
|
|
206 |
return pd.DataFrame()
|
207 |
|
208 |
df = pd.DataFrame(submissions)
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
# only keep submissions before the end date
|
214 |
df = df[df["submission_datetime"] < self.end_date].reset_index(drop=True)
|
215 |
|
@@ -254,7 +248,6 @@ class Leaderboard:
|
|
254 |
columns = self.public_columns if not private else self.private_columns
|
255 |
logger.info(columns)
|
256 |
# remove duplicate columns
|
257 |
-
# ['rank', 'name', 'public_score', 'submission_datetime', 'public_score_track1', 'public_score_track1', 'public_score_track1', 'public_score_track1']
|
258 |
columns = list(dict.fromkeys(columns))
|
259 |
|
260 |
# send submission_datetime to the end
|
|
|
25 |
def _refresh_columns(self):
|
26 |
self.private_columns = [
|
27 |
"rank",
|
28 |
+
"id",
|
29 |
"private_score",
|
30 |
"submission_datetime",
|
31 |
]
|
32 |
self.public_columns = [
|
33 |
"rank",
|
34 |
+
"id",
|
35 |
"public_score",
|
36 |
"submission_datetime",
|
37 |
]
|
|
|
49 |
start_time = time.time()
|
50 |
submissions = []
|
51 |
for submission in glob.glob(os.path.join(submissions_folder, "submission_info", "*.json")):
|
52 |
+
with open(submission, "r", encoding="utf-8") as f:
|
53 |
submission_info = json.load(f)
|
54 |
# only select submissions that are done
|
55 |
+
submission_info["submissions"] = [
|
56 |
+
sub for sub in submission_info["submissions"] if sub["status"] == "success"
|
57 |
+
]
|
58 |
submission_info["submissions"] = [
|
59 |
sub
|
60 |
for sub in submission_info["submissions"]
|
61 |
+
if datetime.strptime(sub["datetime"], "%Y-%m-%d %H:%M:%S") < self.end_date
|
62 |
]
|
63 |
if len(submission_info["submissions"]) == 0:
|
64 |
continue
|
|
|
85 |
submission_info["submissions"] = submission_info["submissions"][0]
|
86 |
temp_info = {
|
87 |
"id": submission_info["id"],
|
|
|
88 |
"submission_id": submission_info["submissions"]["submission_id"],
|
89 |
"submission_comment": submission_info["submissions"]["submission_comment"],
|
90 |
"status": submission_info["submissions"]["status"],
|
91 |
"selected": submission_info["submissions"]["selected"],
|
92 |
"public_score": submission_info["submissions"]["public_score"],
|
93 |
# "private_score": submission_info["submissions"]["private_score"],
|
94 |
+
"submission_datetime": submission_info["submissions"]["datetime"],
|
|
|
95 |
}
|
96 |
for score in other_scores:
|
97 |
temp_info[score] = submission_info["submissions"][score]
|
|
|
112 |
start_time = time.time()
|
113 |
submissions = []
|
114 |
for submission in glob.glob(os.path.join(submissions_folder, "submission_info", "*.json")):
|
115 |
+
with open(submission, "r", encoding="utf-8") as f:
|
116 |
submission_info = json.load(f)
|
117 |
submission_info["submissions"] = [
|
118 |
+
sub for sub in submission_info["submissions"] if sub["status"] == "success"
|
119 |
]
|
120 |
if len(submission_info["submissions"]) == 0:
|
121 |
continue
|
|
|
146 |
if selected_submissions == 0:
|
147 |
# select submissions with best public score
|
148 |
submission_info["submissions"].sort(
|
149 |
+
key=lambda x: x["public_score"], reverse=self.eval_higher_is_better
|
|
|
150 |
)
|
151 |
# select only the best submission
|
152 |
submission_info["submissions"] = submission_info["submissions"][0]
|
|
|
155 |
submission_info["submissions"] = [sub for sub in submission_info["submissions"] if sub["selected"]]
|
156 |
# sort by private score
|
157 |
submission_info["submissions"].sort(
|
158 |
+
key=lambda x: x["private_score"], reverse=self.eval_higher_is_better
|
|
|
159 |
)
|
160 |
# select only the best submission
|
161 |
submission_info["submissions"] = submission_info["submissions"][0]
|
|
|
165 |
sub for sub in submission_info["submissions"] if not sub["selected"]
|
166 |
]
|
167 |
temp_best_public_submissions.sort(
|
168 |
+
key=lambda x: x["public_score"], reverse=self.eval_higher_is_better
|
|
|
169 |
)
|
170 |
missing_candidates = self.max_selected_submissions - len(temp_selected_submissions)
|
171 |
temp_best_public_submissions = temp_best_public_submissions[:missing_candidates]
|
172 |
submission_info["submissions"] = temp_selected_submissions + temp_best_public_submissions
|
173 |
submission_info["submissions"].sort(
|
174 |
+
key=lambda x: x["private_score"], reverse=self.eval_higher_is_better
|
|
|
175 |
)
|
176 |
submission_info["submissions"] = submission_info["submissions"][0]
|
177 |
|
178 |
temp_info = {
|
179 |
"id": submission_info["id"],
|
|
|
180 |
"submission_id": submission_info["submissions"]["submission_id"],
|
181 |
"submission_comment": submission_info["submissions"]["submission_comment"],
|
182 |
"status": submission_info["submissions"]["status"],
|
183 |
"selected": submission_info["submissions"]["selected"],
|
184 |
"private_score": submission_info["submissions"]["private_score"],
|
185 |
+
"submission_datetime": submission_info["submissions"]["datetime"],
|
|
|
186 |
}
|
187 |
for score in other_scores:
|
188 |
temp_info[score] = submission_info["submissions"][score]
|
|
|
200 |
return pd.DataFrame()
|
201 |
|
202 |
df = pd.DataFrame(submissions)
|
203 |
+
|
204 |
+
# convert submission datetime to pandas datetime
|
205 |
+
df["submission_datetime"] = pd.to_datetime(df["submission_datetime"], format="%Y-%m-%d %H:%M:%S")
|
206 |
+
|
207 |
# only keep submissions before the end date
|
208 |
df = df[df["submission_datetime"] < self.end_date].reset_index(drop=True)
|
209 |
|
|
|
248 |
columns = self.public_columns if not private else self.private_columns
|
249 |
logger.info(columns)
|
250 |
# remove duplicate columns
|
|
|
251 |
columns = list(dict.fromkeys(columns))
|
252 |
|
253 |
# send submission_datetime to the end
|
competitions/params.py
CHANGED
@@ -9,12 +9,13 @@ class EvalParams(BaseModel):
|
|
9 |
competition_type: str
|
10 |
metric: str
|
11 |
token: str
|
12 |
-
|
13 |
submission_id: str
|
14 |
submission_id_col: str
|
15 |
submission_cols: List[str]
|
16 |
submission_rows: int
|
17 |
output_path: str
|
|
|
18 |
|
19 |
class Config:
|
20 |
protected_namespaces = ()
|
|
|
9 |
competition_type: str
|
10 |
metric: str
|
11 |
token: str
|
12 |
+
team_id: str
|
13 |
submission_id: str
|
14 |
submission_id_col: str
|
15 |
submission_cols: List[str]
|
16 |
submission_rows: int
|
17 |
output_path: str
|
18 |
+
submission_repo: str
|
19 |
|
20 |
class Config:
|
21 |
protected_namespaces = ()
|
competitions/runner.py
CHANGED
@@ -1,17 +1,30 @@
|
|
1 |
import glob
|
|
|
2 |
import json
|
3 |
import os
|
|
|
|
|
4 |
import time
|
5 |
from dataclasses import dataclass
|
6 |
|
7 |
import pandas as pd
|
8 |
-
from huggingface_hub import snapshot_download
|
9 |
from loguru import logger
|
10 |
|
11 |
from competitions.info import CompetitionInfo
|
12 |
from competitions.utils import run_evaluation
|
13 |
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
@dataclass
|
16 |
class JobRunner:
|
17 |
competition_info: CompetitionInfo
|
@@ -27,56 +40,121 @@ class JobRunner:
|
|
27 |
self.submission_rows = self.competition_info.submission_rows
|
28 |
|
29 |
def get_pending_subs(self):
|
30 |
-
|
31 |
repo_id=self.competition_id,
|
32 |
allow_patterns="submission_info/*.json",
|
33 |
token=self.token,
|
34 |
repo_type="dataset",
|
35 |
)
|
36 |
-
|
37 |
pending_submissions = []
|
38 |
-
for _json in
|
39 |
_json = json.load(open(_json, "r", encoding="utf-8"))
|
40 |
-
|
41 |
for sub in _json["submissions"]:
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
if len(pending_submissions) == 0:
|
52 |
logger.info("No pending submissions.")
|
53 |
return None
|
54 |
logger.info(f"Found {len(pending_submissions)} pending submissions.")
|
55 |
pending_submissions = pd.DataFrame(pending_submissions)
|
56 |
-
pending_submissions =
|
|
|
57 |
pending_submissions = pending_submissions.reset_index(drop=True)
|
58 |
return pending_submissions
|
59 |
|
60 |
def run_local(self, pending_submissions):
|
61 |
for _, row in pending_submissions.iterrows():
|
62 |
-
|
63 |
submission_id = row["submission_id"]
|
64 |
eval_params = {
|
65 |
"competition_id": self.competition_id,
|
66 |
"competition_type": self.competition_type,
|
67 |
"metric": self.metric,
|
68 |
"token": self.token,
|
69 |
-
"
|
70 |
"submission_id": submission_id,
|
71 |
"submission_id_col": self.submission_id_col,
|
72 |
"submission_cols": self.submission_cols,
|
73 |
"submission_rows": self.submission_rows,
|
74 |
"output_path": self.output_path,
|
|
|
75 |
}
|
76 |
eval_params = json.dumps(eval_params)
|
77 |
eval_pid = run_evaluation(eval_params, local=True, wait=True)
|
78 |
logger.info(f"New evaluation process started with pid {eval_pid}.")
|
79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
def run(self):
|
81 |
while True:
|
82 |
pending_submissions = self.get_pending_subs()
|
@@ -85,4 +163,10 @@ class JobRunner:
|
|
85 |
continue
|
86 |
if self.competition_type == "generic":
|
87 |
self.run_local(pending_submissions)
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
time.sleep(5)
|
|
|
1 |
import glob
|
2 |
+
import io
|
3 |
import json
|
4 |
import os
|
5 |
+
import random
|
6 |
+
import string
|
7 |
import time
|
8 |
from dataclasses import dataclass
|
9 |
|
10 |
import pandas as pd
|
11 |
+
from huggingface_hub import HfApi, snapshot_download
|
12 |
from loguru import logger
|
13 |
|
14 |
from competitions.info import CompetitionInfo
|
15 |
from competitions.utils import run_evaluation
|
16 |
|
17 |
|
18 |
+
_DOCKERFILE = """
FROM huggingface/competitions:latest

CMD uvicorn competitions.app:app --port 7860 --host 0.0.0.0
"""

# Normalize _DOCKERFILE to one instruction per line with no blank lines.
# Working line by line keeps each instruction together with its arguments;
# replacing every single space with a newline would split them apart.
_DOCKERFILE = "\n".join(line.strip() for line in _DOCKERFILE.splitlines() if line.strip())
|
26 |
+
|
27 |
+
|
28 |
@dataclass
|
29 |
class JobRunner:
|
30 |
competition_info: CompetitionInfo
|
|
|
40 |
self.submission_rows = self.competition_info.submission_rows
|
41 |
|
42 |
def get_pending_subs(self):
    """Collect every submission whose status is "pending", oldest first.

    Downloads the ``submission_info/*.json`` files from the competition
    dataset repo and reads each one; the top-level ``"id"`` of a file is
    used as the team id for all submissions listed in it.

    Returns:
        ``None`` when nothing is pending; otherwise a DataFrame with the
        columns ``team_id``, ``submission_id``, ``datetime`` (parsed with
        ``pd.to_datetime``) and ``submission_repo``, sorted by ``datetime``
        with a fresh 0..n-1 index.
    """
    snapshot_dir = snapshot_download(
        repo_id=self.competition_id,
        allow_patterns="submission_info/*.json",
        token=self.token,
        repo_type="dataset",
    )
    pending_submissions = []
    for json_path in glob.glob(os.path.join(snapshot_dir, "submission_info/*.json")):
        # Use a context manager so each file handle is closed promptly.
        with open(json_path, "r", encoding="utf-8") as f:
            team_info = json.load(f)
        team_id = team_info["id"]
        pending_submissions.extend(
            {
                "team_id": team_id,
                "submission_id": sub["submission_id"],
                "datetime": sub["datetime"],
                "submission_repo": sub["submission_repo"],
            }
            for sub in team_info["submissions"]
            if sub["status"] == "pending"
        )
    if not pending_submissions:
        logger.info("No pending submissions.")
        return None
    logger.info(f"Found {len(pending_submissions)} pending submissions.")
    pending_submissions = pd.DataFrame(pending_submissions)
    pending_submissions["datetime"] = pd.to_datetime(pending_submissions["datetime"])
    pending_submissions = pending_submissions.sort_values("datetime")
    pending_submissions = pending_submissions.reset_index(drop=True)
    return pending_submissions
|
73 |
|
74 |
def run_local(self, pending_submissions):
    """Evaluate each pending submission locally, one row at a time.

    For every row of ``pending_submissions`` the evaluation parameters are
    serialized to JSON and passed to ``run_evaluation`` with ``local=True``
    and ``wait=True``; the returned pid is logged.
    """
    for _, submission in pending_submissions.iterrows():
        payload = json.dumps(
            {
                "competition_id": self.competition_id,
                "competition_type": self.competition_type,
                "metric": self.metric,
                "token": self.token,
                "team_id": submission["team_id"],
                "submission_id": submission["submission_id"],
                "submission_id_col": self.submission_id_col,
                "submission_cols": self.submission_cols,
                "submission_rows": self.submission_rows,
                "output_path": self.output_path,
                "submission_repo": submission["submission_repo"],
            }
        )
        eval_pid = run_evaluation(payload, local=True, wait=True)
        logger.info(f"New evaluation process started with pid {eval_pid}.")
|
94 |
|
95 |
+
def _create_readme(self, project_name):
|
96 |
+
_readme = "---\n"
|
97 |
+
_readme += f"title: {project_name}\n"
|
98 |
+
_readme += "emoji: 🚀\n"
|
99 |
+
_readme += "colorFrom: green\n"
|
100 |
+
_readme += "colorTo: indigo\n"
|
101 |
+
_readme += "sdk: docker\n"
|
102 |
+
_readme += "pinned: false\n"
|
103 |
+
_readme += "duplicated_from: autotrain-projects/autotrain-advanced\n"
|
104 |
+
_readme += "---\n"
|
105 |
+
_readme = io.BytesIO(_readme.encode())
|
106 |
+
return _readme
|
107 |
+
|
108 |
+
def create_space(self, team_id, submission_id, submission_repo):
    """Create a private Docker Space that will evaluate one submission.

    The Space is named ``<owner>/competitions-<10 random chars>`` where
    ``<owner>`` is the part of ``self.competition_id`` before the first
    ``/``. The evaluation parameters (including ``self.token``) are stored
    as the ``PARAMS`` Space secret, then a README.md and a Dockerfile are
    uploaded.

    Args:
        team_id: Identifier of the submitting team.
        submission_id: Identifier of the submission to evaluate.
        submission_repo: Value forwarded as ``submission_repo`` in PARAMS.

    Returns:
        The repo id of the newly created Space.
    """
    alphabet = string.ascii_lowercase + string.digits
    suffix = "".join(random.choices(alphabet, k=10))
    hub = HfApi(token=self.token)
    owner = self.competition_id.split("/")[0]
    space_id = f"{owner}/competitions-{suffix}"
    hub.create_repo(
        repo_id=space_id,
        repo_type="space",
        space_sdk="docker",
        space_hardware="cpu-basic",
        private=True,
    )

    secret_payload = json.dumps(
        {
            "competition_id": self.competition_id,
            "competition_type": self.competition_type,
            "metric": self.metric,
            "token": self.token,
            "team_id": team_id,
            "submission_id": submission_id,
            "submission_id_col": self.submission_id_col,
            "submission_cols": self.submission_cols,
            "submission_rows": self.submission_rows,
            "output_path": self.output_path,
            "submission_repo": submission_repo,
        }
    )
    hub.add_space_secret(repo_id=space_id, key="PARAMS", value=secret_payload)

    # README first, Dockerfile second: same upload order as before.
    space_files = (
        ("README.md", self._create_readme(suffix)),
        ("Dockerfile", io.BytesIO(_DOCKERFILE.encode())),
    )
    for target_path, content in space_files:
        hub.upload_file(
            path_or_fileobj=content,
            path_in_repo=target_path,
            repo_id=space_id,
            repo_type="space",
        )
    return space_id
|
157 |
+
|
158 |
def run(self):
|
159 |
while True:
|
160 |
pending_submissions = self.get_pending_subs()
|
|
|
163 |
continue
|
164 |
if self.competition_type == "generic":
|
165 |
self.run_local(pending_submissions)
|
166 |
+
elif self.competition_type == "code":
|
167 |
+
for _, row in pending_submissions.iterrows():
|
168 |
+
team_id = row["team_id"]
|
169 |
+
submission_id = row["submission_id"]
|
170 |
+
submission_repo = row["submission_repo"]
|
171 |
+
self.create_space(team_id, submission_id, submission_repo)
|
172 |
time.sleep(5)
|
competitions/submissions.py
CHANGED
@@ -5,7 +5,7 @@ from dataclasses import dataclass
|
|
5 |
from datetime import datetime
|
6 |
|
7 |
import pandas as pd
|
8 |
-
from huggingface_hub import HfApi, hf_hub_download
|
9 |
from huggingface_hub.utils._errors import EntryNotFoundError
|
10 |
from loguru import logger
|
11 |
|
@@ -16,6 +16,7 @@ from .utils import user_authentication
|
|
16 |
@dataclass
|
17 |
class Submissions:
|
18 |
competition_id: str
|
|
|
19 |
submission_limit: str
|
20 |
end_date: datetime
|
21 |
token: str
|
@@ -42,38 +43,35 @@ class Submissions:
|
|
42 |
def _verify_submission(self, bytes_data):
|
43 |
return True
|
44 |
|
45 |
-
def
|
46 |
api = HfApi(token=self.token)
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
user_submission_info_json_bytes = user_submission_info_json.encode("utf-8")
|
54 |
-
user_submission_info_json_buffer = io.BytesIO(user_submission_info_json_bytes)
|
55 |
|
56 |
api.upload_file(
|
57 |
-
path_or_fileobj=
|
58 |
-
path_in_repo=f"submission_info/{
|
59 |
repo_id=self.competition_id,
|
60 |
repo_type="dataset",
|
61 |
)
|
62 |
|
63 |
-
def
|
64 |
-
user_id = user_info["id"]
|
65 |
try:
|
66 |
-
|
67 |
repo_id=self.competition_id,
|
68 |
-
filename=f"submission_info/{
|
69 |
token=self.token,
|
70 |
repo_type="dataset",
|
71 |
)
|
72 |
except EntryNotFoundError:
|
73 |
-
self.
|
74 |
-
|
75 |
repo_id=self.competition_id,
|
76 |
-
filename=f"submission_info/{
|
77 |
token=self.token,
|
78 |
repo_type="dataset",
|
79 |
)
|
@@ -81,36 +79,37 @@ class Submissions:
|
|
81 |
logger.error(e)
|
82 |
raise Exception("Hugging Face Hub is unreachable, please try again later.")
|
83 |
|
84 |
-
with open(
|
85 |
-
|
86 |
|
87 |
todays_date = datetime.now().strftime("%Y-%m-%d")
|
88 |
-
if len(
|
89 |
-
|
90 |
|
91 |
# count the number of times user has submitted today
|
92 |
todays_submissions = 0
|
93 |
-
for sub in
|
94 |
-
|
|
|
|
|
95 |
todays_submissions += 1
|
96 |
if todays_submissions >= self.submission_limit:
|
97 |
return False
|
98 |
return True
|
99 |
|
100 |
-
def _submissions_today(self,
|
101 |
-
user_id = user_info["id"]
|
102 |
try:
|
103 |
-
|
104 |
repo_id=self.competition_id,
|
105 |
-
filename=f"submission_info/{
|
106 |
token=self.token,
|
107 |
repo_type="dataset",
|
108 |
)
|
109 |
except EntryNotFoundError:
|
110 |
-
self.
|
111 |
-
|
112 |
repo_id=self.competition_id,
|
113 |
-
filename=f"submission_info/{
|
114 |
token=self.token,
|
115 |
repo_type="dataset",
|
116 |
)
|
@@ -118,39 +117,43 @@ class Submissions:
|
|
118 |
logger.error(e)
|
119 |
raise Exception("Hugging Face Hub is unreachable, please try again later.")
|
120 |
|
121 |
-
with open(
|
122 |
-
|
123 |
|
124 |
todays_date = datetime.now().strftime("%Y-%m-%d")
|
125 |
-
if len(
|
126 |
-
|
127 |
|
128 |
# count the number of times user has submitted today
|
129 |
todays_submissions = 0
|
130 |
-
for sub in
|
131 |
-
|
|
|
|
|
132 |
todays_submissions += 1
|
133 |
return todays_submissions
|
134 |
|
135 |
-
def _increment_submissions(self, user_id, submission_id, submission_comment):
|
136 |
-
|
|
|
|
|
137 |
repo_id=self.competition_id,
|
138 |
-
filename=f"submission_info/{
|
139 |
token=self.token,
|
140 |
repo_type="dataset",
|
141 |
)
|
142 |
-
with open(
|
143 |
-
|
144 |
-
|
145 |
-
current_time = datetime.now().strftime("%H:%M:%S")
|
146 |
|
147 |
# here goes all the default stuff for submission
|
148 |
-
|
149 |
{
|
150 |
-
"
|
151 |
-
"time": current_time,
|
152 |
"submission_id": submission_id,
|
153 |
"submission_comment": submission_comment,
|
|
|
|
|
154 |
"status": "pending",
|
155 |
"selected": False,
|
156 |
"public_score": -1,
|
@@ -159,33 +162,35 @@ class Submissions:
|
|
159 |
)
|
160 |
# count the number of times user has submitted today
|
161 |
todays_submissions = 0
|
162 |
-
|
163 |
-
|
|
|
|
|
|
|
164 |
todays_submissions += 1
|
165 |
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
user_submission_info_json_buffer = io.BytesIO(user_submission_info_json_bytes)
|
170 |
api = HfApi(token=self.token)
|
171 |
api.upload_file(
|
172 |
-
path_or_fileobj=
|
173 |
-
path_in_repo=f"submission_info/{
|
174 |
repo_id=self.competition_id,
|
175 |
repo_type="dataset",
|
176 |
)
|
177 |
return todays_submissions
|
178 |
|
179 |
-
def
|
180 |
-
|
181 |
repo_id=self.competition_id,
|
182 |
-
filename=f"submission_info/{
|
183 |
token=self.token,
|
184 |
repo_type="dataset",
|
185 |
)
|
186 |
-
with open(
|
187 |
-
|
188 |
-
return
|
189 |
|
190 |
def update_selected_submissions(self, user_token, selected_submission_ids):
|
191 |
current_datetime = datetime.now()
|
@@ -194,44 +199,44 @@ class Submissions:
|
|
194 |
|
195 |
user_info = self._get_user_info(user_token)
|
196 |
user_id = user_info["id"]
|
|
|
197 |
|
198 |
-
|
199 |
repo_id=self.competition_id,
|
200 |
-
filename=f"submission_info/{
|
201 |
token=self.token,
|
202 |
repo_type="dataset",
|
203 |
)
|
204 |
-
with open(
|
205 |
-
|
206 |
|
207 |
-
for sub in
|
208 |
if sub["submission_id"] in selected_submission_ids:
|
209 |
sub["selected"] = True
|
210 |
else:
|
211 |
sub["selected"] = False
|
212 |
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
user_submission_info_json_buffer = io.BytesIO(user_submission_info_json_bytes)
|
217 |
api = HfApi(token=self.token)
|
218 |
api.upload_file(
|
219 |
-
path_or_fileobj=
|
220 |
-
path_in_repo=f"submission_info/{
|
221 |
repo_id=self.competition_id,
|
222 |
repo_type="dataset",
|
223 |
)
|
224 |
|
225 |
-
def
|
226 |
-
# get user submissions
|
227 |
user_id = user_info["id"]
|
|
|
228 |
try:
|
229 |
-
|
230 |
except EntryNotFoundError:
|
231 |
logger.warning("No submissions found for user")
|
232 |
return pd.DataFrame(), pd.DataFrame()
|
233 |
|
234 |
-
submissions_df = pd.DataFrame(
|
235 |
|
236 |
if not private:
|
237 |
submissions_df = submissions_df.drop(columns=["private_score"])
|
@@ -314,47 +319,95 @@ class Submissions:
|
|
314 |
private = False
|
315 |
if current_date_time >= self.end_date:
|
316 |
private = True
|
317 |
-
success_subs, failed_subs = self.
|
318 |
return success_subs, failed_subs
|
319 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
320 |
def new_submission(self, user_token, uploaded_file, submission_comment):
|
321 |
# verify token
|
322 |
user_info = self._get_user_info(user_token)
|
|
|
|
|
|
|
323 |
|
324 |
-
# check if
|
325 |
-
if self.
|
326 |
raise SubmissionLimitError("Submission limit reached")
|
327 |
|
328 |
-
|
329 |
-
|
|
|
|
|
|
|
330 |
|
331 |
-
# verify file is valid
|
332 |
-
if not self._verify_submission(bytes_data):
|
333 |
-
raise SubmissionError("Invalid submission file")
|
334 |
-
else:
|
335 |
-
user_id = user_info["id"]
|
336 |
-
submission_id = str(uuid.uuid4())
|
337 |
file_extension = uploaded_file.filename.split(".")[-1]
|
338 |
# upload file to hf hub
|
339 |
api = HfApi(token=self.token)
|
340 |
api.upload_file(
|
341 |
path_or_fileobj=bytes_data,
|
342 |
-
path_in_repo=f"submissions/{
|
343 |
repo_id=self.competition_id,
|
344 |
repo_type="dataset",
|
345 |
)
|
346 |
-
# update submission limit
|
347 |
submissions_made = self._increment_submissions(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
348 |
user_id=user_id,
|
349 |
submission_id=submission_id,
|
350 |
-
submission_comment=
|
|
|
351 |
)
|
352 |
-
# TODO: schedule submission for evaluation
|
353 |
-
# self._create_autotrain_project(
|
354 |
-
# submission_id=f"{submission_id}",
|
355 |
-
# competition_id=f"{self.competition_id}",
|
356 |
-
# user_id=user_id,
|
357 |
-
# competition_type="generic",
|
358 |
-
# )
|
359 |
remaining_submissions = self.submission_limit - submissions_made
|
360 |
return remaining_submissions
|
|
|
5 |
from datetime import datetime
|
6 |
|
7 |
import pandas as pd
|
8 |
+
from huggingface_hub import HfApi, hf_hub_download, snapshot_download
|
9 |
from huggingface_hub.utils._errors import EntryNotFoundError
|
10 |
from loguru import logger
|
11 |
|
|
|
16 |
@dataclass
|
17 |
class Submissions:
|
18 |
competition_id: str
|
19 |
+
competition_type: str
|
20 |
submission_limit: str
|
21 |
end_date: datetime
|
22 |
token: str
|
|
|
43 |
def _verify_submission(self, bytes_data):
|
44 |
return True
|
45 |
|
46 |
+
def _add_new_team(self, team_id):
    """Create an empty submission record for a team in the competition repo.

    Uploads ``submission_info/<team_id>.json`` to the competition dataset
    repo, containing the team id and an empty ``submissions`` list.

    Args:
        team_id: Identifier of the team to initialise.
    """
    hub = HfApi(token=self.token)
    record = {"id": team_id, "submissions": []}
    payload = io.BytesIO(json.dumps(record, indent=4).encode("utf-8"))
    hub.upload_file(
        path_or_fileobj=payload,
        path_in_repo=f"submission_info/{team_id}.json",
        repo_id=self.competition_id,
        repo_type="dataset",
    )
|
61 |
|
62 |
+
def _check_team_submission_limit(self, team_id):
|
|
|
63 |
try:
|
64 |
+
team_fname = hf_hub_download(
|
65 |
repo_id=self.competition_id,
|
66 |
+
filename=f"submission_info/{team_id}.json",
|
67 |
token=self.token,
|
68 |
repo_type="dataset",
|
69 |
)
|
70 |
except EntryNotFoundError:
|
71 |
+
self._add_new_team(team_id)
|
72 |
+
team_fname = hf_hub_download(
|
73 |
repo_id=self.competition_id,
|
74 |
+
filename=f"submission_info/{team_id}.json",
|
75 |
token=self.token,
|
76 |
repo_type="dataset",
|
77 |
)
|
|
|
79 |
logger.error(e)
|
80 |
raise Exception("Hugging Face Hub is unreachable, please try again later.")
|
81 |
|
82 |
+
with open(team_fname, "r", encoding="utf-8") as f:
|
83 |
+
team_submission_info = json.load(f)
|
84 |
|
85 |
todays_date = datetime.now().strftime("%Y-%m-%d")
|
86 |
+
if len(team_submission_info["submissions"]) == 0:
|
87 |
+
team_submission_info["submissions"] = []
|
88 |
|
89 |
# count the number of times user has submitted today
|
90 |
todays_submissions = 0
|
91 |
+
for sub in team_submission_info["submissions"]:
|
92 |
+
submission_datetime = sub["datetime"]
|
93 |
+
submission_date = submission_datetime.split(" ")[0]
|
94 |
+
if submission_date == todays_date:
|
95 |
todays_submissions += 1
|
96 |
if todays_submissions >= self.submission_limit:
|
97 |
return False
|
98 |
return True
|
99 |
|
100 |
+
def _submissions_today(self, team_id):
|
|
|
101 |
try:
|
102 |
+
team_fname = hf_hub_download(
|
103 |
repo_id=self.competition_id,
|
104 |
+
filename=f"submission_info/{team_id}.json",
|
105 |
token=self.token,
|
106 |
repo_type="dataset",
|
107 |
)
|
108 |
except EntryNotFoundError:
|
109 |
+
self._add_new_team(team_id)
|
110 |
+
team_fname = hf_hub_download(
|
111 |
repo_id=self.competition_id,
|
112 |
+
filename=f"submission_info/{team_id}.json",
|
113 |
token=self.token,
|
114 |
repo_type="dataset",
|
115 |
)
|
|
|
117 |
logger.error(e)
|
118 |
raise Exception("Hugging Face Hub is unreachable, please try again later.")
|
119 |
|
120 |
+
with open(team_fname, "r", encoding="utf-8") as f:
|
121 |
+
team_submission_info = json.load(f)
|
122 |
|
123 |
todays_date = datetime.now().strftime("%Y-%m-%d")
|
124 |
+
if len(team_submission_info["submissions"]) == 0:
|
125 |
+
team_submission_info["submissions"] = []
|
126 |
|
127 |
# count the number of times user has submitted today
|
128 |
todays_submissions = 0
|
129 |
+
for sub in team_submission_info["submissions"]:
|
130 |
+
submission_datetime = sub["datetime"]
|
131 |
+
submission_date = submission_datetime.split(" ")[0]
|
132 |
+
if submission_date == todays_date:
|
133 |
todays_submissions += 1
|
134 |
return todays_submissions
|
135 |
|
136 |
+
def _increment_submissions(self, team_id, user_id, submission_id, submission_comment, submission_repo=None):
|
137 |
+
if submission_repo is None:
|
138 |
+
submission_repo = ""
|
139 |
+
team_fname = hf_hub_download(
|
140 |
repo_id=self.competition_id,
|
141 |
+
filename=f"submission_info/{team_id}.json",
|
142 |
token=self.token,
|
143 |
repo_type="dataset",
|
144 |
)
|
145 |
+
with open(team_fname, "r", encoding="utf-8") as f:
|
146 |
+
team_submission_info = json.load(f)
|
147 |
+
datetime_now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
|
|
148 |
|
149 |
# here goes all the default stuff for submission
|
150 |
+
team_submission_info["submissions"].append(
|
151 |
{
|
152 |
+
"datetime": datetime_now,
|
|
|
153 |
"submission_id": submission_id,
|
154 |
"submission_comment": submission_comment,
|
155 |
+
"submission_repo": submission_repo,
|
156 |
+
"submitted_by": user_id,
|
157 |
"status": "pending",
|
158 |
"selected": False,
|
159 |
"public_score": -1,
|
|
|
162 |
)
|
163 |
# count the number of times user has submitted today
|
164 |
todays_submissions = 0
|
165 |
+
todays_date = datetime.now().strftime("%Y-%m-%d")
|
166 |
+
for sub in team_submission_info["submissions"]:
|
167 |
+
submission_datetime = sub["datetime"]
|
168 |
+
submission_date = submission_datetime.split(" ")[0]
|
169 |
+
if submission_date == todays_date:
|
170 |
todays_submissions += 1
|
171 |
|
172 |
+
team_submission_info_json = json.dumps(team_submission_info, indent=4)
|
173 |
+
team_submission_info_json_bytes = team_submission_info_json.encode("utf-8")
|
174 |
+
team_submission_info_json_buffer = io.BytesIO(team_submission_info_json_bytes)
|
|
|
175 |
api = HfApi(token=self.token)
|
176 |
api.upload_file(
|
177 |
+
path_or_fileobj=team_submission_info_json_buffer,
|
178 |
+
path_in_repo=f"submission_info/{team_id}.json",
|
179 |
repo_id=self.competition_id,
|
180 |
repo_type="dataset",
|
181 |
)
|
182 |
return todays_submissions
|
183 |
|
184 |
+
def _download_team_subs(self, team_id):
    """Fetch the list of submissions recorded for a team.

    Args:
        team_id: Identifier of the team whose record is downloaded.

    Returns:
        The ``submissions`` entry of ``submission_info/<team_id>.json`` from
        the competition dataset repo.
    """
    local_path = hf_hub_download(
        repo_id=self.competition_id,
        filename=f"submission_info/{team_id}.json",
        token=self.token,
        repo_type="dataset",
    )
    with open(local_path, "r", encoding="utf-8") as handle:
        return json.load(handle)["submissions"]
|
194 |
|
195 |
def update_selected_submissions(self, user_token, selected_submission_ids):
|
196 |
current_datetime = datetime.now()
|
|
|
199 |
|
200 |
user_info = self._get_user_info(user_token)
|
201 |
user_id = user_info["id"]
|
202 |
+
team_id = self._get_team_id(user_id)
|
203 |
|
204 |
+
team_fname = hf_hub_download(
|
205 |
repo_id=self.competition_id,
|
206 |
+
filename=f"submission_info/{team_id}.json",
|
207 |
token=self.token,
|
208 |
repo_type="dataset",
|
209 |
)
|
210 |
+
with open(team_fname, "r", encoding="utf-8") as f:
|
211 |
+
team_submission_info = json.load(f)
|
212 |
|
213 |
+
for sub in team_submission_info["submissions"]:
|
214 |
if sub["submission_id"] in selected_submission_ids:
|
215 |
sub["selected"] = True
|
216 |
else:
|
217 |
sub["selected"] = False
|
218 |
|
219 |
+
team_submission_info_json = json.dumps(team_submission_info, indent=4)
|
220 |
+
team_submission_info_json_bytes = team_submission_info_json.encode("utf-8")
|
221 |
+
team_submission_info_json_buffer = io.BytesIO(team_submission_info_json_bytes)
|
|
|
222 |
api = HfApi(token=self.token)
|
223 |
api.upload_file(
|
224 |
+
path_or_fileobj=team_submission_info_json_buffer,
|
225 |
+
path_in_repo=f"submission_info/{team_id}.json",
|
226 |
repo_id=self.competition_id,
|
227 |
repo_type="dataset",
|
228 |
)
|
229 |
|
230 |
+
def _get_team_subs(self, user_info, private=False):
|
|
|
231 |
user_id = user_info["id"]
|
232 |
+
team_id = self._get_team_id(user_id)
|
233 |
try:
|
234 |
+
team_submissions = self._download_team_subs(team_id)
|
235 |
except EntryNotFoundError:
|
236 |
logger.warning("No submissions found for user")
|
237 |
return pd.DataFrame(), pd.DataFrame()
|
238 |
|
239 |
+
submissions_df = pd.DataFrame(team_submissions)
|
240 |
|
241 |
if not private:
|
242 |
submissions_df = submissions_df.drop(columns=["private_score"])
|
|
|
319 |
private = False
|
320 |
if current_date_time >= self.end_date:
|
321 |
private = True
|
322 |
+
success_subs, failed_subs = self._get_team_subs(user_info, private=private)
|
323 |
return success_subs, failed_subs
|
324 |
|
325 |
+
def _get_team_id(self, user_id):
    """Return the team id for a user, creating a new team if needed.

    The user-to-team mapping is read from ``user_team.json`` in the
    competition dataset repo. When ``user_id`` is not a key of that mapping,
    a fresh UUID is assigned as the user's team id and the updated mapping
    is uploaded back to the repo.

    Args:
        user_id: Identifier of the user to look up.

    Returns:
        The team id associated with ``user_id``.
    """
    mapping_path = hf_hub_download(
        repo_id=self.competition_id,
        filename="user_team.json",
        token=self.token,
        repo_type="dataset",
    )
    with open(mapping_path, "r", encoding="utf-8") as handle:
        teams_by_user = json.load(handle)

    try:
        return teams_by_user[user_id]
    except KeyError:
        pass

    # The user belongs to no team yet: register a new team for them.
    new_team = str(uuid.uuid4())
    teams_by_user[user_id] = new_team
    payload = io.BytesIO(json.dumps(teams_by_user, indent=4).encode("utf-8"))
    HfApi(token=self.token).upload_file(
        path_or_fileobj=payload,
        path_in_repo="user_team.json",
        repo_id=self.competition_id,
        repo_type="dataset",
    )
    return new_team
|
352 |
+
|
353 |
def new_submission(self, user_token, uploaded_file, submission_comment):
    """Record a new submission for the caller's team.

    For ``"generic"`` competitions ``uploaded_file`` is read through its
    ``.file`` and ``.filename`` attributes and stored in the competition
    dataset repo as ``submissions/<team_id>-<submission_id>.<ext>``. For any
    other competition type ``uploaded_file`` is used as the id of a model
    repo, which is downloaded with ``user_token`` and copied into a private
    model repo named ``<competition owner>/<submission_id>``.

    Args:
        user_token: Token of the submitting user.
        uploaded_file: Uploaded file object (generic) or model repo id
            (other competition types).
        submission_comment: Free-text comment stored with the submission.

    Returns:
        ``self.submission_limit`` minus the count returned by
        ``_increment_submissions``.

    Raises:
        SubmissionLimitError: If ``_check_team_submission_limit`` returns
            ``False`` for the team.
        SubmissionError: If ``_verify_submission`` rejects the uploaded file.
    """
    # verify token
    user_info = self._get_user_info(user_token)
    submission_id = str(uuid.uuid4())
    user_id = user_info["id"]
    team_id = self._get_team_id(user_id)

    # check if team can submit to the competition
    if self._check_team_submission_limit(team_id) is False:
        raise SubmissionLimitError("Submission limit reached")

    if self.competition_type == "generic":
        bytes_data = uploaded_file.file.read()
        # verify file is valid
        if not self._verify_submission(bytes_data):
            raise SubmissionError("Invalid submission file")

        file_extension = uploaded_file.filename.split(".")[-1]
        # upload file to hf hub
        api = HfApi(token=self.token)
        api.upload_file(
            path_or_fileobj=bytes_data,
            path_in_repo=f"submissions/{team_id}-{submission_id}.{file_extension}",
            repo_id=self.competition_id,
            repo_type="dataset",
        )
        submissions_made = self._increment_submissions(
            team_id=team_id,
            user_id=user_id,
            submission_id=submission_id,
            submission_comment=submission_comment,
            submission_repo="",
        )
    else:
        import tempfile

        api = HfApi(token=self.token)
        competition_user = self.competition_id.split("/")[0]
        target_repo = f"{competition_user}/{submission_id}"
        # Stage the snapshot in a temporary directory so the local copy is
        # removed once it has been re-uploaded (or if any step fails),
        # instead of piling up in the process working directory.
        with tempfile.TemporaryDirectory() as staging_dir:
            submission_repo = snapshot_download(
                repo_id=uploaded_file,
                local_dir=staging_dir,
                token=user_token,
                repo_type="model",
            )
            api.create_repo(
                repo_id=target_repo,
                repo_type="model",
                private=True,
            )
            api.upload_folder(
                folder_path=submission_repo,
                repo_id=target_repo,
                repo_type="model",
            )
        submissions_made = self._increment_submissions(
            team_id=team_id,
            user_id=user_id,
            submission_id=submission_id,
            submission_comment=submission_comment,
            submission_repo=uploaded_file,
        )
    remaining_submissions = self.submission_limit - submissions_made
    return remaining_submissions
|
competitions/utils.py
CHANGED
@@ -73,7 +73,7 @@ def pause_space(params):
|
|
73 |
def download_submission_info(params):
|
74 |
user_fname = hf_hub_download(
|
75 |
repo_id=params.competition_id,
|
76 |
-
filename=f"submission_info/{params.
|
77 |
token=params.token,
|
78 |
repo_type="dataset",
|
79 |
)
|
@@ -90,7 +90,7 @@ def upload_submission_info(params, user_submission_info):
|
|
90 |
api = HfApi(token=params.token)
|
91 |
api.upload_file(
|
92 |
path_or_fileobj=user_submission_info_json_buffer,
|
93 |
-
path_in_repo=f"submission_info/{params.
|
94 |
repo_id=params.competition_id,
|
95 |
repo_type="dataset",
|
96 |
)
|
|
|
73 |
def download_submission_info(params):
|
74 |
user_fname = hf_hub_download(
|
75 |
repo_id=params.competition_id,
|
76 |
+
filename=f"submission_info/{params.team_id}.json",
|
77 |
token=params.token,
|
78 |
repo_type="dataset",
|
79 |
)
|
|
|
90 |
api = HfApi(token=params.token)
|
91 |
api.upload_file(
|
92 |
path_or_fileobj=user_submission_info_json_buffer,
|
93 |
+
path_in_repo=f"submission_info/{params.team_id}.json",
|
94 |
repo_id=params.competition_id,
|
95 |
repo_type="dataset",
|
96 |
)
|