Abhishek Thakur committed on
Commit 1094cbb · 1 Parent(s): 936d8d9

working generic evaluation
.dockerignore ADDED
@@ -0,0 +1,146 @@
+ # Local stuff
+ .DS_Store
+ .vscode/
+ .vim/
+ flagged/
+ *.csv
+
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ pip-wheel-metadata/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ *.env
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # Terraform stuff
+ *.tfstate
+ *.tfstate.backup
+ .terraform**
+ **.tfvars
+
+ # Alembic / database artifacts
+ **.db
Dockerfile CHANGED
@@ -1,9 +1,9 @@
- FROM python:3.8.9
+ FROM ubuntu:22.04
  
  ENV DEBIAN_FRONTEND=noninteractive \
      TZ=UTC
  
- RUN pip install pip==23.0.1
+ RUN pip install pip==23.3.2
  
  WORKDIR /app
  RUN mkdir -p /app/.cache
@@ -14,10 +14,6 @@ ENV HOME=/app
  
  ENV PYTHONPATH=$HOME/app \
      PYTHONUNBUFFERED=1 \
-     GRADIO_ALLOW_FLAGGING=never \
-     GRADIO_NUM_PORTS=1 \
-     GRADIO_SERVER_NAME=0.0.0.0 \
-     GRADIO_THEME=huggingface \
      SYSTEM=spaces
  
  
@@ -26,7 +22,8 @@ RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
      && rm -f Miniconda3-latest-Linux-x86_64.sh
  ENV PATH /app/miniconda/bin:$PATH
  
- RUN conda create -p /app/env -y python=3.8
+ RUN conda create -p /app/env -y python=3.10 \
+     && conda clean -ya
  
  
  SHELL ["conda", "run","--no-capture-output", "-p","/app/env", "/bin/bash", "-c"]
competitions/__init__.py CHANGED
@@ -1,7 +1,5 @@
  import os
  
- from .info import CompetitionInfo
- 
  
  __version__ = "0.1.1"
  
@@ -9,10 +7,3 @@ MOONLANDING_URL = os.getenv("MOONLANDING_URL", "https://huggingface.co")
  COMPETITION_ID = os.getenv("COMPETITION_ID")
  AUTOTRAIN_USERNAME = os.getenv("AUTOTRAIN_USERNAME")
  AUTOTRAIN_TOKEN = os.getenv("AUTOTRAIN_TOKEN")
- AUTOTRAIN_BACKEND_API = os.getenv("AUTOTRAIN_BACKEND_API", "https://api.autotrain.huggingface.co")
- BOT_TOKEN = os.getenv("BOT_TOKEN")
- 
- if COMPETITION_ID is not None:
-     competition_info = CompetitionInfo(competition_id=COMPETITION_ID, autotrain_token=AUTOTRAIN_TOKEN)
- else:
-     competition_info = None
competitions/app.py CHANGED
@@ -1,4 +1,5 @@
  import os
+ import threading
  
  from fastapi import FastAPI, File, Form, Request, UploadFile
  from fastapi.responses import HTMLResponse, JSONResponse
@@ -8,12 +9,14 @@ from pydantic import BaseModel
  
  from competitions.info import CompetitionInfo
  from competitions.leaderboard import Leaderboard
+ from competitions.runner import JobRunner
  from competitions.submissions import Submissions
  
  
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
  COMPETITION_ID = os.getenv("COMPETITION_ID")
+ OUTPUT_PATH = os.getenv("OUTPUT_PATH", "/tmp/model")
  COMP_INFO = CompetitionInfo(competition_id=COMPETITION_ID, autotrain_token=HF_TOKEN)
  
  
@@ -21,6 +24,15 @@ class User(BaseModel):
      user_token: str
  
  
+ def run_job_runner():
+     job_runner = JobRunner(token=HF_TOKEN, competition_info=COMP_INFO, output_path=OUTPUT_PATH)
+     job_runner.run()
+ 
+ 
+ thread = threading.Thread(target=run_job_runner)
+ thread.start()
+ 
+ 
  app = FastAPI()
  static_path = os.path.join(BASE_DIR, "static")
  app.mount("/static", StaticFiles(directory=static_path), name="static")
@@ -107,9 +119,9 @@ async def new_submission(
          token=HF_TOKEN,
      )
      if COMP_INFO.competition_type == "generic":
-         resp = sub.new_submission(token, submission_file)
+         resp = sub.new_submission(token, submission_file, submission_comment)
          return {"response": f"Success! You have {resp} submissions remaining today."}
      elif COMP_INFO.competition_type == "code":
-         resp = sub.new_submission(token, hub_model)
+         resp = sub.new_submission(token, hub_model, submission_comment)
          return {"response": f"Success! You have {resp} submissions remaining today."}
      return {"response": "Invalid competition type"}
competitions/competitions.py DELETED
@@ -1,184 +0,0 @@
- from datetime import datetime
- from functools import partial
- 
- import gradio as gr
- 
- from . import AUTOTRAIN_BACKEND_API, AUTOTRAIN_TOKEN, AUTOTRAIN_USERNAME, COMPETITION_ID, competition_info
- from .errors import PastDeadlineError, SubmissionError, SubmissionLimitError
- from .leaderboard import Leaderboard
- from .submissions import Submissions
- from .text import (
-     NO_SUBMISSIONS,
-     SUBMISSION_LIMIT_REACHED,
-     SUBMISSION_SELECTION_TEXT,
-     SUBMISSION_SUCCESS,
-     SUBMISSION_TEXT,
- )
- 
- 
- leaderboard = Leaderboard(
-     end_date=competition_info.end_date,
-     eval_higher_is_better=competition_info.eval_higher_is_better,
-     max_selected_submissions=competition_info.selection_limit,
-     competition_id=COMPETITION_ID,
-     autotrain_token=AUTOTRAIN_TOKEN,
- )
- 
- submissions = Submissions(
-     competition_id=competition_info.competition_id,
-     submission_limit=competition_info.submission_limit,
-     end_date=competition_info.end_date,
-     autotrain_username=AUTOTRAIN_USERNAME,
-     autotrain_token=AUTOTRAIN_TOKEN,
-     autotrain_backend_api=AUTOTRAIN_BACKEND_API,
- )
- 
- 
- def _new_submission(user_token, submission_file):
-     try:
-         remaining_subs = submissions.new_submission(user_token, submission_file)
-         return SUBMISSION_SUCCESS.format(remaining_subs)
-     except SubmissionLimitError:
-         return SUBMISSION_LIMIT_REACHED
-     except SubmissionError:
-         return "Something went wrong. Please try again later."
- 
- 
- def _my_submissions(user_token):
-     df, failed_df = submissions.my_submissions(user_token)
-     if len(df) == 0:
-         return [
-             gr.Markdown.update(visible=True, value=NO_SUBMISSIONS),
-             gr.DataFrame.update(visible=False),
-             gr.DataFrame.update(
-                 visible=True if len(failed_df) > 0 else False, value=failed_df if len(failed_df) > 0 else None
-             ),
-             gr.TextArea.update(visible=False),
-             gr.Button.update(visible=False),
-         ]
-     selected_submission_ids = df[df["selected"] == True]["submission_id"].values.tolist()
-     failed_selected_submission_ids = failed_df[failed_df["selected"] == True]["submission_id"].values.tolist()
-     selected_submission_ids.extend(failed_selected_submission_ids)
-     if len(selected_submission_ids) > 0:
-         return [
-             gr.Markdown.update(visible=True),
-             gr.DataFrame.update(visible=True, value=df),
-             gr.DataFrame.update(
-                 visible=True if len(failed_df) > 0 else False, value=failed_df if len(failed_df) > 0 else None
-             ),
-             gr.TextArea.update(visible=True, value="\n".join(selected_submission_ids), interactive=True),
-             gr.Button.update(visible=True),
-         ]
-     return [
-         gr.Markdown.update(visible=False),
-         gr.DataFrame.update(visible=True, value=df),
-         gr.DataFrame.update(
-             visible=True if len(failed_df) > 0 else False, value=failed_df if len(failed_df) > 0 else None
-         ),
-         gr.TextArea.update(visible=True, interactive=True),
-         gr.Button.update(visible=True),
-     ]
- 
- 
- def _update_selected_submissions(user_token, submission_ids):
-     submission_ids = submission_ids.split("\n")
-     submission_ids = [sid.strip() for sid in submission_ids]
-     submission_ids = [sid for sid in submission_ids if len(sid) > 0]
-     if len(submission_ids) > competition_info.selection_limit:
-         raise ValueError(
-             f"You can select only {competition_info.selection_limit} submissions. You selected {len(submission_ids)} submissions."
-         )
-     try:
-         submissions.update_selected_submissions(user_token, submission_ids)
-     except PastDeadlineError:
-         return [
-             gr.Markdown.update(visible=True, value="You can no longer select submissions after the deadline."),
-             gr.DataFrame.update(visible=False),
-             gr.DataFrame.update(visible=False),
-             gr.TextArea.update(visible=False),
-             gr.Button.update(visible=False),
-         ]
-     return _my_submissions(user_token)
- 
- 
- def _fetch_leaderboard(private):
-     if private:
-         current_date_time = datetime.now()
-         if current_date_time < competition_info.end_date:
-             return [
-                 gr.DataFrame.update(visible=False),
-                 gr.Markdown.update(
-                     visible=True, value=f"Private Leaderboard will be available on {competition_info.end_date} UTC."
-                 ),
-             ]
-     df = leaderboard.fetch(private=private)
-     # df["name"] = df["name"].apply(make_clickable_user)
-     # df.to_csv("public_leaderboard.csv" if not private else "private_leaderboard.csv", index=False)
-     num_teams = len(df)
-     return [
-         gr.DataFrame.update(visible=True, value=df),
-         gr.Markdown.update(visible=True, value=f"Number of teams: {num_teams}"),
-     ]
- 
- 
- with gr.Blocks(css=".tabitem {padding: 25px}") as demo:
-     with gr.Tabs() as tab_container:
-         with gr.TabItem("Overview", id="overview"):
-             gr.Markdown(f"{competition_info.competition_description}")
-         with gr.TabItem("Dataset", id="dataset_tab") as dataset_tab:
-             gr.Markdown(f"{competition_info.dataset_description}")
-         with gr.TabItem("Public Leaderboard", id="public_leaderboard") as public_leaderboard:
-             output_text_public = gr.Markdown()
-             output_df_public = gr.DataFrame(row_count=(50, "dynamic"), visible=False)
-         with gr.TabItem("Private Leaderboard", id="private_leaderboard") as private_leaderboard:
-             output_text_private = gr.Markdown()
-             output_df_private = gr.DataFrame(row_count=(50, "dynamic"), visible=False)
-         with gr.TabItem("New Submission", id="new_submission"):
-             if competition_info.submission_desc is None:
-                 gr.Markdown(SUBMISSION_TEXT.format(competition_info.submission_limit))
-             else:
-                 gr.Markdown(f"{competition_info.submission_desc}")
-             user_token = gr.Textbox(
-                 max_lines=1, value="", label="Please enter your Hugging Face token (read only)", type="password"
-             )
-             uploaded_file = gr.File()
-             output_text = gr.Markdown(visible=True, show_label=False)
-             new_sub_button = gr.Button("Upload Submission")
-             new_sub_button.click(
-                 fn=_new_submission,
-                 inputs=[user_token, uploaded_file],
-                 outputs=[output_text],
-             )
-         with gr.TabItem("My Submissions", id="my_submissions"):
-             gr.Markdown(SUBMISSION_SELECTION_TEXT.format(competition_info.selection_limit))
-             user_token = gr.Textbox(
-                 max_lines=1, value="", label="Please enter your Hugging Face token (read only)", type="password"
-             )
-             output_text = gr.Markdown(visible=True, show_label=False)
-             output_df = gr.DataFrame(visible=False, label="Succesful Submissions")
-             failed_df = gr.DataFrame(visible=False, label="Failed Submissions")
-             selected_submissions = gr.TextArea(
-                 visible=False,
-                 label="Selected Submissions (one submission id per line)",
-                 max_lines=competition_info.selection_limit,
-                 lines=competition_info.selection_limit,
-             )
-             update_selected_submissions = gr.Button("Update Selected Submissions", visible=False)
-             my_subs_button = gr.Button("Fetch Submissions")
-             my_subs_button.click(
-                 fn=_my_submissions,
-                 inputs=[user_token],
-                 outputs=[output_text, output_df, failed_df, selected_submissions, update_selected_submissions],
-             )
-             update_selected_submissions.click(
-                 fn=_update_selected_submissions,
-                 inputs=[user_token, selected_submissions],
-                 outputs=[output_text, output_df, failed_df, selected_submissions, update_selected_submissions],
-             )
- 
-     fetch_lb_partial = partial(_fetch_leaderboard, private=False)
-     public_leaderboard.select(fetch_lb_partial, inputs=[], outputs=[output_df_public, output_text_public])
-     fetch_lb_partial_private = partial(_fetch_leaderboard, private=True)
-     private_leaderboard.select(
-         fetch_lb_partial_private, inputs=[], outputs=[output_df_private, output_text_private]
-     )
competitions/compute_metrics.py ADDED
@@ -0,0 +1,58 @@
+ from functools import partial
+ 
+ import pandas as pd
+ from huggingface_hub import hf_hub_download
+ from sklearn import metrics
+ 
+ 
+ def compute_metrics(params):
+     solution_file = hf_hub_download(
+         repo_id=params.competition_id,
+         filename="solution.csv",
+         token=params.token,
+         repo_type="dataset",
+     )
+ 
+     solution_df = pd.read_csv(solution_file)
+ 
+     submission_filename = f"submissions/{params.user_id}-{params.submission_id}.csv"
+     submission_file = hf_hub_download(
+         repo_id=params.competition_id,
+         filename=submission_filename,
+         token=params.token,
+         repo_type="dataset",
+     )
+     submission_df = pd.read_csv(submission_file)
+ 
+     public_ids = solution_df[solution_df.split == "public"][params.submission_id_col].values
+     private_ids = solution_df[solution_df.split == "private"][params.submission_id_col].values
+ 
+     public_solution_df = solution_df[solution_df[params.submission_id_col].isin(public_ids)]
+     public_submission_df = submission_df[submission_df[params.submission_id_col].isin(public_ids)]
+ 
+     private_solution_df = solution_df[solution_df[params.submission_id_col].isin(private_ids)]
+     private_submission_df = submission_df[submission_df[params.submission_id_col].isin(private_ids)]
+ 
+     public_solution_df = public_solution_df.sort_values(params.submission_id_col).reset_index(drop=True)
+     public_submission_df = public_submission_df.sort_values(params.submission_id_col).reset_index(drop=True)
+ 
+     private_solution_df = private_solution_df.sort_values(params.submission_id_col).reset_index(drop=True)
+     private_submission_df = private_submission_df.sort_values(params.submission_id_col).reset_index(drop=True)
+ 
+     if params.metric == "f1-macro":
+         _metric = partial(metrics.f1_score, average="macro")
+         target_cols = [col for col in solution_df.columns if col not in [params.submission_id_col, "split"]]
+         public_score = _metric(public_solution_df[target_cols], public_submission_df[target_cols])
+         private_score = _metric(private_solution_df[target_cols], private_submission_df[target_cols])
+     else:
+         _metric = getattr(metrics, params.metric)
+         target_cols = [col for col in solution_df.columns if col not in [params.submission_id_col, "split"]]
+         public_score = _metric(public_solution_df[target_cols], public_submission_df[target_cols])
+         private_score = _metric(private_solution_df[target_cols], private_submission_df[target_cols])
+ 
+     # scores can also be dictionaries for multiple metrics
+     evaluation = {
+         "public_score": public_score,
+         "private_score": private_score,
+     }
+     return evaluation
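A self-contained toy run of the split handling above, using in-memory frames instead of hub downloads; `id` and `pred` are placeholder column names standing in for `params.submission_id_col` and the real target columns:

```python
# Toy illustration of compute_metrics' public/private split logic.
# All data here is made up; no hub access is needed.
from functools import partial

import pandas as pd
from sklearn import metrics

solution_df = pd.DataFrame(
    {
        "id": [1, 2, 3, 4],
        "pred": [0, 1, 1, 0],
        "split": ["public", "public", "private", "private"],
    }
)
submission_df = pd.DataFrame({"id": [1, 2, 3, 4], "pred": [0, 1, 0, 0]})

_metric = partial(metrics.f1_score, average="macro")
target_cols = [c for c in solution_df.columns if c not in ["id", "split"]]

for split in ("public", "private"):
    ids = solution_df[solution_df.split == split]["id"].values
    sol = solution_df[solution_df["id"].isin(ids)].sort_values("id").reset_index(drop=True)
    sub = submission_df[submission_df["id"].isin(ids)].sort_values("id").reset_index(drop=True)
    print(split, _metric(sol[target_cols], sub[target_cols]))
# public 1.0, private ~0.33 (macro F1 over classes 0 and 1)
```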
competitions/evaluate.py ADDED
@@ -0,0 +1,49 @@
+ import argparse
+ import json
+ 
+ from huggingface_hub import snapshot_download
+ from loguru import logger
+ 
+ from competitions import utils
+ from competitions.compute_metrics import compute_metrics
+ from competitions.params import EvalParams
+ 
+ 
+ def parse_args():
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--config", type=str, required=True)
+     return parser.parse_args()
+ 
+ 
+ def generate_submission_file(params):
+     logger.info("Downloading submission dataset")
+     snapshot_download(
+         repo_id=params.data_path,
+         local_dir=params.output_path,
+         token=params.token,
+         repo_type="dataset",
+     )
+ 
+ 
+ @utils.monitor
+ def run(params):
+     if isinstance(params, dict):
+         params = EvalParams(**params)
+ 
+     utils.update_submission_status(params, "processing")
+ 
+     if params.competition_type == "code":
+         generate_submission_file(params)
+     evaluation = compute_metrics(params)  # returns {"public_score": ..., "private_score": ...}
+ 
+     utils.update_submission_score(params, evaluation["public_score"], evaluation["private_score"])
+     utils.update_submission_status(params, "success")
+     utils.pause_space(params)
+ 
+ 
+ if __name__ == "__main__":
+     args = parse_args()
+     _params = json.load(open(args.config, encoding="utf-8"))
+     _params = EvalParams(**_params)
+     run(_params)
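The evaluator only needs the params.json written by `EvalParams.save` (added in competitions/params.py below). A hand-rolled local run might look like this; every value is a placeholder, only the field names are real:

```python
# Hypothetical local driver for competitions.evaluate.
# Field names are those of EvalParams; all values are placeholders.
from competitions.params import EvalParams

params = EvalParams(
    competition_id="my-org/my-competition",
    competition_type="generic",
    metric="accuracy_score",  # "f1-macro" or any sklearn.metrics function name
    token="hf_xxx",
    user_id="1234",
    submission_id="00000000-0000-0000-0000-000000000000",
    submission_id_col="id",
    submission_cols=["id", "pred"],
    submission_rows=10000,
    output_path="/tmp/model",
)
params.save(output_dir="/tmp/model")  # writes /tmp/model/params.json
# then: python -m competitions.evaluate --config /tmp/model/params.json
```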
competitions/info.py CHANGED
@@ -107,3 +107,21 @@ class CompetitionInfo:
107
  @property
108
  def competition_type(self):
109
  return self.config["COMPETITION_TYPE"].lower().strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  @property
108
  def competition_type(self):
109
  return self.config["COMPETITION_TYPE"].lower().strip()
110
+
111
+ @property
112
+ def metric(self):
113
+ return self.config["EVAL_METRIC"]
114
+
115
+ @property
116
+ def submission_id_col(self):
117
+ return self.config["SUBMISSION_ID_COLUMN"]
118
+
119
+ @property
120
+ def submission_cols(self):
121
+ cols = self.config["SUBMISSION_COLUMNS"].split(",")
122
+ cols = [c.strip() for c in cols]
123
+ return cols
124
+
125
+ @property
126
+ def submission_rows(self):
127
+ return self.config["SUBMISSION_ROWS"]
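Judging from the accessors above, a generic competition's config now needs four extra keys. A sketch (key names are taken from the code; values are illustrative only):

```python
# Config entries consumed by the new CompetitionInfo properties.
config = {
    "COMPETITION_TYPE": "generic",
    "EVAL_METRIC": "accuracy_score",  # dispatched in compute_metrics
    "SUBMISSION_ID_COLUMN": "id",
    "SUBMISSION_COLUMNS": "id, pred",  # comma-separated; whitespace is stripped
    "SUBMISSION_ROWS": 10000,
}
```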
competitions/params.py ADDED
@@ -0,0 +1,30 @@
+ import os
+ from typing import List
+ 
+ from pydantic import BaseModel
+ 
+ 
+ class EvalParams(BaseModel):
+     competition_id: str
+     competition_type: str
+     metric: str
+     token: str
+     user_id: str
+     submission_id: str
+     submission_id_col: str
+     submission_cols: List[str]
+     submission_rows: int
+     output_path: str
+ 
+     class Config:
+         protected_namespaces = ()
+ 
+     def save(self, output_dir):
+         """
+         Save parameters to a json file.
+         """
+         os.makedirs(output_dir, exist_ok=True)
+         path = os.path.join(output_dir, "params.json")
+         # save formatted json
+         with open(path, "w", encoding="utf-8") as f:
+             f.write(self.model_dump_json(indent=4))
competitions/runner.py ADDED
@@ -0,0 +1,88 @@
+ import glob
+ import json
+ import os
+ import time
+ from dataclasses import dataclass
+ 
+ import pandas as pd
+ from huggingface_hub import snapshot_download
+ from loguru import logger
+ 
+ from competitions.info import CompetitionInfo
+ from competitions.utils import run_evaluation
+ 
+ 
+ @dataclass
+ class JobRunner:
+     competition_info: CompetitionInfo
+     token: str
+     output_path: str
+ 
+     def __post_init__(self):
+         self.competition_id = self.competition_info.competition_id
+         self.competition_type = self.competition_info.competition_type
+         self.metric = self.competition_info.metric
+         self.submission_id_col = self.competition_info.submission_id_col
+         self.submission_cols = self.competition_info.submission_cols
+         self.submission_rows = self.competition_info.submission_rows
+ 
+     def get_pending_subs(self):
+         user_jsons = snapshot_download(
+             repo_id=self.competition_id,
+             allow_patterns="submission_info/*.json",
+             token=self.token,
+             repo_type="dataset",
+         )
+         user_jsons = glob.glob(os.path.join(user_jsons, "submission_info/*.json"))
+         pending_submissions = []
+         for _json in user_jsons:
+             _json = json.load(open(_json, "r", encoding="utf-8"))
+             user_id = _json["id"]
+             for sub in _json["submissions"]:
+                 # only queue submissions that have not been processed yet
+                 if sub["status"] == "pending":
+                     pending_submissions.append(
+                         {
+                             "user_id": user_id,
+                             "submission_id": sub["submission_id"],
+                             "date": sub["date"],
+                             "time": sub["time"],
+                         }
+                     )
+         if len(pending_submissions) == 0:
+             logger.info("No pending submissions.")
+             return None
+         logger.info(f"Found {len(pending_submissions)} pending submissions.")
+         pending_submissions = pd.DataFrame(pending_submissions)
+         pending_submissions = pending_submissions.sort_values(by=["date", "time"])
+         pending_submissions = pending_submissions.reset_index(drop=True)
+         return pending_submissions
+ 
+     def run_local(self, pending_submissions):
+         for _, row in pending_submissions.iterrows():
+             user_id = row["user_id"]
+             submission_id = row["submission_id"]
+             eval_params = {
+                 "competition_id": self.competition_id,
+                 "competition_type": self.competition_type,
+                 "metric": self.metric,
+                 "token": self.token,
+                 "user_id": user_id,
+                 "submission_id": submission_id,
+                 "submission_id_col": self.submission_id_col,
+                 "submission_cols": self.submission_cols,
+                 "submission_rows": self.submission_rows,
+                 "output_path": self.output_path,
+             }
+             eval_params = json.dumps(eval_params)
+             eval_pid = run_evaluation(eval_params, local=True, wait=True)
+             logger.info(f"New evaluation process started with pid {eval_pid}.")
+ 
+     def run(self):
+         while True:
+             pending_submissions = self.get_pending_subs()
+             if pending_submissions is None:
+                 time.sleep(5)
+                 continue
+             if self.competition_type == "generic":
+                 self.run_local(pending_submissions)
+             time.sleep(5)
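For context, `get_pending_subs` reads one `submission_info/<user_id>.json` per user from the competition dataset repo. From the keys accessed above and the status values written in competitions/utils.py, the shape is roughly the following sketch; the exact schema is not part of this commit:

```python
# Inferred shape of a submission_info/<user_id>.json file. Only keys the
# runner and utils touch are shown; formats and extra fields are guesses.
example_user_info = {
    "id": "1234",  # becomes user_id in the pending queue
    "submissions": [
        {
            "submission_id": "00000000-0000-0000-0000-000000000000",
            "date": "2024-01-01",
            "time": "12:00:00",
            "status": "pending",  # -> processing -> success / failed
            "public_score": None,  # filled in by update_submission_score
            "private_score": None,
        }
    ],
}
```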
competitions/submissions.py CHANGED
@@ -22,7 +22,7 @@ class Submissions:
  
      def __post_init__(self):
          self.public_sub_columns = [
-             "date",
+             "datetime",
              "submission_id",
              "public_score",
              "submission_comment",
@@ -30,7 +30,7 @@ class Submissions:
              "status",
          ]
          self.private_sub_columns = [
-             "date",
+             "datetime",
              "submission_id",
              "public_score",
              "private_score",
@@ -326,16 +326,15 @@ class Submissions:
              raise SubmissionLimitError("Submission limit reached")
  
          logger.info(type(uploaded_file))
+         bytes_data = uploaded_file.file.read()
  
-         with open(uploaded_file.name, "rb") as f:
-             bytes_data = f.read()
          # verify file is valid
          if not self._verify_submission(bytes_data):
              raise SubmissionError("Invalid submission file")
          else:
              user_id = user_info["id"]
              submission_id = str(uuid.uuid4())
-             file_extension = uploaded_file.orig_name.split(".")[-1]
+             file_extension = uploaded_file.filename.split(".")[-1]
              # upload file to hf hub
              api = HfApi(token=self.token)
              api.upload_file(
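The attribute changes above track the move from gradio file objects to FastAPI uploads: an `UploadFile` exposes the raw stream as `.file` and the client-side name as `.filename`. A minimal sketch of the new access pattern (the endpoint and field names here are illustrative only):

```python
# Minimal sketch of reading a FastAPI UploadFile the way new_submission now does.
from fastapi import FastAPI, File, UploadFile

app = FastAPI()


@app.post("/inspect_upload")
async def inspect_upload(submission_file: UploadFile = File(...)):
    bytes_data = submission_file.file.read()  # was: open(uploaded_file.name, "rb")
    file_extension = submission_file.filename.split(".")[-1]  # was: uploaded_file.orig_name
    return {"size": len(bytes_data), "extension": file_extension}
```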
competitions/templates/index.html CHANGED
@@ -313,10 +313,11 @@
              </div>
              {% endif %}
              <div class="form-group mt-2">
-                 <label for="params" class="text-sm font-medium text-gray-700">Submission description (optional)
+                 <label for="submission_comment" class="text-sm font-medium text-gray-700">Submission description
+                     (optional)
                  </label>
-                 <textarea id="params" name="params" rows="5" class="p-2.5 w-full text-sm text-gray-900"
-                     placeholder=""></textarea>
+                 <textarea id="submission_comment" name="submission_comment" rows="5"
+                     class="p-2.5 w-full text-sm text-gray-900" placeholder=""></textarea>
              </div>
              <div class="form-actions mt-6">
                  <button data-modal-hide="submission-modal" type="button"
@@ -356,10 +357,12 @@
                  return;
              }
  
-             // Token should be added here if available
              var token = document.getElementById('user_token').value;
              formData.append('token', token);
  
+             var submissionComment = document.getElementById('submission_comment').value;
+             formData.append('submission_comment', submissionComment);
+ 
              fetch('/new_submission', {
                  method: 'POST',
                  body: formData
competitions/utils.py CHANGED
@@ -1,33 +1,16 @@
+ import io
+ import json
+ import os
+ import subprocess
+ import traceback
+ 
  import requests
+ from huggingface_hub import HfApi, hf_hub_download
  from loguru import logger
  
- from . import MOONLANDING_URL
- 
- 
- def get_auth_headers(token: str, prefix: str = "Bearer"):
-     return {"Authorization": f"{prefix} {token}"}
- 
- 
- def http_post(path: str, token: str, payload=None, domain: str = None, params=None) -> requests.Response:
-     """HTTP POST request to the AutoNLP API, raises UnreachableAPIError if the API cannot be reached"""
-     try:
-         response = requests.post(
-             url=domain + path, json=payload, headers=get_auth_headers(token=token), allow_redirects=True, params=params
-         )
-     except requests.exceptions.ConnectionError:
-         logger.error("❌ Failed to reach AutoNLP API, check your internet connection")
-     response.raise_for_status()
-     return response
- 
- 
- def http_get(path: str, token: str, domain: str = None) -> requests.Response:
-     """HTTP POST request to the AutoNLP API, raises UnreachableAPIError if the API cannot be reached"""
-     try:
-         response = requests.get(url=domain + path, headers=get_auth_headers(token=token), allow_redirects=True)
-     except requests.exceptions.ConnectionError:
-         logger.error("❌ Failed to reach AutoNLP API, check your internet connection")
-     response.raise_for_status()
-     return response
+ from competitions.params import EvalParams
+ 
+ from . import MOONLANDING_URL
  
  
  def user_authentication(token):
@@ -53,3 +36,99 @@ def user_authentication(token):
  def make_clickable_user(user_id):
      link = "https://huggingface.co/" + user_id
      return f'<a target="_blank" href="{link}">{user_id}</a>'
+ 
+ 
+ def run_evaluation(params, local=False, wait=False):
+     params = json.loads(params)
+     if isinstance(params, str):
+         params = json.loads(params)
+     params = EvalParams(**params)
+     if not local:
+         params.output_path = "/tmp/model"
+     params.save(output_dir=params.output_path)
+     cmd = [
+         "python",
+         "-m",
+         "competitions.evaluate",
+         "--config",
+         os.path.join(params.output_path, "params.json"),
+     ]
+ 
+     cmd = [str(c) for c in cmd]
+     logger.info(cmd)
+     env = os.environ.copy()
+     process = subprocess.Popen(" ".join(cmd), shell=True, env=env)
+     if wait:
+         process.wait()
+     return process.pid
+ 
+ 
+ def pause_space(params):
+     if "SPACE_ID" in os.environ:
+         logger.info("Pausing space...")
+         api = HfApi(token=params.token)
+         api.pause_space(repo_id=os.environ["SPACE_ID"])
+ 
+ 
+ def download_submission_info(params):
+     user_fname = hf_hub_download(
+         repo_id=params.competition_id,
+         filename=f"submission_info/{params.user_id}.json",
+         token=params.token,
+         repo_type="dataset",
+     )
+     with open(user_fname, "r", encoding="utf-8") as f:
+         user_submission_info = json.load(f)
+ 
+     return user_submission_info
+ 
+ 
+ def upload_submission_info(params, user_submission_info):
+     user_submission_info_json = json.dumps(user_submission_info, indent=4)
+     user_submission_info_json_bytes = user_submission_info_json.encode("utf-8")
+     user_submission_info_json_buffer = io.BytesIO(user_submission_info_json_bytes)
+     api = HfApi(token=params.token)
+     api.upload_file(
+         path_or_fileobj=user_submission_info_json_buffer,
+         path_in_repo=f"submission_info/{params.user_id}.json",
+         repo_id=params.competition_id,
+         repo_type="dataset",
+     )
+ 
+ 
+ def update_submission_status(params, status):
+     user_submission_info = download_submission_info(params)
+     for submission in user_submission_info["submissions"]:
+         if submission["submission_id"] == params.submission_id:
+             submission["status"] = status
+             break
+     upload_submission_info(params, user_submission_info)
+ 
+ 
+ def update_submission_score(params, public_score, private_score):
+     user_submission_info = download_submission_info(params)
+     for submission in user_submission_info["submissions"]:
+         if submission["submission_id"] == params.submission_id:
+             submission["public_score"] = public_score
+             submission["private_score"] = private_score
+             submission["status"] = "done"
+             break
+     upload_submission_info(params, user_submission_info)
+ 
+ 
+ def monitor(func):
+     def wrapper(*args, **kwargs):
+         params = kwargs.get("params", None)
+         if params is None and len(args) > 0:
+             params = args[0]
+ 
+         try:
+             return func(*args, **kwargs)
+         except Exception as e:
+             error_message = f"""{func.__name__} has failed due to an exception: {traceback.format_exc()}"""
+             logger.error(error_message)
+             logger.error(str(e))
+             update_submission_status(params, "failed")
+             pause_space(params)
+ 
+     return wrapper