Abhishek Thakur committed on
Commit
8ec4d2d
·
1 Parent(s): c941736

submissions work

Browse files
.env.example CHANGED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ HF_ACCESS_TOKEN=hf_app_XXX
2
+ AUTOTRAIN_USERNAME=autoevaluator
3
+ AUTOTRAIN_TOKEN=hf_XXX
4
+ AUTOTRAIN_BACKEND_API=https://api.autotrain.huggingface.co
5
+ MOONLANDING_URL=https://huggingface.co
6
+ SUBMISSION_LIMIT=5
__init__.py CHANGED
@@ -1 +1,3 @@
 
 
1
  __version__ = "0.0.1"
 
1
+ import config
2
+
3
  __version__ = "0.0.1"
config.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from pathlib import Path
4
+
5
# Load variables from a local ".env" file when one exists in the current
# working directory; otherwise only the process environment is used.
if Path(".env").is_file():
    load_dotenv(".env")


# Settings read from the environment. Each of these is None when the
# corresponding variable is not set.
MOONLANDING_URL = os.getenv("MOONLANDING_URL")
COMPETITION_ID = os.getenv("COMPETITION_ID")
DUMMY_DATA_PATH = os.getenv("DUMMY_DATA_PATH")
AUTOTRAIN_USERNAME = os.getenv("AUTOTRAIN_USERNAME")
AUTOTRAIN_TOKEN = os.getenv("AUTOTRAIN_TOKEN")
HF_ACCESS_TOKEN = os.getenv("HF_ACCESS_TOKEN")
AUTOTRAIN_BACKEND_API = os.getenv("AUTOTRAIN_BACKEND_API")
# Maximum number of submissions per user per day. Defaults to 5 when the
# variable is unset; without a default, int(None) raised TypeError and made
# the whole module (and everything importing it) fail at import time.
SUBMISSION_LIMIT = int(os.getenv("SUBMISSION_LIMIT", "5"))
pages/3_🔥_New Submission.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from huggingface_hub import HfApi, CommitOperationAdd
3
+ import uuid
4
+ import os
5
+ import pandas as pd
6
+ import utils
7
+ import config
8
+
9
# Introductory text shown at the top of the page. The daily cap is taken from
# config.SUBMISSION_LIMIT so the message always matches the limit that is
# actually enforced, instead of a hard-coded number.
SUBMISSION_TEXT = f"""You can make up to {config.SUBMISSION_LIMIT} submissions per day.
The test data has been divided into public and private splits.
Your score on the public split will be shown on the leaderboard.
Your final score will be based on your private split performance.
The final rankings will be based on the private split performance.
"""

# Shown under the "Invalid submission" error when a file fails verification.
SUBMISSION_ERROR = """Submission is not in a proper format.
Please check evaluation instructions for more details."""
18
+
19
+
20
def app():
    """Render the "New Submission" page and process an uploaded submission.

    Nothing happens until both a file and a token have been provided. Then the
    token is checked with ``utils.user_authentication``, the daily limit with
    ``utils.check_user_submission_limit`` and the file with
    ``utils.verify_submission``. A valid file is uploaded to the competition
    dataset repo, recorded with ``utils.increment_submissions`` and scheduled
    for evaluation with ``utils.create_project``.
    """
    st.set_page_config(page_title="New Submissions", page_icon="🤗")
    st.write("## New Submission")
    st.markdown(SUBMISSION_TEXT)
    uploaded_file = st.file_uploader("Choose a file")
    # user token
    user_token = st.text_input("Enter your token", value="", type="password")
    user_token = user_token.strip()
    if uploaded_file is None or user_token == "":
        # Wait until the user has supplied both inputs.
        return

    # verify token
    # NOTE: the whoami payload is deliberately not printed; it holds account
    # details that should not end up in the application logs.
    user_info = utils.user_authentication(token=user_token)
    if "error" in user_info:
        st.error("Invalid token")
        return

    if user_info["emailVerified"] is False:
        st.error("Please verify your email on Hugging Face Hub")
        return

    # check if user can submit to the competition
    if utils.check_user_submission_limit(user_info) is False:
        st.error("You have reached your submission limit for today")
        return

    bytes_data = uploaded_file.getvalue()
    # verify file is valid
    if not utils.verify_submission(bytes_data):
        st.error("Invalid submission")
        st.write(SUBMISSION_ERROR)
        # write a horizontal html line
        st.markdown("<hr/>", unsafe_allow_html=True)
        return

    with st.spinner("Uploading submission..."):
        user_id = user_info["id"]
        submission_id = str(uuid.uuid4())
        file_extension = uploaded_file.name.split(".")[-1]
        # upload file to hf hub
        api = HfApi()
        api.upload_file(
            path_or_fileobj=bytes_data,
            path_in_repo=f"submissions/{user_id}-{submission_id}.{file_extension}",
            repo_id=config.COMPETITION_ID,
            repo_type="dataset",
            token=config.AUTOTRAIN_TOKEN,
        )
    with st.spinner("Creating submission..."):
        # update submission limit
        submissions_made = utils.increment_submissions(
            user_id=user_id,
            submission_id=submission_id,
            submission_comment="",
        )
        st.success(
            f"Upload successful! You have {config.SUBMISSION_LIMIT - submissions_made} submissions left for today."
        )

    with st.spinner("Scheduling submission for evaluation..."):
        # schedule submission for evaluation
        utils.create_project(
            project_id=submission_id,
            submission_dataset=f"{config.COMPETITION_ID}",
            model="dummy",
            dataset=user_id,
        )
    st.success("Submission scheduled for evaluation")


if __name__ == "__main__":
    app()
pages/3_🔥_Submissions.py DELETED
@@ -1,92 +0,0 @@
1
- import streamlit as st
2
- from huggingface_hub import HfApi, CommitOperationAdd
3
- import uuid
4
- import os
5
- import pandas as pd
6
-
7
- SUBMISSION_TEXT = """You can make upto 5 submissions per day.
8
- The test data has been divided into public and private splits.
9
- Your score on the public split will be shown on the leaderboard.
10
- Your final score will be based on your private split performance.
11
- The final rankings will be based on the private split performance.
12
- """
13
-
14
- SUBMISSION_ERROR = """Submission is not in a proper format.
15
- Please check evaluation instructions for more details."""
16
-
17
- COMPETITION_ID = os.getenv("COMPETITION_ID", "sample_competition")
18
- USER_ID = os.getenv("USER_ID", "sample_user")
19
- DUMMY_DATA_PATH = os.getenv("DUMMY_DATA_PATH", "autoevaluator/benchmark-dummy-data")
20
-
21
-
22
- def verify_submission(submission):
23
- # verify submission is valid
24
- return True
25
-
26
-
27
- def fetch_submissions():
28
- submissions = [
29
- {
30
- "submission_id": "72836-23423",
31
- "score": 0.7,
32
- "created_at": "2021-01-01T00:00:00Z",
33
- },
34
- {
35
- "submission_id": "23-42332",
36
- "score": 0.5,
37
- "created_at": "2021-01-01T00:00:00Z",
38
- },
39
- ]
40
- df = pd.DataFrame(submissions)
41
- return df
42
-
43
-
44
- def app():
45
- st.set_page_config(page_title="Submissions", page_icon="πŸ€—")
46
- st.write("## Submissions")
47
- uploaded_file = st.sidebar.file_uploader("Choose a file")
48
- if uploaded_file is not None:
49
- bytes_data = uploaded_file.getvalue()
50
- # verify file is valid
51
- if not verify_submission(bytes_data):
52
- st.error("Invalid submission")
53
- st.write(SUBMISSION_ERROR)
54
- # write a horizontal html line
55
- st.markdown("<hr/>", unsafe_allow_html=True)
56
- else:
57
- # start progress bar
58
- progress_bar = st.progress(0)
59
- submission_id = str(uuid.uuid4())
60
- api = HfApi()
61
- operations = [
62
- CommitOperationAdd(
63
- path_in_repo="submission.csv",
64
- path_or_fileobj=bytes_data,
65
- ),
66
- ]
67
- # update progress bar
68
- progress_bar.progress(0.5)
69
- api.create_repo(
70
- repo_id=submission_id,
71
- private=True,
72
- repo_type="dataset",
73
- )
74
- api.create_commit(
75
- repo_id=f"{USER_ID}/{submission_id}",
76
- operations=operations,
77
- commit_message="add submission.csv",
78
- repo_type="dataset",
79
- )
80
-
81
- st.markdown(SUBMISSION_TEXT)
82
- # add submissions history table
83
- st.write("### Submissions History")
84
- submissions = fetch_submissions()
85
- if len(submissions) == 0:
86
- st.write("You have not made any submissions yet.")
87
- else:
88
- st.write(submissions)
89
-
90
-
91
- if __name__ == "__main__":
92
- app()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pages/4_⭐️_Submission History.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from huggingface_hub import HfApi, CommitOperationAdd
3
+ import uuid
4
+ import os
5
+ import pandas as pd
6
+ import utils
7
+ import config
8
+
9
# Guidance text rendered above the user's submission table.
SUBMISSION_TEXT = "You can select upto 2 submissions for private leaderboard.\n"
11
+
12
+
13
def app():
    """Render the "Submission History" page for the user owning the token.

    Nothing is shown until a token is entered. The token is checked with
    ``utils.user_authentication``; for a valid, email-verified user the
    submissions returned by ``utils.fetch_submissions`` are displayed as a
    table, or a short message is shown when there are none.
    """
    st.set_page_config(page_title="Submission History", page_icon="🤗")
    st.write("## Your Submissions")
    st.markdown(SUBMISSION_TEXT)
    # user token
    user_token = st.text_input("Enter your token", value="", type="password")
    user_token = user_token.strip()
    if user_token == "":
        return

    user_info = utils.user_authentication(token=user_token)
    if "error" in user_info:
        st.error("Invalid token")
        return

    if user_info["emailVerified"] is False:
        st.error("Please verify your email on Hugging Face Hub")
        return

    # get user submissions
    user_id = user_info["id"]
    user_submissions = utils.fetch_submissions(user_id)
    if not user_submissions:
        # An empty table is not informative; tell the user explicitly.
        st.write("You have not made any submissions yet.")
        return
    submissions_df = pd.DataFrame(user_submissions)
    st.write(submissions_df)


if __name__ == "__main__":
    app()
utils.py ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import requests
3
+ import config
4
+ from huggingface_hub import HfApi, hf_hub_download
5
+ import json
6
+ import datetime
7
+ import io
8
+ from huggingface_hub.utils._errors import EntryNotFoundError
9
+
10
+
11
def get_auth_headers(token: str, prefix: str = "Bearer"):
    """Build the ``Authorization`` header dict for an API request.

    Args:
        token: Credential placed after the scheme prefix.
        prefix: Authorization scheme written before the token.

    Returns:
        A dict with the single key ``"Authorization"`` whose value is
        ``"<prefix> <token>"``.
    """
    authorization = "{} {}".format(prefix, token)
    return dict(Authorization=authorization)
13
+
14
+
15
def http_post(path: str, token: str, payload=None, domain: str = None, params=None) -> requests.Response:
    """Send an authenticated JSON POST request to ``domain + path``.

    Args:
        path: URL path appended to ``domain``.
        token: Token sent in the ``Authorization`` header as a Bearer token.
        payload: Object sent as the JSON body of the request.
        domain: Base URL of the API; must be provided by the caller.
        params: Optional query-string parameters.

    Returns:
        The response, after ``raise_for_status()`` has passed.

    Raises:
        requests.exceptions.ConnectionError: If the API cannot be reached.
        requests.exceptions.HTTPError: If the API answers with an error status.
    """
    try:
        response = requests.post(
            url=domain + path, json=payload, headers=get_auth_headers(token=token), allow_redirects=True, params=params
        )
    except requests.exceptions.ConnectionError:
        print("❌ Failed to reach AutoNLP API, check your internet connection")
        # Re-raise: there is no response object at this point, and falling
        # through would fail with UnboundLocalError on `response` instead.
        raise
    response.raise_for_status()
    return response
25
+
26
+
27
def http_get(path: str, token: str, domain: str = None) -> requests.Response:
    """Send an authenticated GET request to ``domain + path``.

    Args:
        path: URL path appended to ``domain``.
        token: Token sent in the ``Authorization`` header as a Bearer token.
        domain: Base URL of the API; must be provided by the caller.

    Returns:
        The response, after ``raise_for_status()`` has passed.

    Raises:
        requests.exceptions.ConnectionError: If the API cannot be reached.
        requests.exceptions.HTTPError: If the API answers with an error status.
    """
    try:
        response = requests.get(url=domain + path, headers=get_auth_headers(token=token), allow_redirects=True)
    except requests.exceptions.ConnectionError:
        print("❌ Failed to reach AutoNLP API, check your internet connection")
        # Re-raise: there is no response object at this point, and falling
        # through would fail with UnboundLocalError on `response` instead.
        raise
    response.raise_for_status()
    return response
35
+
36
+
37
def create_project(project_id, submission_dataset, model, dataset):
    """Create an AutoTrain project for a submission and start its training job.

    The function creates the project, attaches a dataset to it, starts data
    processing, polls the project until processing is reported complete and
    finally approves the training job. Each API response is printed.

    Args:
        project_id: Used as the project name (``proj_name``).
        submission_dataset: Passed as ``benchmark.submission_dataset``.
        model: Passed as ``benchmark.model``.
        dataset: Passed as ``benchmark.dataset``.

    Raises:
        requests.exceptions.HTTPError: If any API call returns an error status.
    """
    # Dataset attached to the project in the "Upload data" step below.
    project_config = {}
    project_config["dataset_name"] = "lewtun/imdb-dummy"
    project_config["dataset_config"] = "lewtun--imdb-dummy"
    project_config["dataset_split"] = "train"
    project_config["col_mapping"] = {"text": "text", "label": "target"}

    payload = {
        "username": config.AUTOTRAIN_USERNAME,
        "proj_name": project_id,
        "task": 1,  # NOTE(review): numeric task id defined by the backend - confirm meaning
        "config": {
            "language": "en",
            "max_models": 5,
            "benchmark": {
                "dataset": dataset,
                "model": model,
                "submission_dataset": submission_dataset,
            },
        },
    }

    project_json_resp = http_post(
        path="/projects/create", payload=payload, token=config.AUTOTRAIN_TOKEN, domain=config.AUTOTRAIN_BACKEND_API
    ).json()
    print(project_json_resp)
    time.sleep(5)
    # Upload data
    payload = {
        "split": 4,  # NOTE(review): numeric split id defined by the backend - confirm meaning
        "col_mapping": project_config["col_mapping"],
        "load_config": {"max_size_bytes": 0, "shuffle": False},
        "dataset_id": project_config["dataset_name"],
        "dataset_config": project_config["dataset_config"],
        "dataset_split": project_config["dataset_split"],
    }

    data_json_resp = http_post(
        path=f"/projects/{project_json_resp['id']}/data/dataset",
        payload=payload,
        token=config.AUTOTRAIN_TOKEN,
        domain=config.AUTOTRAIN_BACKEND_API,
    ).json()
    print("💾💾💾 Dataset creation 💾💾💾")
    print(data_json_resp)

    # Process data
    data_proc_json_resp = http_post(
        path=f"/projects/{project_json_resp['id']}/data/start_processing",
        token=config.AUTOTRAIN_TOKEN,
        domain=config.AUTOTRAIN_BACKEND_API,
    ).json()
    print(f"🪝 Start data processing response: {data_proc_json_resp}")

    print("⏳ Waiting for data processing to complete ...")
    # NOTE(review): this loop has no upper bound; it never ends if the project
    # does not reach status 3 (e.g. processing fails) - consider a deadline.
    while True:
        project_status = http_get(
            path=f"/projects/{project_json_resp['id']}",
            token=config.AUTOTRAIN_TOKEN,
            domain=config.AUTOTRAIN_BACKEND_API,
        ).json()
        # See database.database.enums.ProjectStatus for definitions of `status`
        if project_status["status"] == 3:
            print("✅ Data processing complete!")
            break
        # Only wait when another poll is needed, not after success.
        time.sleep(10)

    # Approve training job
    train_job_resp = http_post(
        path=f"/projects/{project_json_resp['id']}/start_training",
        token=config.AUTOTRAIN_TOKEN,
        domain=config.AUTOTRAIN_BACKEND_API,
    ).json()
    print(f"🏃 Training job approval response: {train_job_resp}")
112
+
113
+
114
def user_authentication(token):
    """Look up the account behind ``token`` via the Hub ``whoami-v2`` endpoint.

    Tokens starting with ``hf_`` are sent as a Bearer ``Authorization``
    header; any other value is sent as a ``token`` cookie.

    Args:
        token: The user-supplied token string.

    Returns:
        The decoded JSON body of the response. Callers in this project treat
        an ``"error"`` key in it as an invalid token.

    Raises:
        Exception: If the request times out or the Hub cannot be reached.
    """
    headers = {}
    cookies = {}
    if token.startswith("hf_"):
        headers["Authorization"] = f"Bearer {token}"
    else:
        cookies = {"token": token}
    try:
        response = requests.get(
            config.MOONLANDING_URL + "/api/whoami-v2",
            headers=headers,
            cookies=cookies,
            timeout=3,
        )
    # requests' ConnectionError is not a subclass of the builtin
    # ConnectionError, so it has to be named explicitly to be caught here.
    # The builtin is kept so anything caught before is still caught.
    except (requests.exceptions.Timeout, requests.exceptions.ConnectionError, ConnectionError) as err:
        print(f"Failed to request whoami-v2 - {repr(err)}")
        raise Exception("Hugging Face Hub is unreachable, please try again later.") from err
    return response.json()
132
+
133
+
134
def add_new_user(user_info):
    """Create an empty submission record for a user in the competition repo.

    The record holds the user's ``name`` and ``id`` plus an empty
    ``submissions`` list, and is uploaded as ``<id>.json`` to the dataset repo
    named by ``config.COMPETITION_ID``.

    Args:
        user_info: Mapping that provides at least ``"name"`` and ``"id"``.
    """
    hub = HfApi()
    record = {
        "name": user_info["name"],
        "id": user_info["id"],
        "submissions": [],
    }
    # upload_file accepts a binary file object, so wrap the encoded JSON.
    record_buffer = io.BytesIO(json.dumps(record).encode("utf-8"))

    hub.upload_file(
        path_or_fileobj=record_buffer,
        path_in_repo=f"{user_info['id']}.json",
        repo_id=config.COMPETITION_ID,
        repo_type="dataset",
        token=config.AUTOTRAIN_TOKEN,
    )
152
+
153
+
154
def check_user_submission_limit(user_info):
    """Tell whether the user may still submit today.

    The user's record (``<id>.json``) is downloaded from the competition
    dataset repo; when it does not exist yet it is created with
    ``add_new_user`` and downloaded again.

    Args:
        user_info: Mapping that provides at least ``"id"`` (and ``"name"`` when
            a new record has to be created).

    Returns:
        ``False`` when the number of submissions dated today has reached
        ``config.SUBMISSION_LIMIT``, ``True`` otherwise.

    Raises:
        Exception: If the record cannot be downloaded for any reason other
            than the file being absent.
    """
    user_id = user_info["id"]
    download_args = dict(
        repo_id=config.COMPETITION_ID,
        filename=f"{user_id}.json",
        use_auth_token=config.AUTOTRAIN_TOKEN,
        repo_type="dataset",
    )
    try:
        record_path = hf_hub_download(**download_args)
    except EntryNotFoundError:
        # First contact with this user: create the record, then fetch it.
        add_new_user(user_info)
        record_path = hf_hub_download(**download_args)
    except Exception as e:
        print(e)
        raise Exception("Hugging Face Hub is unreachable, please try again later.")

    with open(record_path, "r") as record_file:
        record = json.load(record_file)

    today = datetime.datetime.now().strftime("%Y-%m-%d")

    # count the number of times user has submitted today
    submitted_today = sum(1 for entry in record["submissions"] if entry["date"] == today)
    return not submitted_today >= config.SUBMISSION_LIMIT
190
+
191
+
192
def increment_submissions(user_id, submission_id, submission_comment):
    """Append a pending submission to the user's record and upload it.

    Args:
        user_id: Identifies the record file ``<user_id>.json`` in the
            competition dataset repo.
        submission_id: Stored in the new entry.
        submission_comment: Stored in the new entry.

    Returns:
        The number of entries in the record dated today, including the one
        just added.
    """
    record_name = f"{user_id}.json"
    record_path = hf_hub_download(
        repo_id=config.COMPETITION_ID,
        filename=record_name,
        use_auth_token=config.AUTOTRAIN_TOKEN,
        repo_type="dataset",
    )
    with open(record_path, "r") as record_file:
        record = json.load(record_file)

    today = datetime.datetime.now().strftime("%Y-%m-%d")
    new_entry = {
        "date": today,
        "submission_id": submission_id,
        "submission_comment": submission_comment,
        "status": "pending",
    }
    record["submissions"].append(new_entry)

    # count the number of times user has submitted today
    submitted_today = sum(1 for entry in record["submissions"] if entry["date"] == today)

    # upload_file accepts a binary file object, so wrap the encoded JSON.
    record_buffer = io.BytesIO(json.dumps(record).encode("utf-8"))
    HfApi().upload_file(
        path_or_fileobj=record_buffer,
        path_in_repo=record_name,
        repo_id=config.COMPETITION_ID,
        repo_type="dataset",
        token=config.AUTOTRAIN_TOKEN,
    )
    return submitted_today
228
+
229
+
230
def verify_submission(bytes_data):
    """Report whether an uploaded submission is acceptable.

    Currently a placeholder: the content is not inspected and every
    submission is accepted.

    Args:
        bytes_data: Raw content of the uploaded file (unused).

    Returns:
        Always ``True``.
    """
    is_valid = True
    return is_valid
232
+
233
+
234
def fetch_submissions(user_id):
    """Return the list of submissions recorded for a user.

    Args:
        user_id: Identifies the record file ``<user_id>.json`` in the
            competition dataset repo.

    Returns:
        The ``"submissions"`` list stored in the user's record, or an empty
        list when no record file exists for the user yet.
    """
    try:
        user_fname = hf_hub_download(
            repo_id=config.COMPETITION_ID,
            filename=f"{user_id}.json",
            use_auth_token=config.AUTOTRAIN_TOKEN,
            repo_type="dataset",
        )
    except EntryNotFoundError:
        # No record file means the user has never submitted; that is an empty
        # history, not an error.
        return []
    with open(user_fname, "r") as f:
        user_submission_info = json.load(f)
    return user_submission_info["submissions"]