not-lain committed on
Commit 7bc6ba8 · 1 parent: 50e0fff

switch to usernameDOTparquet

Files changed (3)
  1. app.py +27 -19
  2. certificates +1 -0
  3. data_to_parquet.py +45 -0
app.py CHANGED
@@ -3,12 +3,13 @@ from datetime import datetime
  import random
 
  import pandas as pd
- from huggingface_hub import HfApi, hf_hub_download, Repository
+ from huggingface_hub import HfApi, hf_hub_download, Repository, whoami
  from huggingface_hub.repocard import metadata_load
 
  import gradio as gr
  from datasets import load_dataset, Dataset
- from huggingface_hub import whoami
+
+ from .data_to_parquet import to_parquet
 
  EXAM_DATASET_ID = os.getenv("EXAM_DATASET_ID") or "agents-course/unit_1_quiz"
  EXAM_MAX_QUESTIONS = os.getenv("EXAM_MAX_QUESTIONS") or 10
@@ -22,7 +23,7 @@ CERTIFIED_USERS_DIR = "certificates"
  repo = Repository(
      local_dir=CERTIFIED_USERS_DIR, clone_from=DATASET_REPO_URL, use_auth_token=os.getenv("HF_TOKEN")
  )
-
+ upload_api = HfApi(token=os.getenv("HF_TOKEN"))
  # Convert dataset to a list of dicts and randomly sort
  quiz_data = ds.to_pandas().to_dict("records")
  random.shuffle(quiz_data)
@@ -109,23 +110,30 @@ def push_results_to_hub(user_answers, token: gr.OAuthToken | None):
 
      user_info = whoami(token=token.token)
      repo_id = f"{EXAM_DATASET_ID}_student_responses"
-     submission_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
-     new_ds = Dataset.from_list(user_answers)
-     new_ds = new_ds.map(
-         lambda x: {
-             "username": user_info["name"],
-             "datetime": submission_time,
-             "grade": grade,
-         }
-     )
-     new_ds.push_to_hub(repo_id=repo_id, split=user_info["name"])
+     # TODO:
+     # check if username already has "username.parquet" in the dataset and download that (or read values directly from dataset viewer if possible)
+     # instead of replacing the values check if the new score is better than the old one
+     to_parquet(upload_api, "not-lain/testing-my-upload", user_info["name"], grade, 0, 0, 0)
 
-     # I'm adding a csv version
-     # The idea, if the user passed, we create a simple row in a csv
-     print("ADD CERTIFIED USER")
-     # Add this user to our database
-     add_certified_user(user_info["name"], grade, submission_time)
+
+     # # backward compatibility
+     # submission_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+     # new_ds = Dataset.from_list(user_answers)
+     # new_ds = new_ds.map(
+     #     lambda x: {
+     #         "username": user_info["name"],
+     #         "datetime": submission_time,
+     #         "grade": grade,
+     #     }
+     # )
+     # new_ds.push_to_hub(repo_id=repo_id, split=user_info["name"])
+
+     # # I'm adding a csv version
+     # # The idea, if the user passed, we create a simple row in a csv
+     # print("ADD CERTIFIED USER")
+     # # Add this user to our database
+     # add_certified_user(user_info["name"], grade, submission_time)
 
      return f"Your responses have been submitted to the Hub! Final grade: {grade:.1%}"
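Note: the TODO above is left unimplemented in this commit. Below is a minimal sketch of one way to do it, assuming each user's record stays in "{username}.parquet" in the same dataset repo ("not-lain/testing-my-upload" here, as hard-coded above) and that only the unit1 score needs comparing; the helper name best_unit1_score and the keep-the-higher-score policy are illustrative assumptions, not part of the committed code.

import pyarrow.parquet as pq
from huggingface_hub import hf_hub_download
from huggingface_hub.utils import EntryNotFoundError


def best_unit1_score(repo_id, username, new_grade, token=None):
    # Download the user's existing parquet if there is one; hf_hub_download raises
    # EntryNotFoundError when the file is not in the repo yet.
    try:
        path = hf_hub_download(
            repo_id=repo_id,
            filename=f"{username}.parquet",
            repo_type="dataset",
            token=token,
        )
    except EntryNotFoundError:
        return new_grade  # first submission for this user
    previous = pq.read_table(path).to_pylist()[0]  # one row per user
    return max(previous.get("unit1", 0.0), new_grade)

push_results_to_hub could then pass best_unit1_score("not-lain/testing-my-upload", user_info["name"], grade, token=os.getenv("HF_TOKEN")) as the unit1 argument instead of grade.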
139
 
certificates ADDED
@@ -0,0 +1 @@
+ Subproject commit ced270b6174504dfb1e3850ff15cd0aeae25152e
data_to_parquet.py ADDED
@@ -0,0 +1,45 @@
+ import pyarrow as pa
+ import pyarrow.parquet as pq
+ from huggingface_hub.hf_api import HfApi
+ from huggingface_hub import whoami
+ import json
+ import tempfile
+
+
+ # current schema (refer to https://huggingface.co/spaces/phxia/dataset-builder/blob/main/dataset_uploader.py#L153 for more info)
+ schema = {'username': {'_type': 'Value', 'dtype': 'string'},
+           'unit1': {'_type': 'Value', 'dtype': 'float64'},
+           'unit2': {'_type': 'Value', 'dtype': 'float64'},
+           'unit3': {'_type': 'Value', 'dtype': 'float64'},
+           'unit4': {'_type': 'Value', 'dtype': 'float64'},
+           'certified': {'_type': 'Value', 'dtype': 'int64'},
+           }
+
+
+
+ def to_parquet(api, repo, username="", unit1=0., unit2=0., unit3=0., unit4=0., certified=0):
+     data = {
+         "username": username,
+         "unit1": unit1,
+         "unit2": unit2,
+         "unit3": unit3,
+         "unit4": unit4,
+         "certified": certified,
+     }
+     # Export data to Arrow format
+     table = pa.Table.from_pylist([data])
+     # Add metadata (used by datasets library)
+     table = table.replace_schema_metadata(
+         {"huggingface": json.dumps({"info": {"features": schema}})}
+     )
+     # Write to parquet file
+     archive_file = tempfile.NamedTemporaryFile(delete=False)
+     pq.write_table(table, archive_file.name)
+     archive_file.close()
+
+     api.upload_file(
+         repo_id=repo,  # manually created repo
+         repo_type="dataset",
+         path_in_repo=f"{username}.parquet",  # each user will have their own parquet
+         path_or_fileobj=archive_file.name,
+     )
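For reference, a hedged usage example of to_parquet outside the quiz app: the repo id is the test dataset hard-coded in app.py in this commit, the username and scores are made-up values, and the plain top-level import is an assumption about how the module is laid out. Because the parquet embeds the huggingface schema metadata, loading the repo with datasets.load_dataset should expose these columns with the declared dtypes.

import os

from huggingface_hub import HfApi

from data_to_parquet import to_parquet  # assumes the module is importable as a top-level module

api = HfApi(token=os.getenv("HF_TOKEN"))
to_parquet(
    api,
    "not-lain/testing-my-upload",
    username="not-lain",
    unit1=0.9,    # grade from the unit 1 quiz; unit2-unit4 keep their 0.0 defaults
    certified=0,  # flip to 1 once the course is completed
)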