not-lain committed on
Commit 7bc6ba8 · 1 parent: 50e0fff

switch to usernameDOTparquet

Files changed (3)
  1. app.py +27 -19
  2. certificates +1 -0
  3. data_to_parquet.py +45 -0
app.py CHANGED
@@ -3,12 +3,13 @@ from datetime import datetime
  import random
 
  import pandas as pd
- from huggingface_hub import HfApi, hf_hub_download, Repository
+ from huggingface_hub import HfApi, hf_hub_download, Repository, whoami
  from huggingface_hub.repocard import metadata_load
 
  import gradio as gr
  from datasets import load_dataset, Dataset
- from huggingface_hub import whoami
+
+ from .data_to_parquet import to_parquet
 
  EXAM_DATASET_ID = os.getenv("EXAM_DATASET_ID") or "agents-course/unit_1_quiz"
  EXAM_MAX_QUESTIONS = os.getenv("EXAM_MAX_QUESTIONS") or 10
@@ -22,7 +23,7 @@ CERTIFIED_USERS_DIR = "certificates"
  repo = Repository(
      local_dir=CERTIFIED_USERS_DIR, clone_from=DATASET_REPO_URL, use_auth_token=os.getenv("HF_TOKEN")
  )
-
+ upload_api = HfApi(token=os.getenv("HF_TOKEN"))
  # Convert dataset to a list of dicts and randomly sort
  quiz_data = ds.to_pandas().to_dict("records")
  random.shuffle(quiz_data)
@@ -109,23 +110,30 @@ def push_results_to_hub(user_answers, token: gr.OAuthToken | None):
 
      user_info = whoami(token=token.token)
      repo_id = f"{EXAM_DATASET_ID}_student_responses"
-     submission_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
-     new_ds = Dataset.from_list(user_answers)
-     new_ds = new_ds.map(
-         lambda x: {
-             "username": user_info["name"],
-             "datetime": submission_time,
-             "grade": grade,
-         }
-     )
-     new_ds.push_to_hub(repo_id=repo_id, split=user_info["name"])
+     # TODO:
+     # check if username already has "username.parquet" in the dataset and download that (or read values directly from dataset viewer if possible)
+     # instead of replacing the values check if the new score is better than the old one
+     to_parquet(upload_api, "not-lain/testing-my-upload", user_info["name"], grade, 0, 0, 0)
 
-     # I'm adding a csv version
-     # The idea, if the user passed, we create a simple row in a csv
-     print("ADD CERTIFIED USER")
-     # Add this user to our database
-     add_certified_user(user_info["name"], grade, submission_time)
+
+     # # backward compatibility
+     # submission_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+     # new_ds = Dataset.from_list(user_answers)
+     # new_ds = new_ds.map(
+     #     lambda x: {
+     #         "username": user_info["name"],
+     #         "datetime": submission_time,
+     #         "grade": grade,
+     #     }
+     # )
+     # new_ds.push_to_hub(repo_id=repo_id, split=user_info["name"])
+
+     # # I'm adding a csv version
+     # # The idea, if the user passed, we create a simple row in a csv
+     # print("ADD CERTIFIED USER")
+     # # Add this user to our database
+     # add_certified_user(user_info["name"], grade, submission_time)
 
      return f"Your responses have been submitted to the Hub! Final grade: {grade:.1%}"
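Note: the TODO above is left unimplemented in this commit. Below is a minimal sketch of one way to do it, assuming each user's record stays in "{username}.parquet" in the same dataset repo ("not-lain/testing-my-upload" here, as hard-coded above) and that only the unit1 score needs comparing; the helper name best_unit1_score and the keep-the-higher-score policy are illustrative assumptions, not part of the committed code.

import pyarrow.parquet as pq
from huggingface_hub import hf_hub_download
from huggingface_hub.utils import EntryNotFoundError


def best_unit1_score(repo_id, username, new_grade, token=None):
    # Download the user's existing parquet if there is one; hf_hub_download raises
    # EntryNotFoundError when the file is not in the repo yet.
    try:
        path = hf_hub_download(
            repo_id=repo_id,
            filename=f"{username}.parquet",
            repo_type="dataset",
            token=token,
        )
    except EntryNotFoundError:
        return new_grade  # first submission for this user
    previous = pq.read_table(path).to_pylist()[0]  # one row per user
    return max(previous.get("unit1", 0.0), new_grade)

push_results_to_hub could then pass best_unit1_score("not-lain/testing-my-upload", user_info["name"], grade, token=os.getenv("HF_TOKEN")) as the unit1 argument instead of grade.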
139
 
certificates ADDED
@@ -0,0 +1 @@
+ Subproject commit ced270b6174504dfb1e3850ff15cd0aeae25152e
data_to_parquet.py ADDED
@@ -0,0 +1,45 @@
+ import pyarrow as pa
+ import pyarrow.parquet as pq
+ from huggingface_hub.hf_api import HfApi
+ from huggingface_hub import whoami
+ import json
+ import tempfile
+
+
+ # current schema (refer to https://huggingface.co/spaces/phxia/dataset-builder/blob/main/dataset_uploader.py#L153 for more info)
+ schema = {'username': {'_type': 'Value', 'dtype': 'string'},
+           'unit1': {'_type': 'Value', 'dtype': 'float64'},
+           'unit2': {'_type': 'Value', 'dtype': 'float64'},
+           'unit3': {'_type': 'Value', 'dtype': 'float64'},
+           'unit4': {'_type': 'Value', 'dtype': 'float64'},
+           'certified': {'_type': 'Value', 'dtype': 'int64'},
+           }
+
+
+
+ def to_parquet(api, repo, username="", unit1=0., unit2=0., unit3=0., unit4=0., certified=0):
+     data = {
+         "username": username,
+         "unit1": unit1,
+         "unit2": unit2,
+         "unit3": unit3,
+         "unit4": unit4,
+         "certified": certified,
+     }
+     # Export data to Arrow format
+     table = pa.Table.from_pylist([data])
+     # Add metadata (used by datasets library)
+     table = table.replace_schema_metadata(
+         {"huggingface": json.dumps({"info": {"features": schema}})}
+     )
+     # Write to parquet file
+     archive_file = tempfile.NamedTemporaryFile(delete=False)
+     pq.write_table(table, archive_file.name)
+     archive_file.close()
+
+     api.upload_file(
+         repo_id=repo,  # manually created repo
+         repo_type="dataset",
+         path_in_repo=f"{username}.parquet",  # each user will have their own parquet
+         path_or_fileobj=archive_file.name,
+     )
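For reference, a hedged usage example of to_parquet outside the quiz app: the repo id is the test dataset hard-coded in app.py in this commit, the username and scores are made-up values, and the plain top-level import is an assumption about how the module is laid out. Because the parquet embeds the huggingface schema metadata, loading the repo with datasets.load_dataset should expose these columns with the declared dtypes.

import os

from huggingface_hub import HfApi

from data_to_parquet import to_parquet  # assumes the module is importable as a top-level module

api = HfApi(token=os.getenv("HF_TOKEN"))
to_parquet(
    api,
    "not-lain/testing-my-upload",
    username="not-lain",
    unit1=0.9,    # grade from the unit 1 quiz; unit2-unit4 keep their 0.0 defaults
    certified=0,  # flip to 1 once the course is completed
)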