Update data_to_parquet.py
data_to_parquet.py CHANGED: +28 -21
Removed in this commit: the `from huggingface_hub.hf_api import HfApi` and `from huggingface_hub import whoami` imports, along with the earlier bodies of the `schema` and `data` dicts and the argument lines of the `replace_schema_metadata(...)` and `upload_file(...)` calls. The updated file reads as follows:
```python
import pyarrow as pa
import pyarrow.parquet as pq
import json
import tempfile


# current schema (refer to https://huggingface.co/spaces/phxia/dataset-builder/blob/main/dataset_uploader.py#L153 for more info)
schema = {
    "username": {"_type": "Value", "dtype": "string"},
    "unit1": {"_type": "Value", "dtype": "float64"},
    "unit2": {"_type": "Value", "dtype": "float64"},
    "unit3": {"_type": "Value", "dtype": "float64"},
    "unit4": {"_type": "Value", "dtype": "float64"},
    "certified": {"_type": "Value", "dtype": "int64"},
}


def to_parquet(
    api,
    repo: str,
    username: str = "",
    unit1: float = 0.0,
    unit2: float = 0.0,
    unit3: float = 0.0,
    unit4: float = 0.0,
    certified: int = 0,
):
    data = {
        "username": username,
        "unit1": unit1 * 100 if unit1 != 0 else 0.0,
        "unit2": unit2 * 100 if unit2 != 0 else 0.0,
        "unit3": unit3 * 100 if unit3 != 0 else 0.0,
        "unit4": unit4 * 100 if unit4 != 0 else 0.0,
        "certified": certified,
    }
    # Export data to Arrow format
    table = pa.Table.from_pylist([data])
    # Add metadata (used by datasets library)
    table = table.replace_schema_metadata(
        {"huggingface": json.dumps({"info": {"features": schema}})}
    )
    # Write to parquet file
    archive_file = tempfile.NamedTemporaryFile(delete=False)
    pq.write_table(table, archive_file.name)
    archive_file.close()

    api.upload_file(
        repo_id=repo,  # manually created repo
        repo_type="dataset",
        path_in_repo=f"{username}.parquet",  # each user will have their own parquet
        path_or_fileobj=archive_file.name,
    )
```