import pyarrow as pa
import pyarrow.parquet as pq
from huggingface_hub.hf_api import HfApi
from huggingface_hub import whoami
import json
import tempfile

# current schema (refer to https://huggingface.co/spaces/phxia/dataset-builder/blob/main/dataset_uploader.py#L153 for more info)
schema = {
    "username": {"_type": "Value", "dtype": "string"},
    "unit1": {"_type": "Value", "dtype": "float64"},
    "unit2": {"_type": "Value", "dtype": "float64"},
    "unit3": {"_type": "Value", "dtype": "float64"},
    "unit4": {"_type": "Value", "dtype": "float64"},
    "certified": {"_type": "Value", "dtype": "int64"},
}

def to_parquet(api, repo, username="", unit1=0., unit2=0., unit3=0., unit4=0., certified=0):
    data = {
        "username": username,
        "unit1": float(unit1),
        "unit2": float(unit2),
        "unit3": float(unit3),
        "unit4": float(unit4),
        "certified": certified,
    }
    # Export data to Arrow format
    table = pa.Table.from_pylist([data])
    # Add metadata (used by datasets library)
    table = table.replace_schema_metadata(
        {"huggingface": json.dumps({"info": {"features": schema}})}
    )
    # Write to parquet file
    archive_file = tempfile.NamedTemporaryFile(delete=False)
    pq.write_table(table, archive_file.name)
    archive_file.close()
    api.upload_file(
        repo_id=repo,  # manually created repo
        repo_type="dataset",
        path_in_repo=f"{username}.parquet",  # each user will have their own parquet
        path_or_fileobj=archive_file.name,
    )
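
# --- Usage sketch (illustrative, not part of the original source) ---
# A minimal example of calling to_parquet, assuming you have already created a
# dataset repo by hand and exported a write token as HF_TOKEN. The repo id
# "your-org/course-certificates" and the unit scores below are placeholders.
import os

api = HfApi(token=os.environ["HF_TOKEN"])  # authenticate with a write token
username = whoami()["name"]                # resolve the current user's name from the token
to_parquet(
    api,
    repo="your-org/course-certificates",   # hypothetical, manually created dataset repo
    username=username,
    unit1=92.5,
    unit2=88.0,
    unit3=75.0,
    unit4=0.0,
    certified=0,
)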