File size: 1,418 Bytes
2b6be77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import os
import datasets
import pandas as pd
from datetime import datetime

from config import BACKUP_FOLDER, HF_DATASET_REPO_ID, HF_TOKEN, RESULTS_CSV_FILE, CSV_HEADERS

def main():
    """
    Back up the preferences dataset from the Hugging Face Hub to a local CSV.

    Loads the 'train' split of RESULTS_CSV_FILE from the HF_DATASET_REPO_ID
    repo (authenticating with HF_TOKEN), converts it to a pandas DataFrame and
    writes it to BACKUP_FOLDER/preferences_backup_<YYYYmmdd_HHMMSS>.csv, where
    the timestamp is the current local time. BACKUP_FOLDER is created if it
    does not exist yet.

    NOTE: this function only performs the backup. It does not reset or
    overwrite the dataset on the Hub.

    Errors raised while loading the dataset or creating the backup folder
    propagate to the caller. A failure while writing the CSV file is reported
    on stdout and not re-raised.
    """
    # 1. Download the current preferences from the Hub.
    print(f"Attempting to load dataset '{HF_DATASET_REPO_ID}' from Hugging Face Hub (file: {RESULTS_CSV_FILE})...")
    dataset = datasets.load_dataset(HF_DATASET_REPO_ID, data_files=RESULTS_CSV_FILE, token=HF_TOKEN, split='train')
    print(f"Successfully loaded dataset. It has {len(dataset)} entries.")
    dataset_df = dataset.to_pandas()

    # 2. Save it locally to a backup folder with a timestamp.
    # Create-then-handle instead of check-then-create: avoids the race where
    # the folder appears between an existence check and the makedirs call.
    try:
        os.makedirs(BACKUP_FOLDER)
    except FileExistsError:
        # Path already exists; if it is not a usable directory, the write
        # below fails and is reported there.
        pass
    else:
        print(f"Created backup folder: {BACKUP_FOLDER}")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_filename = f"preferences_backup_{timestamp}.csv"
    backup_filepath = os.path.join(BACKUP_FOLDER, backup_filename)
    try:
        dataset_df.to_csv(backup_filepath, index=False)
    except Exception as e:
        # Best-effort: report the failure instead of aborting with a traceback.
        print(f"Error saving backup to {backup_filepath}: {e}")
    else:
        print(f"Successfully backed up current preferences to: {backup_filepath}")


# Run the backup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()