# File size: 1,480 Bytes
# 84deff7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# huggingface_dataset_manager.py
from datasets import load_dataset, Dataset
from typing import List, Dict, Any
import logging

# Configure the root logger at import time: INFO and above, each record
# rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

class HuggingFaceDatasetManager:
    """Append records to one split of a dataset hosted on the Hugging Face Hub.

    Both public methods are best-effort: any failure while loading, extending
    or pushing the dataset is logged at ERROR level and swallowed, so callers
    never see an exception from them.
    """

    def __init__(self, dataset_name: str, split: str = "train"):
        """Remember which Hub dataset and split to operate on.

        Args:
            dataset_name: Repository id handed to ``load_dataset`` and
                ``push_to_hub``.
            split: Name of the split that records are appended to and pushed
                back as. Defaults to ``"train"``.
        """
        self.dataset_name = dataset_name
        self.split = split

    def _append_and_push(self, records: List[Dict[str, Any]]) -> None:
        """Load the configured split, append ``records`` and push the result.

        Raises whatever ``load_dataset``, ``add_item`` or ``push_to_hub``
        raise; the public callers are responsible for logging.
        """
        # Asking for a concrete split yields a single ``Dataset``. Without
        # ``split`` the loader returns a ``DatasetDict``, which has no
        # ``add_item`` method.
        dataset = load_dataset(self.dataset_name, split=self.split)
        # ``add_item`` accepts exactly one row (a dict) and returns a new
        # dataset, so rows are appended one at a time and the name rebound.
        for record in records:
            dataset = dataset.add_item(record)
        dataset.push_to_hub(self.dataset_name, split=self.split)

    def persist_to_dataset(self, metadata_list: List[Dict[str, Any]]):
        """Append ``metadata_list`` to the Hub dataset and push it.

        Args:
            metadata_list: One dict per row to append. An empty list is a
                no-op that only logs a warning.

        Returns:
            None. Errors are logged, not raised.
        """
        if not metadata_list:
            logging.warning("No metadata to persist.")
            return
        try:
            self._append_and_push(metadata_list)
        except Exception as e:
            # Deliberate best-effort boundary: record the failure (with
            # traceback) and carry on instead of crashing the caller.
            logging.exception(f"Error persisting to dataset: {str(e)}")
        else:
            logging.info(f"Updated and pushed dataset: {self.dataset_name}")

    def update_dataset(self, new_data: List[Dict[str, Any]]):
        """Append ``new_data`` to the Hub dataset and push it.

        Args:
            new_data: One dict per row to append. An empty list is a no-op
                that only logs a warning.

        Returns:
            None. Errors are logged, not raised.
        """
        if not new_data:
            # Skip the download entirely; there is nothing to append.
            logging.warning("No new data to update.")
            return
        try:
            self._append_and_push(new_data)
        except Exception as e:
            # Deliberate best-effort boundary, same as persist_to_dataset.
            logging.exception(f"Error updating Hugging Face dataset: {str(e)}")
        else:
            logging.info(f"Updated Hugging Face dataset: {self.dataset_name}")