# huggingface_dataset_manager.py
"""Append batches of records to a dataset hosted on the Hugging Face Hub."""
import logging
from typing import Any, Dict, List

from datasets import Dataset, concatenate_datasets, load_dataset

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


class HuggingFaceDatasetManager:
    """Load a Hub dataset split, append new rows to it, and push it back.

    Both public methods are best-effort: any failure while loading, merging,
    or pushing is logged and swallowed rather than raised to the caller.
    """

    def __init__(self, dataset_name: str, split: str = "train"):
        """Remember which Hub dataset (and which split of it) to operate on.

        Args:
            dataset_name: Repository id passed to ``load_dataset`` and
                ``push_to_hub``.
            split: Name of the split that rows are appended to. Defaults to
                ``"train"``; NOTE(review): confirm this matches the split
                layout of the target repository.
        """
        self.dataset_name = dataset_name
        self.split = split

    def _append_and_push(self, records: List[Dict[str, Any]]) -> None:
        """Append ``records`` to the configured split and push the result.

        ``records`` must be non-empty; the keys of the first record define
        the columns, and every record must contain all of those keys.
        Exceptions from the ``datasets`` calls propagate to the caller.
        """
        # Request a concrete split so we get a ``Dataset``; without ``split``
        # ``load_dataset`` returns a ``DatasetDict``, which cannot be
        # appended to directly.
        existing = load_dataset(self.dataset_name, split=self.split)

        # Pivot the list of row dicts into the column-oriented mapping that
        # ``Dataset.from_dict`` expects.
        columns = {key: [record[key] for record in records] for key in records[0]}
        new_rows = Dataset.from_dict(columns)

        # ``add_item`` takes a single row dict, not a ``Dataset``; joining two
        # datasets row-wise is what ``concatenate_datasets`` is for.
        combined = concatenate_datasets([existing, new_rows])
        combined.push_to_hub(self.dataset_name, split=self.split)

    def persist_to_dataset(self, metadata_list: List[Dict[str, Any]]):
        """Append ``metadata_list`` to the Hub dataset and push it.

        Logs a warning and does nothing when ``metadata_list`` is empty.
        Errors are logged, not raised.

        Args:
            metadata_list: Row dicts to append; all rows must share the keys
                of the first row.
        """
        if not metadata_list:
            logging.warning("No metadata to persist.")
            return
        try:
            self._append_and_push(metadata_list)
        except Exception as e:  # deliberate best-effort boundary: log, don't raise
            logging.error(f"Error persisting to dataset: {str(e)}")
        else:
            logging.info(f"Updated and pushed dataset: {self.dataset_name}")

    def update_dataset(self, new_data: List[Dict[str, Any]]):
        """Append ``new_data`` to the Hub dataset and push it.

        Logs a warning and does nothing when ``new_data`` is empty.
        Errors are logged, not raised.

        Args:
            new_data: Row dicts to append; all rows must share the keys of
                the first row.
        """
        if not new_data:
            # Guard explicitly instead of letting ``new_data[0]`` raise an
            # IndexError that would be reported as an update failure.
            logging.warning("No new data to update.")
            return
        try:
            self._append_and_push(new_data)
        except Exception as e:  # deliberate best-effort boundary: log, don't raise
            logging.error(f"Error updating Hugging Face dataset: {str(e)}")
        else:
            logging.info(f"Updated Hugging Face dataset: {self.dataset_name}")