Spaces:
Paused
Paused
File size: 998 Bytes
778b735 19ab6fa 778b735 19ab6fa 778b735 19ab6fa 778b735 19ab6fa 778b735 19ab6fa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
from datasets import Dataset
from huggingface_hub import HfApi
from config import DATASET_NAME
import logging
# Configure the root logger: INFO level, with a timestamp and the level name
# in front of every message.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
def initialize_dataset():
    """Create an empty dataset with the expected columns and push it to the Hub.

    The dataset is built with zero rows and published under ``DATASET_NAME``
    as the ``train`` split.

    Raises:
        Exception: Any error raised while pushing (or while logging success)
            is logged at ERROR level and then re-raised unchanged.
    """
    # Names of the columns that make up the expected structure, in order.
    column_names = (
        "entry_id",
        "title",
        "authors",
        "published",
        "updated",
        "pdf_url",
        "summary",
        "categories",
        "primary_category",
        "html_url",
    )
    # Every column starts out as its own empty list (zero rows).
    empty_columns = {column: [] for column in column_names}

    # NOTE(review): the columns carry no values, so no element types are
    # declared here -- confirm the resulting schema suits later writers.
    empty_dataset = Dataset.from_dict(empty_columns)

    try:
        # Publish the empty dataset as the 'train' split.
        empty_dataset.push_to_hub(DATASET_NAME, split="train")
        logging.info(f"Dataset {DATASET_NAME} initialized successfully with 'train' split.")
    except Exception as e:
        # Record the failure, then let the caller see the original exception.
        logging.error(f"Failed to initialize dataset: {str(e)}")
        raise
# Initialize the dataset only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    initialize_dataset()