File size: 998 Bytes
778b735
19ab6fa
778b735
19ab6fa
778b735
19ab6fa
778b735
19ab6fa
 
 
 
 
 
 
 
 
 
 
 
 
 
778b735
19ab6fa
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from datasets import Dataset
from huggingface_hub import HfApi
from config import DATASET_NAME
import logging

# Configure logging when this module is loaded: INFO threshold, with each
# record rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

def initialize_dataset():
    """Create an empty dataset and push it to the Hub as the 'train' split.

    Builds a ``Dataset`` from a mapping of the column names below to empty
    lists, then pushes it to the repository named by ``DATASET_NAME``.

    NOTE(review): every column is an empty list, so column types are
    presumably inferred by ``datasets`` rather than declared -- confirm the
    resulting schema is compatible with the rows that are added later.

    Raises:
        Exception: Whatever the push step raises; it is logged at ERROR
            level and then re-raised unchanged.
    """
    # Columns of the (initially empty) dataset.
    column_names = (
        "entry_id",
        "title",
        "authors",
        "published",
        "updated",
        "pdf_url",
        "summary",
        "categories",
        "primary_category",
        "html_url",
    )

    # Each column gets its own fresh empty list.
    empty_columns = {name: [] for name in column_names}
    empty_dataset = Dataset.from_dict(empty_columns)

    try:
        # Publish the empty dataset so the 'train' split exists on the Hub.
        empty_dataset.push_to_hub(DATASET_NAME, split="train")
        logging.info(f"Dataset {DATASET_NAME} initialized successfully with 'train' split.")
    except Exception as err:
        # Record the failure, then let the caller see the original exception.
        logging.error(f"Failed to initialize dataset: {str(err)}")
        raise

# Run the initialization only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    initialize_dataset()