File size: 530 Bytes
778b735
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from datasets import Dataset
from config import DATASET_NAME
import huggingface_hub

# Column names of the dataset, listed in the order the columns are created.
_COLUMN_NAMES = (
    "id",
    "title",
    "authors",
    "published",
    "updated",
    "pdf_url",
    "entry_id",
    "summary",
    "categories",
    "primary_category",
    "html_url",
)

# Give every column its own empty list so the dataset starts with zero rows.
initial_data = {column: [] for column in _COLUMN_NAMES}

# Build the (empty) dataset from the column mapping.
dataset = Dataset.from_dict(initial_data)

# Upload the empty dataset to the Hub under DATASET_NAME as the "train" split.
# NOTE(review): this executes as soon as the module is run or imported;
# presumably it is meant as a one-off setup script -- confirm before importing
# it from other code.
dataset.push_to_hub(DATASET_NAME, split="train")