import os
import pprint

import pandas as pd
import yaml
from datasets import load_dataset

# Directory that receives one CSV file per dataset split.
RAW_DATA_DIR = "data/raw"


def make_dataset(
    dataset: str = "cnn_dailymail",
    split: str = "train",
    config: str = "3.0.0",
) -> None:
    """Load one split of a summarisation dataset and save it as a CSV file.

    The split is loaded with ``datasets.load_dataset`` and its ``article``
    and ``highlights`` columns are written to ``data/raw/<split>.csv``.

    Args:
        dataset: Dataset name passed to ``load_dataset``.
        split: Split to load; also used as the output file's base name.
        config: Dataset configuration passed as the second positional
            argument of ``load_dataset``.
    """
    # exist_ok=True keeps repeated calls (one per split) and reruns from
    # failing with FileExistsError once the directory is already there.
    os.makedirs(RAW_DATA_DIR, exist_ok=True)

    # Use a separate local name so the ``dataset`` argument is not shadowed.
    split_data = load_dataset(dataset, config, split=split)

    # Always write the CSV: the previous "directory does not exist" guard ran
    # after the directory had been created, so nothing was ever saved.
    df = pd.DataFrame(
        {
            "article": split_data["article"],
            "highlights": split_data["highlights"],
        }
    )
    df.to_csv(os.path.join(RAW_DATA_DIR, "{}.csv".format(split)))


if __name__ == '__main__':
    with open("params.yml", encoding="utf-8") as f:
        params = yaml.safe_load(f)

    pprint.pprint(params)

    # Export every split of the dataset named under the "data" key.
    for split_name in ("train", "test", "validation"):
        make_dataset(dataset=params['data'], split=split_name)