Spaces:
Runtime error
Runtime error
File size: 771 Bytes
1bf52db 1482fe1 fdcca24 c5a3a4d 504cec1 5fade0e d379f83 93aba5e 3424db2 0130907 24a726e 9d3af5f 5e833ac 17499bc 06d3c94 9531255 9116abe c990a96 e7b5de5 629049e 7be31bf bf1265d 59fd5bc c3adb5e 00177a6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
import yaml
from datasets import load_dataset
import pandas as pd
import os
import pprint
def make_dataset(dataset="cnn_dailymail", split="train"):
    """Download one split of a summarisation dataset and save it as CSV.

    The split is loaded with ``datasets.load_dataset`` using the ``'3.0.0'``
    configuration, and its ``article`` and ``highlights`` columns are written
    to ``data/raw/<split>.csv``.

    Args:
        dataset: Dataset identifier passed to ``load_dataset``.
        split: Name of the split to export; also used as the CSV file stem.
    """
    # exist_ok=True keeps repeated calls (one per split) and re-runs from
    # failing with FileExistsError once the directory is already there.
    os.makedirs('data/raw', exist_ok=True)

    # Use a separate name so the ``dataset`` argument is not shadowed.
    # NOTE(review): '3.0.0' is a fixed config name; it presumably only suits
    # cnn_dailymail-style datasets -- confirm before passing other datasets.
    loaded = load_dataset(dataset, '3.0.0', split=split)

    df = pd.DataFrame({
        'article': loaded['article'],
        'highlights': loaded['highlights'],
    })
    # The index column is kept in the output, as in the original export.
    df.to_csv('data/raw/{}.csv'.format(split))
if __name__ == '__main__':
    # Read the pipeline parameters and echo them so the run is traceable.
    with open("params.yml") as config_file:
        params = yaml.safe_load(config_file)
    pprint.pprint(params)

    # Export every split, in the same order as before.
    for split_name in ('train', 'test', 'validation'):
        make_dataset(dataset=params['data'], split=split_name)
|