Spaces:
Runtime error
Runtime error
| import yaml | |
| from datasets import load_dataset | |
| import pandas as pd | |
| import os | |
| import pprint | |
def make_dataset(dataset="cnn_dailymail", split="train", config_name="3.0.0"):
    """Export one split of a summarisation dataset to ``data/raw/<split>.csv``.

    The split is fetched with ``datasets.load_dataset`` and its ``article``
    and ``highlights`` columns are written to a CSV file named after the
    split.

    Args:
        dataset: Dataset identifier passed to ``load_dataset``.
        split: Name of the split to load; also used as the CSV file name.
        config_name: Dataset configuration passed as the second argument to
            ``load_dataset``. Defaults to ``"3.0.0"``, the value that was
            previously hard-coded.

    Returns:
        None. The result is the CSV file written to disk.
    """
    # exist_ok=True avoids the check-then-create race of testing for the
    # directory first and creating it afterwards.
    os.makedirs("data/raw", exist_ok=True)

    # Keep the loaded object under its own name instead of rebinding the
    # ``dataset`` parameter, so the requested identifier stays available.
    loaded = load_dataset(dataset, config_name, split=split)

    df = pd.DataFrame(
        {
            "article": loaded["article"],
            "highlights": loaded["highlights"],
        }
    )

    # The default index column is kept in the output on purpose: existing
    # consumers of these CSV files may rely on the current layout.
    df.to_csv("data/raw/{}.csv".format(split))
if __name__ == "__main__":
    # Load the run configuration; its "data" entry is forwarded to
    # make_dataset as the dataset identifier.
    with open("params.yml") as params_file:
        params = yaml.safe_load(params_file)
    pprint.pprint(params)

    # Export each split in the same order as before: train, test, validation.
    for split_name in ("train", "test", "validation"):
        make_dataset(dataset=params["data"], split=split_name)