File size: 871 Bytes
5b81931
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from datasets import load_dataset_builder, load_dataset
import logging 

def inspect(lang='amharic'):
    """Return the description and features of one csebuetnlp/xlsum config.

    Only the dataset builder is created; its ``info`` object is read for
    the two returned fields.

    Args:
        lang: Config name passed to ``load_dataset_builder`` (for example
            'amharic', 'english', 'hausa', 'swahili', 'yoruba' or 'igbo').
            Defaults to 'amharic', so calling ``inspect()`` with no
            argument queries that config.

    Returns:
        A ``(description, features)`` tuple taken from ``builder.info``.
    """
    # A single builder is enough: the function returns one (desc, feat)
    # pair, so looping over several languages and returning from inside
    # the loop would only ever reach the first one.
    ds_builder = load_dataset_builder("csebuetnlp/xlsum", lang)
    info = ds_builder.info
    return info.description, info.features
    
def load():
    """Export the ``train`` split of csebuetnlp/xlsum to one CSV per language.

    For each language in the hard-coded list, the ``train`` split is loaded
    with ``load_dataset`` and written to ``<lang>.csv`` (a relative path,
    so it lands in the current working directory).

    Errors are handled per language: any ``Exception`` raised while loading
    or writing is logged with its traceback and the loop moves on to the
    next language. ``Exception`` subclasses are therefore never propagated
    to the caller.

    Returns:
        None.
    """
    langs = ['amharic', 'hausa', 'swahili', 'yoruba', 'igbo']

    for lang in langs:
        # The try lives inside the loop so that one failing language does
        # not prevent the remaining ones from being exported.
        try:
            dataset = load_dataset("csebuetnlp/xlsum", lang, split="train")
            # NOTE(review): index=None presumably suppresses the row-index
            # column in the CSV - confirm against Dataset.to_csv.
            dataset.to_csv(f"{lang}.csv", index=None)
        except Exception:
            # logging.exception logs at ERROR level and appends the
            # traceback; a DEBUG-level message would be dropped by the
            # root logger's default WARNING threshold, hiding the failure.
            logging.exception("Failed to export csebuetnlp/xlsum '%s' to CSV", lang)

if __name__ == "__main__":
    # Script entry point: export the datasets to CSV.
    # To look at dataset metadata instead, run: print(inspect())
    load()