File size: 2,259 Bytes
69defc9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import json
from data_processing import read_pianoroll, mid_to_bars, get_maps
import torch
import pandas as pd
from tqdm import tqdm
from concurrent.futures import ProcessPoolExecutor
import time
from functools import partial
import os

""" Preprocessing Lakh MIDI pianoroll dataset.
Divides into bars. Encodes into tuples. Makes transposing easier. """

def run(f, my_iter, max_workers=16):
    """Apply ``f`` to every element of ``my_iter`` in a process pool.

    Args:
        f: Callable taking a single item. It is dispatched to worker
            processes, so it (and the items) are expected to be picklable.
        my_iter: Iterable of inputs. If it has a length, it is used as the
            progress-bar total; otherwise the bar runs without a total.
        max_workers: Number of worker processes (defaults to 16).

    Returns:
        A list with the results of ``f``, in the same order as the inputs.
    """
    try:
        total = len(my_iter)
    except TypeError:
        # Generators and other unsized iterables have no len(); tqdm then
        # shows a running count instead of a percentage.
        total = None

    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        results = list(tqdm(executor.map(f, my_iter), total=total))
    return results

def get_emotion_dict(path):
    """Load a CSV of per-track emotion features into a nested dict.

    The CSV must provide the columns ``path``, ``valence``, ``energy`` and
    ``tempo``. Each row is keyed by the second-to-last ``/``-separated
    component of its ``path`` value; when several rows share a key, the
    last one wins.

    Args:
        path: Location of the CSV file.

    Returns:
        Dict mapping key -> ``{"valence": ..., "energy": ..., "tempo": ...}``.
    """
    records = pd.read_csv(path).to_dict(orient="records")

    emotions = {}
    for row in records:
        key = row["path"].split("/")[-2]
        emotions[key] = {
            "valence": row["valence"],
            "energy": row["energy"],
            "tempo": row["tempo"],
        }
    return emotions

def process(pr_path, event_sym2idx):
    """Read one pianoroll file and convert it into bars.

    Args:
        pr_path: Path to the pianoroll file, using ``/`` as separator.
        event_sym2idx: Mapping passed through to ``mid_to_bars``.

    Returns:
        Dict with ``"file"`` (the last ``/``-separated component of
        ``pr_path``) and ``"bars"`` (whatever ``mid_to_bars`` returned).
    """
    # Brief pause before each file, kept from the original implementation.
    time.sleep(0.001)

    pianoroll = read_pianoroll(pr_path)
    converted = mid_to_bars(pianoroll, event_sym2idx)

    return {
        "file": pr_path.rsplit("/", 1)[-1],
        "bars": converted,
    }

def main():
    """Preprocess every pianoroll listed in the unique-files JSON.

    Steps:
      1. Load the list of file identifiers and build the input ``.npz`` paths.
      2. Convert each file into bars with ``process`` in a process pool.
      3. Drop files whose ``bars`` is ``None``, convert the remaining bars to
         torch tensors and save one ``.pt`` file per input.
      4. Save the maps returned by ``get_maps()`` as ``maps.pt``.
    """
    main_dir = "../../data_files/lpd_5"
    input_dir = "../../data_files/lpd_5/lpd_5_full"
    unique_pr_list_file = "../../data_files/features/pianoroll/unique_files.json"

    output_dir = os.path.join(main_dir, "lpd_5_full_transposable")
    output_maps_path = os.path.join(main_dir, "maps.pt")
    os.makedirs(output_dir, exist_ok=True)

    with open(unique_pr_list_file, "r") as f:
        pr_ids = json.load(f)

    # Inputs live in sub-directories named after the first character of
    # each identifier.
    pr_paths = [
        os.path.join(input_dir, pr_id[0], pr_id + ".npz") for pr_id in pr_ids
    ]

    maps = get_maps()
    func = partial(process, event_sym2idx=maps["event2idx"])

    # Keep only the files for which a bar representation was produced.
    items = [item for item in run(func, pr_paths) if item["bars"] is not None]

    for item in tqdm(items):
        bars = item["bars"]
        # Convert in place so the container returned by mid_to_bars is kept.
        for idx, bar in enumerate(bars):
            bars[idx] = torch.from_numpy(bar)
        output_path = os.path.join(
            output_dir, item["file"].replace(".npz", ".pt")
        )
        torch.save(item, output_path)

    torch.save(maps, output_maps_path)
    

# Run the preprocessing only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()