Spaces:
Running
on
Zero
Running
on
Zero
File size: 2,259 Bytes
69defc9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
import json
from data_processing import read_pianoroll, mid_to_bars, get_maps
import torch
import pandas as pd
from tqdm import tqdm
from concurrent.futures import ProcessPoolExecutor
import time
from functools import partial
import os
""" Preprocessing Lakh MIDI pianoroll dataset.
Divides into bars. Encodes into tuples. Makes transposing easier. """
def run(f, my_iter):
with ProcessPoolExecutor(max_workers=16) as executor:
results = list(tqdm(executor.map(f, my_iter), total=len(my_iter)))
return results
def get_emotion_dict(path):
table = pd.read_csv(path)
table = table.to_dict(orient="records")
table = {item["path"].split("/")[-2]: \
{"valence": item["valence"], "energy": item["energy"], "tempo": item["tempo"]} \
for item in table}
return table
def process(pr_path, event_sym2idx):
time.sleep(0.001)
mid = read_pianoroll(pr_path)
bars = mid_to_bars(mid, event_sym2idx)
file_ = pr_path.split("/")[-1]
item_data = {
"file": file_,
"bars": bars,
}
return item_data
def main():
main_dir = "../../data_files/lpd_5"
input_dir = "../../data_files/lpd_5/lpd_5_full"
unique_pr_list_file = "../../data_files/features/pianoroll/unique_files.json"
output_dir = os.path.join(main_dir, "lpd_5_full_transposable")
os.makedirs(output_dir, exist_ok=True)
output_maps_path = os.path.join(main_dir, "maps.pt")
with open(unique_pr_list_file, "r") as f:
pr_paths = json.load(f)
pr_paths = [os.path.join(input_dir, pr_path[0], pr_path + ".npz") for pr_path in pr_paths]
maps = get_maps()
func = partial(process, event_sym2idx=maps["event2idx"])
os.makedirs(output_dir, exist_ok=True)
x = run(func, pr_paths)
x = [item for item in x if item["bars"] is not None]
for i in tqdm(range(len(x))):
for j in range(len(x[i]["bars"])):
x[i]["bars"][j] = torch.from_numpy(x[i]["bars"][j])
fname = x[i]["file"]
output_path = os.path.join(output_dir, fname.replace(".npz", ".pt"))
torch.save(x[i], output_path)
torch.save(maps, output_maps_path)
if __name__ == "__main__":
main()
|