File size: 6,633 Bytes
69defc9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
import csv
import hashlib
import json
import os
import re

import numpy as np
import pretty_midi
import pypianoroll
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Spotify Web API client setup.
#
# You'll need a client ID and a client secret:
# https://developer.spotify.com/dashboard/applications
# Then either export them as the environment variables SPOTIPY_CLIENT_ID and
# SPOTIPY_CLIENT_SECRET (preferred), or fill in the fallback values of
# client_id and client_secret below.
#
# NOTE(review): the fallback literals are credentials committed to source
# control. They are kept only so existing setups keep working; they should be
# rotated and removed once every caller supplies the environment variables.
client_id = os.environ.get('SPOTIPY_CLIENT_ID', 'c520641b167a4cd0872d48e5232a41e6')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET', 'a455993eda164da2b67462c2e1382e91')
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
# Shared client used by the search / lookup helpers in this module.
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

def get_drums_note_density(mid):
    """Return the note density (notes per time unit) of the single drum track.

    Args:
        mid: Object exposing ``instruments``; each instrument must provide
            ``is_drum`` and ``notes``, and each note ``start`` and ``end``.

    Returns:
        ``len(notes) / (notes[-1].end - notes[0].start)`` for the drum track,
        or ``float("nan")`` when there is not exactly one drum track, when
        that track has no notes, or when the measured span is not positive.
    """
    drum_tracks = [instrument for instrument in mid.instruments if instrument.is_drum]
    if len(drum_tracks) != 1 or len(drum_tracks[0].notes) == 0:
        return float("nan")

    notes = drum_tracks[0].notes
    # The span runs from the first stored note's start to the last stored
    # note's end, exactly as the notes are ordered in the track.
    duration = notes[-1].end - notes[0].start
    if duration <= 0:
        # A zero-length (or inverted) span has no meaningful density; report
        # it the same way as the other undefined cases instead of dividing.
        return float("nan")
    return len(notes) / duration

def get_md5(path):
    """Return the hexadecimal MD5 digest of the file stored at ``path``."""
    with open(path, "rb") as handle:
        contents = handle.read()
    digest = hashlib.md5(contents)
    return digest.hexdigest()

def get_hash(path):
    """Return a SHA-1 hex digest of the piano-roll content of a music file.

    Paths ending in ``.mid`` are loaded with ``pretty_midi`` and hashed via
    ``get_piano_roll()``. Any other path is loaded with ``pypianoroll.load``;
    its tracks are sorted by name and the non-empty piano rolls are
    concatenated along the last axis before hashing.

    Args:
        path: Path of the file to hash.

    Returns:
        The SHA-1 hex digest of the piano-roll array, or the sentinel string
        ``"empty_pianoroll"`` when a ``.mid`` file cannot be loaded or
        rendered, or when no piano-roll data is found.
    """
    if path.endswith(".mid"):
        try:
            mid = pretty_midi.PrettyMIDI(path)
            rolls = mid.get_piano_roll()
        except Exception:
            # Unreadable or malformed MIDI is deliberately treated like an
            # empty file; KeyboardInterrupt/SystemExit are no longer swallowed.
            return "empty_pianoroll"
        if rolls.size == 0:
            return "empty_pianoroll"
    else:
        pr = pypianoroll.load(path)
        # Sort by track name so the hash does not depend on track order.
        tracks = sorted(pr.tracks, key=lambda x: x.name)
        rolls = [track.pianoroll for track in tracks if track.pianoroll.shape[0] > 0]
        if not rolls:
            return "empty_pianoroll"
        rolls = np.concatenate(rolls, axis=-1)
    return hashlib.sha1(np.ascontiguousarray(rolls)).hexdigest()

def get_note_density(mid):
    """Return the overall note density of a MIDI object.

    Args:
        mid: Object exposing ``get_end_time()`` and ``instruments``, where
            each instrument has a ``notes`` sequence.

    Returns:
        Total number of notes across all instruments divided by
        ``mid.get_end_time()``, or ``float("nan")`` when the end time is not
        positive (e.g. an empty file), where the density is undefined.
    """
    duration = mid.get_end_time()
    if duration <= 0:
        # Avoid ZeroDivisionError on empty MIDI objects.
        return float("nan")
    n_notes = sum(len(instrument.notes) for instrument in mid.instruments)
    return n_notes / duration

def get_tempo(mid):
    """Return a tempo value computed from the last tick-scale entry of ``mid``.

    The last element of the last entry in ``mid._tick_scales`` is multiplied
    by ``mid.resolution`` to get a beat duration, and 60 is divided by that
    duration.
    """
    # NOTE(review): presumably the tick scale is seconds per tick and the
    # resolution is ticks per beat, making the result beats per minute —
    # confirm against pretty_midi.
    last_entry = mid._tick_scales[-1]
    seconds_per_beat = last_entry[-1] * mid.resolution
    return 60 / seconds_per_beat

def get_n_instruments(mid):
    """Count the instruments of ``mid`` whose ``notes`` is not an empty list."""
    count = 0
    for track in mid.instruments:
        if track.notes != []:
            count += 1
    return count

def try_multiple(func, *args, **kwargs):
    """Call ``func`` repeatedly until it succeeds, giving up after 30 attempts.

    Args:
        func: Callable to invoke.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        The return value of the first successful call. ``None`` when an
        exception whose first argument equals ``404`` is raised (retrying is
        pointless), or when all 30 attempts fail.
    """
    max_attempts = 30
    for _ in range(max_attempts):
        try:
            # Forward positional and keyword arguments together so neither
            # is dropped, and so a call without arguments still works.
            return func(*args, **kwargs)
        except Exception as e:
            # Exceptions raised without arguments have empty ``args``; guard
            # the lookup so the retry loop itself cannot raise IndexError.
            if e.args and e.args[0] == 404:
                return None
    return None

def search_spotify(title, artist, album=None):
    """Search Spotify for tracks matching a title, an artist and optionally an album.

    Args:
        title: Track title, placed in the query inside double quotes.
        artist: Artist name, used in an ``artist:"..."`` filter.
        album: Optional album name, used in an ``album:"..."`` filter.

    Returns:
        The list found under ``result['tracks']['items']``. An empty list is
        returned when the query is longer than 250 characters or when the
        search call yields no result.
    """
    query = '"{}"+artist:"{}"'.format(title, artist)
    if album is not None:
        query += '+album:"{}"'.format(album)
    if len(query) > 250:
        # Spotify does not search with a query longer than 250 characters.
        return []
    result = try_multiple(sp.search, q=query, type='track')
    if result is None:
        # try_multiple gives up with None; treat that as "nothing found"
        # instead of failing on the subscript below.
        return []
    return result['tracks']['items']


def search_spotify_flexible(title, artist, album):
    """Find one Spotify track for the given metadata, relaxing the query step by step.

    The search is tried in this order, stopping at the first non-empty result:
    title + artist + album, title + artist, cleaned title, cleaned artist,
    stripped artist. Each relaxation builds on the previous one.

    Returns:
        ``None`` when every attempt comes back empty; the single hit when
        there is exactly one; otherwise the first hit with the highest
        ``"popularity"`` (the first hit when none is above zero).
    """
    candidates = search_spotify(title, artist, album)
    if candidates == []:
        candidates = search_spotify(title, artist)
    if candidates == []:
        title = fix_string(title)
        candidates = search_spotify(title, artist)
    # Progressively normalise the artist name: generic cleanup first, then
    # the more aggressive collaborator stripping.
    for normalise in (fix_string, strip_artist):
        if candidates == []:
            artist = normalise(artist)
            candidates = search_spotify(title, artist)
    if candidates == []:
        return None

    if len(candidates) == 1:
        return candidates[0]

    # Several hits: keep the most popular one.
    top_index = 0
    top_popularity = 0
    for index, candidate in enumerate(candidates):
        if candidate is None:
            continue
        if candidate["popularity"] > top_popularity:
            top_popularity = candidate["popularity"]
            top_index = index
    return candidates[top_index]

def matching_strings_flexible(a, b):
    """Return the fraction of positions at which two normalised strings agree.

    Both strings are passed through ``fix_string`` and have apostrophes
    removed; they are then compared character by character over the length
    of the shorter one.

    Args:
        a: First string.
        b: Second string.

    Returns:
        Matching positions divided by the shorter normalised length, or
        ``0.0`` when either string is empty before or after normalisation.
    """
    if a == "" or b == "":
        return 0.0
    a = fix_string(a).replace("'", "")
    b = fix_string(b).replace("'", "")
    min_len = min(len(a), len(b))
    if min_len == 0:
        # Normalisation can strip a string down to nothing (e.g. a lone
        # apostrophe); there is nothing to compare, so report no match.
        return 0.0
    # zip stops at the shorter string, i.e. after min_len positions.
    matches = sum(1 for char_a, char_b in zip(a, b) if char_a == char_b)
    return matches / min_len

def get_spotify_features(uri_list):
    """Request ``sp.audio_features`` for ``uri_list`` through ``try_multiple``.

    Returns whatever ``try_multiple`` returns for that call.
    """
    return try_multiple(sp.audio_features, uri_list)

def get_spotify_tracks(uri_list):
    """Fetch track objects for up to 50 URIs via ``sp.tracks``.

    Args:
        uri_list: Sequence of track identifiers; only the first 50 entries
            are used, any further ones are silently ignored.

    Returns:
        The value stored under ``"tracks"`` in the response, or ``None``
        when ``try_multiple`` returns ``None`` for the request.
    """
    if len(uri_list) > 50:
        uri_list = uri_list[:50]
    tracks = try_multiple(sp.tracks, uri_list)
    # Identity test: ``== None`` would dispatch to the response's __eq__.
    if tracks is None:
        return None
    return tracks["tracks"]


def strip_artist(s):
    """Reduce an artist credit to a lowercase name without collaborators.

    The string is lowercased, every occurrence of ``"the "`` is removed, and
    the result is cut at the first occurrence of each separator in turn
    (applied in the listed order, each on the already shortened string).
    """
    separators = (' - ', '/', ' ft', 'feat', 'featuring', ' and ', ' with ', '_', ' vs', '&', ';', '+')
    name = s.lower().replace("the ", "")
    for separator in separators:
        # partition() returns the whole string as the head when the
        # separator is absent, so this truncates only on a match.
        name = name.partition(separator)[0]
    return name

def fix_string(s):
    """Normalise a title or artist string for comparison and searching.

    The string is lowercased, ``'s`` is removed, underscores become spaces,
    bracketed text (``(...)`` or ``[...]``) is deleted, and one trailing
    space is dropped.

    Args:
        s: String to normalise.

    Returns:
        The normalised string; an empty input is returned unchanged.
    """
    if s == "":
        return s
    s = s.lower()   # lowercase
    s = s.replace('\'s', '')    # remove 's
    s = s.replace('_', ' ')    # replace _ with a space
    # Raw string keeps the pattern identical while avoiding invalid escape
    # sequences in a normal string literal.
    s = re.sub(r"[\(\[].*?[\)\]]", "", s)    # remove everything in parentheses
    # endswith() is safe when the substitutions left nothing behind, where
    # indexing the last character would raise IndexError.
    if s.endswith(" "):    # remove space at the end
        s = s[:-1]
    return s

def logprint(s, f):
    """Write ``s`` followed by a newline to the file-like object ``f``."""
    line = s + '\n'
    f.write(line)

def get_spotify_ids(json_path):
    """Extract Spotify track IDs from a JSON metadata file.

    The file is expected to hold ``{"response": {"songs": [...]}}``. Only the
    first song is inspected; for each of its ``"tracks"`` whose ``"catalog"``
    is ``"spotify"`` and that has a ``"foreign_id"``, the part after the last
    ``":"`` of that id is collected.

    Returns:
        The list of collected IDs, empty when there are no songs.
    """
    with open(json_path) as f_json:
        songs = json.load(f_json)["response"]["songs"]
    if len(songs) == 0:
        return []
    first_song = songs[0]
    return [
        track["foreign_id"].split(":")[-1]
        for track in first_song["tracks"]
        if track["catalog"] == "spotify" and "foreign_id" in track
    ]

def read_csv(input_file_path, delimiter=","):
    """Read a CSV file with a header row into a list of dictionaries.

    Args:
        input_file_path: Path of the CSV file.
        delimiter: Field separator passed to ``csv.DictReader``.

    Returns:
        One plain ``dict`` per data row, keyed by the header names.
    """
    # newline="" lets the csv module do its own newline handling, so line
    # breaks embedded in quoted fields are preserved exactly.
    with open(input_file_path, "r", newline="") as f_in:
        reader = csv.DictReader(f_in, delimiter=delimiter)
        return [dict(row) for row in reader]