Spaces:
Running
on
Zero
Running
on
Zero
File size: 6,633 Bytes
69defc9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 |
import csv
import hashlib
import json
import os
import re

import numpy as np
import pretty_midi
import pypianoroll
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
"""
You'll need a client ID and a client secret:
https://developer.spotify.com/dashboard/applications
Then, fill in the variables client_id and client_secret
"""
# Credentials are read from the environment first so that secrets do not have
# to live in source control; the literals are kept only as a fallback so that
# existing setups keep working unchanged.
# NOTE(review): the fallback values are secrets committed to the repository --
# rotate them in the Spotify dashboard and remove the literals once every
# environment sets SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET.
client_id = os.environ.get('SPOTIPY_CLIENT_ID', 'c520641b167a4cd0872d48e5232a41e6')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET', 'a455993eda164da2b67462c2e1382e91')
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
# Module-wide Spotify client used by the search/lookup helpers in this file.
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
def get_drums_note_density(mid):
    """Return the note density of the single drum track in `mid`.

    Density is the number of drum notes divided by the span between the
    first note's `start` and the last note's `end` (in whatever time unit
    the notes use -- presumably seconds for pretty_midi objects).

    Args:
        mid: object exposing `instruments`; each instrument must have an
            `is_drum` flag and a `notes` sequence whose items have `start`
            and `end` attributes.

    Returns:
        The density as a float, or NaN when `mid` does not contain exactly
        one drum instrument, when that instrument has no notes, or when the
        measured span is not positive (which would otherwise divide by zero).
    """
    drum_tracks = [instrument for instrument in mid.instruments if instrument.is_drum]
    if len(drum_tracks) != 1 or len(drum_tracks[0].notes) == 0:
        return float("nan")

    notes = drum_tracks[0].notes
    duration = notes[-1].end - notes[0].start
    if duration <= 0:
        # A zero-length (or inverted) span has no meaningful density.
        return float("nan")
    return len(notes) / duration
def get_md5(path):
    """Return the hexadecimal MD5 digest of the raw bytes of the file at `path`."""
    digest = hashlib.md5()
    with open(path, "rb") as stream:
        digest.update(stream.read())
    return digest.hexdigest()
def get_hash(path):
    """Return a SHA-1 hex digest of the piano roll stored at `path`.

    Paths ending in ".mid" are parsed with pretty_midi and hashed via
    `get_piano_roll()`; any other path is loaded with `pypianoroll.load`,
    and the non-empty track piano rolls (tracks ordered by name) are
    concatenated along the last axis before hashing.

    Args:
        path: file path as a string.

    Returns:
        The hex digest, or the sentinel string "empty_pianoroll" when the
        MIDI file cannot be parsed or there is no piano-roll data to hash.
    """
    if path.endswith(".mid"):
        try:
            rolls = pretty_midi.PrettyMIDI(path).get_piano_roll()
        except Exception:
            # Unparseable files are treated like empty ones (best effort).
            # `Exception` rather than a bare `except` so that
            # KeyboardInterrupt / SystemExit still propagate.
            return "empty_pianoroll"
        if rolls.size == 0:
            return "empty_pianoroll"
    else:
        pr = pypianoroll.load(path)
        # Sort by track name so the hash does not depend on track order.
        tracks = sorted(pr.tracks, key=lambda track: track.name)
        rolls = [track.pianoroll for track in tracks if track.pianoroll.shape[0] > 0]
        if rolls == []:
            return "empty_pianoroll"
        rolls = np.concatenate(rolls, axis=-1)
    # hashlib needs a contiguous buffer.
    return hashlib.sha1(np.ascontiguousarray(rolls)).hexdigest()
def get_note_density(mid):
    """Return the overall note density of `mid`.

    The density is the total number of notes across all instruments divided
    by `mid.get_end_time()`.

    Args:
        mid: object exposing `get_end_time()` and `instruments`, where each
            instrument has a `notes` sequence.

    Returns:
        The density as a float, or NaN when the end time is not positive
        (e.g. an empty file), mirroring `get_drums_note_density` instead of
        raising ZeroDivisionError.
    """
    duration = mid.get_end_time()
    if duration <= 0:
        return float("nan")
    n_notes = sum(len(instrument.notes) for instrument in mid.instruments)
    return n_notes / duration
def get_tempo(mid):
    """Return a tempo for `mid`, derived from its last tick-scale entry.

    Takes the last element of the last entry in `mid._tick_scales`,
    multiplies it by `mid.resolution`, and returns 60 divided by that
    product. NOTE(review): presumably the scale is seconds-per-tick and the
    resolution ticks-per-beat, making the result beats per minute -- confirm
    against pretty_midi.
    """
    last_entry = mid._tick_scales[-1]
    seconds_per_beat = last_entry[-1] * mid.resolution
    return 60 / seconds_per_beat
def get_n_instruments(mid):
    """Count the instruments in `mid.instruments` whose `notes` list is not empty."""
    count = 0
    for track in mid.instruments:
        if track.notes != []:
            count += 1
    return count
def try_multiple(func, *args, **kwargs):
    """Call `func` with the given arguments, retrying when it raises.

    Up to 30 attempts are made, back to back (no delay between them). An
    exception whose first argument equals 404 is treated as a definitive
    "not found" and stops the retries immediately.

    Positional and keyword arguments are both forwarded, so calls that mix
    them, or pass none at all, work as expected.

    Args:
        func: the callable to invoke.
        *args: positional arguments forwarded to `func`.
        **kwargs: keyword arguments forwarded to `func`.

    Returns:
        Whatever `func` returns on the first successful attempt, or None
        after a 404 or once all attempts have failed.
    """
    max_attempts = 30
    for _ in range(max_attempts):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            # `e.args` may be empty; guard before looking at the status code.
            if e.args and e.args[0] == 404:
                return None
    return None
def search_spotify(title, artist, album=None):
    """Search Spotify for tracks matching a title, artist and optional album.

    Args:
        title: track title, placed in quotes in the query.
        artist: artist name, added as an `artist:` filter.
        album: optional album name, added as an `album:` filter.

    Returns:
        The list found under `result['tracks']['items']`, or an empty list
        when the query is longer than 250 characters or the search produced
        no result (`try_multiple` returned None).
    """
    query = '"{}"+artist:"{}"'.format(title, artist)
    if album is not None:
        query += '+album:"{}"'.format(album)
    if len(query) > 250:
        # Spotify does not search with a query longer than 250 characters.
        return []
    result = try_multiple(sp.search, q=query, type='track')
    if result is None:
        # All attempts failed (or 404); treat it as "nothing found".
        return []
    return result['tracks']['items']
def search_spotify_flexible(title, artist, album):
    """Find a Spotify track for the given metadata, relaxing the query step by step.

    The search is tried with title/artist/album first, then without the
    album, then with the title normalised by `fix_string`, then with the
    artist normalised as well, and finally with the artist reduced by
    `strip_artist`. The first step that yields results wins.

    Returns:
        None when every step comes back empty, the only hit when there is
        exactly one, and otherwise the hit with the highest "popularity"
        (the first hit when no popularity exceeds 0).
    """
    matches = search_spotify(title, artist, album)
    if matches == []:
        # Retry without the album filter.
        matches = search_spotify(title, artist)
    if matches == []:
        title = fix_string(title)
        matches = search_spotify(title, artist)
    if matches == []:
        artist = fix_string(artist)
        matches = search_spotify(title, artist)
    if matches == []:
        artist = strip_artist(artist)
        matches = search_spotify(title, artist)

    if matches == []:
        return None
    if len(matches) == 1:
        return matches[0]

    # Several hits: keep the most popular one.
    top_index, top_score = 0, 0
    for index, candidate in enumerate(matches):
        if candidate is None:
            continue
        if candidate["popularity"] > top_score:
            top_score = candidate["popularity"]
            top_index = index
    return matches[top_index]
def matching_strings_flexible(a, b):
    """Return the fraction of positions at which two normalised strings agree.

    Both strings are normalised with `fix_string`, stripped of apostrophes,
    and then compared character by character over the length of the shorter
    one.

    Args:
        a: first string.
        b: second string.

    Returns:
        A float in [0, 1]: matching positions divided by the shorter
        length. 0.0 when either input is empty, or becomes empty after
        normalisation (which would otherwise divide by zero).
    """
    if a == "" or b == "":
        return 0.0
    a = fix_string(a).replace("'", "")
    b = fix_string(b).replace("'", "")
    min_len = min(len(a), len(b))
    if min_len == 0:
        return 0.0
    # zip stops at the shorter string, i.e. after min_len positions.
    matches = sum(1 for char_a, char_b in zip(a, b) if char_a == char_b)
    return matches / min_len
def get_spotify_features(uri_list):
    """Request `sp.audio_features` for `uri_list` through `try_multiple`.

    Returns whatever `try_multiple` yields for that call.
    """
    return try_multiple(sp.audio_features, uri_list)
def get_spotify_tracks(uri_list):
    """Fetch track objects for up to 50 URIs through `sp.tracks`.

    Args:
        uri_list: sequence of track URIs/IDs; only the first 50 entries are
            requested, anything beyond that is silently dropped.

    Returns:
        The value stored under "tracks" in the response, or None when
        `try_multiple` returned None.
    """
    if len(uri_list) > 50:
        uri_list = uri_list[:50]
    response = try_multiple(sp.tracks, uri_list)
    if response is None:
        return None
    return response["tracks"]
def strip_artist(s):
    """Reduce an artist credit to its leading part.

    The string is lower-cased, every occurrence of "the " is removed, and
    the result is cut at the first occurrence of each separator in turn
    (" - ", "/", " ft", "feat", ...). Trailing whitespace left by a cut is
    kept.
    """
    separators = (' - ', '/', ' ft', 'feat', 'featuring', ' and ', ' with ', '_', ' vs', '&', ';', '+')
    name = s.lower().replace("the ", "")
    for separator in separators:
        # partition() leaves the string untouched when the separator is absent.
        name = name.partition(separator)[0]
    return name
def fix_string(s):
    """Normalise a title or artist string for fuzzy comparison.

    Lower-cases the string, removes "'s", turns underscores into spaces,
    deletes every (...) or [...] group, and finally drops one trailing
    space if present.

    Args:
        s: the string to normalise.

    Returns:
        The normalised string. May be empty, e.g. when the input consisted
        only of a bracketed group such as "(Live)".
    """
    if s != "":
        s = s.lower()  # lowercase
        s = s.replace('\'s', '')  # remove 's
        s = s.replace('_', ' ')  # remove _
        s = re.sub(r"[\(\[].*?[\)\]]", "", s)  # remove everything in parentheses
        # endswith() is safe even when the substitutions emptied the string.
        if s.endswith(" "):  # remove space at the end
            s = s[:-1]
    return s
def logprint(s, f):
    """Write the string `s` followed by a newline to the file-like object `f`."""
    line = s + '\n'
    f.write(line)
def get_spotify_ids(json_path):
    """Extract Spotify track IDs from a song-metadata JSON file.

    The file must contain `{"response": {"songs": [...]}}`. Only the first
    song is inspected: for each of its "tracks" whose "catalog" is
    "spotify" and which has a "foreign_id", the part after the last ":" of
    that id is collected.

    Args:
        json_path: path of the JSON file, read as UTF-8.

    Returns:
        A list of ID strings; empty when the "songs" list is empty or no
        track qualifies.
    """
    # JSON is UTF-8 by specification; do not rely on the platform default.
    with open(json_path, encoding="utf-8") as f_json:
        songs = json.load(f_json)["response"]["songs"]
    if len(songs) == 0:
        return []
    return [
        track["foreign_id"].split(":")[-1]
        for track in songs[0]["tracks"]
        if track["catalog"] == "spotify" and "foreign_id" in track
    ]
def read_csv(input_file_path, delimiter=","):
    """Read a delimited text file with a header row into a list of dicts.

    Args:
        input_file_path: path of the file to read.
        delimiter: field separator, "," by default.

    Returns:
        One dict per data row, mapping column name to the cell's string
        value.
    """
    # newline="" lets the csv module handle line endings itself, so newlines
    # embedded in quoted fields are preserved as written.
    with open(input_file_path, "r", newline="") as f_in:
        reader = csv.DictReader(f_in, delimiter=delimiter)
        return [dict(row) for row in reader]