import csv
import hashlib
import json
import re

import numpy as np
import pretty_midi
import pypianoroll
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# You'll need a client ID and a client secret:
# https://developer.spotify.com/dashboard/applications
# Then, fill in the variables client_id and client_secret.
#
# NOTE(review): these look like real credentials committed to source control.
# They are kept so the module keeps working as before, but they should be
# rotated and loaded from configuration or the environment instead.
client_id = 'c520641b167a4cd0872d48e5232a41e6'
client_secret = 'a455993eda164da2b67462c2e1382e91'

client_credentials_manager = SpotifyClientCredentials(
    client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)


def get_drums_note_density(mid):
    """Return the note density (notes per unit time) of the drum track.

    Args:
        mid: Object exposing ``instruments``; each instrument has ``is_drum``
            and ``notes`` (each note has ``start`` and ``end``). Presumably a
            ``pretty_midi.PrettyMIDI`` instance.

    Returns:
        Number of drum notes divided by the span between the first note's
        start and the last note's end. NaN when there is not exactly one drum
        instrument, when it has no notes, or when that span is not positive.
    """
    drum_tracks = [inst for inst in mid.instruments if inst.is_drum]
    if len(drum_tracks) != 1 or not drum_tracks[0].notes:
        return float("nan")

    notes = drum_tracks[0].notes
    # Uses list order, i.e. assumes notes are stored chronologically
    # -- TODO confirm against how the notes are loaded.
    duration = notes[-1].end - notes[0].start
    if duration <= 0:
        # Avoid ZeroDivisionError (or a meaningless negative density).
        return float("nan")
    return len(notes) / duration


def get_md5(path):
    """Return the hexadecimal MD5 digest of the file at ``path``."""
    with open(path, "rb") as f:
        return hashlib.md5(f.read()).hexdigest()


def get_hash(path):
    """Return a SHA-1 hex digest of the piano roll stored at ``path``.

    Paths ending in ".mid" are read with ``pretty_midi``; anything else is
    loaded with ``pypianoroll.load`` and the non-empty track piano rolls
    (tracks sorted by name) are concatenated along the last axis.

    Returns:
        The digest string, or the literal "empty_pianoroll" when the MIDI
        file cannot be parsed or no piano-roll data is found.
    """
    if path.endswith(".mid"):
        try:
            mid = pretty_midi.PrettyMIDI(path)
            rolls = mid.get_piano_roll()
        except Exception:
            # Unreadable or corrupt MIDI is treated as empty. Unlike a bare
            # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
            return "empty_pianoroll"
        if rolls.size == 0:
            return "empty_pianoroll"
    else:
        pr = pypianoroll.load(path)
        # Sort by track name so the hash does not depend on track order.
        tracks = sorted(pr.tracks, key=lambda x: x.name)
        rolls = [track.pianoroll for track in tracks
                 if track.pianoroll.shape[0] > 0]
        if not rolls:
            return "empty_pianoroll"
        rolls = np.concatenate(rolls, axis=-1)

    return hashlib.sha1(np.ascontiguousarray(rolls)).hexdigest()


def get_note_density(mid):
    """Return the total note count divided by ``mid.get_end_time()``.

    Returns NaN when the end time is not positive (e.g. no events), instead
    of raising ZeroDivisionError.
    """
    duration = mid.get_end_time()
    if duration <= 0:
        return float("nan")
    n_notes = sum(len(instrument.notes) for instrument in mid.instruments)
    return n_notes / duration


def get_tempo(mid):
    """Return a tempo value derived from the last entry of ``mid._tick_scales``.

    Computed as ``60 / (tick_scale * resolution)``.
    NOTE(review): relies on the private ``_tick_scales`` attribute; presumably
    ``tick_scale`` is seconds per tick, making the result beats per minute
    -- confirm against the pretty_midi version in use.
    """
    tick_scale = mid._tick_scales[-1][-1]
    beat_duration = tick_scale * mid.resolution
    return 60 / beat_duration


def get_n_instruments(mid):
    """Return the number of instruments in ``mid`` that have at least one note."""
    return sum(1 for instrument in mid.instruments if instrument.notes)


def try_multiple(func, *args, **kwargs):
    """Call ``func(*args, **kwargs)``, retrying on failure.

    Up to 30 attempts are made, with no delay between them. Positional and
    keyword arguments are both forwarded.

    Returns:
        The first successful return value of ``func``; ``None`` if every
        attempt raised, or as soon as an exception whose first argument is
        404 is seen (treated as "not found", so retrying is pointless).
    """
    n_max = 29
    for _ in range(n_max + 1):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            # Guard against exceptions constructed without arguments.
            if e.args and e.args[0] == 404:
                return None
    return None


def search_spotify(title, artist, album=None):
    """Search Spotify tracks by title and artist, optionally also by album.

    Returns:
        The list under ``result['tracks']['items']``; an empty list when the
        query is longer than 250 characters or the search call failed.
    """
    query = '"{}"+artist:"{}"'.format(title, artist)
    if album is not None:
        query += '+album:"{}"'.format(album)

    if len(query) > 250:
        # Spotify doesn't search with a query longer than 250 characters.
        return []

    result = try_multiple(sp.search, q=query, type='track')
    if result is None:
        # All attempts failed (or 404): treat as "nothing found".
        return []
    return result['tracks']['items']


def search_spotify_flexible(title, artist, album):
    """Find a Spotify track from metadata, loosening the query step by step.

    Tries, in order: title + artist + album; title + artist; normalised
    title; normalised artist; stripped artist. Stops at the first attempt
    that returns results.

    Returns:
        ``None`` when nothing is found; the single hit when there is one;
        otherwise the item with the highest "popularity" (the first item if
        none has popularity above zero).
    """
    items = search_spotify(title, artist, album)
    if not items:
        items = search_spotify(title, artist)
    if not items:
        title = fix_string(title)
        items = search_spotify(title, artist)
    if not items:
        artist = fix_string(artist)
        items = search_spotify(title, artist)
    if not items:
        artist = strip_artist(artist)
        items = search_spotify(title, artist)

    if not items:
        return None
    if len(items) == 1:
        return items[0]

    # Return the most popular hit; ties keep the earliest one.
    best_item = items[0]
    max_popularity = 0
    for item in items:
        if item is not None and item["popularity"] > max_popularity:
            max_popularity = item["popularity"]
            best_item = item
    return best_item


def matching_strings_flexible(a, b):
    """Return the fraction of matching characters between two strings.

    Both strings are normalised with ``fix_string`` and stripped of
    apostrophes, then compared position by position over the length of the
    shorter one.

    Returns:
        A value in [0, 1]; 0.0 when either string is empty before or after
        normalisation.
    """
    if a == "" or b == "":
        return 0.0

    a = fix_string(a).replace("'", "")
    b = fix_string(b).replace("'", "")

    min_len = min(len(a), len(b))
    if min_len == 0:
        # Normalisation can empty a string (e.g. "(live)"); avoid dividing by 0.
        return 0.0

    matches = sum(1 for char_a, char_b in zip(a, b) if char_a == char_b)
    return matches / min_len


def get_spotify_features(uri_list):
    """Return ``sp.audio_features(uri_list)``, or ``None`` if the call failed."""
    return try_multiple(sp.audio_features, uri_list)


def get_spotify_tracks(uri_list):
    """Return the "tracks" entry of ``sp.tracks`` for up to 50 URIs.

    Only the first 50 entries of ``uri_list`` are requested; the rest are
    silently dropped. Returns ``None`` if the call failed.
    """
    if len(uri_list) > 50:
        uri_list = uri_list[:50]
    tracks = try_multiple(sp.tracks, uri_list)
    if tracks is None:
        return None
    return tracks["tracks"]


def strip_artist(s):
    """Reduce an artist string to (roughly) the first credited artist.

    Lowercases the string, removes every occurrence of "the ", then cuts the
    string at the first occurrence of each separator in turn (" - ", "/",
    " ft", "feat", ...).
    """
    s = s.lower()
    s = s.replace("the ", "")
    keys = (' - ', '/', ' ft', 'feat', 'featuring', ' and ', ' with ', '_',
            ' vs', '&', ';', '+')
    for key in keys:
        loc = s.find(key)
        if loc != -1:
            s = s[:loc]
    return s


def fix_string(s):
    """Normalise a title or artist string for fuzzy matching.

    Lowercases, removes "'s", turns underscores into spaces, removes
    bracketed or parenthesised segments, and drops a single trailing space.
    An empty string is returned unchanged.
    """
    if s != "":
        s = s.lower()
        s = s.replace('\'s', '')  # remove 's
        s = s.replace('_', ' ')  # replace _ with a space
        s = re.sub(r"[\(\[].*?[\)\]]", "", s)  # remove everything in parentheses
        # The substitution above may leave an empty string, so use endswith
        # rather than indexing s[-1].
        if s.endswith(" "):  # remove space at the end
            s = s[:-1]
    return s


def logprint(s, f):
    """Write ``s`` followed by a newline to the file-like object ``f``."""
    f.write(s + '\n')


def get_spotify_ids(json_path):
    """Extract Spotify track ids from a JSON metadata file.

    Reads ``["response"]["songs"]`` and, for the first song only, collects
    the last colon-separated part of "foreign_id" for every track whose
    "catalog" is "spotify".

    Returns:
        A list of id strings; empty when the file lists no songs.
    """
    with open(json_path) as f_json:
        json_data = json.load(f_json)

    songs = json_data["response"]["songs"]
    if not songs:
        return []

    return [
        track["foreign_id"].split(":")[-1]
        for track in songs[0]["tracks"]
        if track["catalog"] == "spotify" and "foreign_id" in track
    ]


def read_csv(input_file_path, delimiter=","):
    """Read a CSV file with a header row into a list of dicts (one per row)."""
    # newline="" lets the csv module handle line endings itself, so newlines
    # embedded in quoted fields are preserved.
    with open(input_file_path, "r", newline="") as f_in:
        reader = csv.DictReader(f_in, delimiter=delimiter)
        return [dict(row) for row in reader]