vincentamato's picture
Initial commit
69defc9
import csv
import hashlib
import json
import os
import re

import numpy as np
import pretty_midi
import pypianoroll
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
"""
You'll need a client ID and a client secret:
https://developer.spotify.com/dashboard/applications
Then, fill in the variables client_id and client_secret
"""
# Credentials can be supplied through the SPOTIPY_CLIENT_ID and
# SPOTIPY_CLIENT_SECRET environment variables; the literals below are only
# used as fallbacks so that existing setups keep working unchanged.
# NOTE(review): these fallback values are secrets committed to source control;
# they should be rotated and removed from the file.
client_id = os.environ.get('SPOTIPY_CLIENT_ID', 'c520641b167a4cd0872d48e5232a41e6')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET', 'a455993eda164da2b67462c2e1382e91')

# Module-level Spotify client shared by the search/lookup helpers in this file.
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
def get_drums_note_density(mid):
    """Return the note density of the single drum track in ``mid``.

    Args:
        mid: Object exposing ``instruments``; every instrument has an
            ``is_drum`` flag and a ``notes`` sequence whose items carry
            ``start`` and ``end`` times.

    Returns:
        The number of drum notes divided by the span between the start of
        the first listed note and the end of the last listed note.
        ``nan`` is returned when there is not exactly one drum track, when
        that track has no notes, or when the span is not positive.
    """
    drum_tracks = [instrument for instrument in mid.instruments if instrument.is_drum]
    if len(drum_tracks) != 1 or len(drum_tracks[0].notes) == 0:
        return float("nan")

    notes = drum_tracks[0].notes
    duration = notes[-1].end - notes[0].start
    if duration <= 0:
        # A zero span would raise ZeroDivisionError and a negative one would
        # yield a meaningless negative density; use the same "unknown"
        # sentinel as the other degenerate cases.
        return float("nan")
    return len(notes) / duration
def get_md5(path):
    """Return the hexadecimal MD5 digest of the file located at ``path``.

    The file is opened in binary mode and its whole content is hashed.
    """
    digest = hashlib.md5()
    with open(path, "rb") as handle:
        digest.update(handle.read())
    return digest.hexdigest()
def get_hash(path):
    """Return a SHA-1 hex digest of the piano roll stored at ``path``.

    Paths ending in ``.mid`` / ``.midi`` (case-insensitive) are parsed with
    ``pretty_midi``; every other path is loaded with ``pypianoroll.load``.

    Args:
        path: File path as a string.

    Returns:
        The SHA-1 hex digest of the piano-roll array, or the sentinel string
        ``"empty_pianoroll"`` when a MIDI file cannot be parsed, its piano
        roll cannot be computed, or the resulting roll is empty.
    """
    if path.lower().endswith((".mid", ".midi")):
        try:
            rolls = pretty_midi.PrettyMIDI(path).get_piano_roll()
        except Exception:
            # Unreadable MIDI files are treated like empty ones.
            # ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt / SystemExit propagate.
            return "empty_pianoroll"
        if rolls.size == 0:
            return "empty_pianoroll"
    else:
        multitrack = pypianoroll.load(path)
        # Sort by track name so the digest does not depend on track order.
        tracks = sorted(multitrack.tracks, key=lambda track: track.name)
        rolls = [track.pianoroll for track in tracks if track.pianoroll.shape[0] > 0]
        if not rolls:
            return "empty_pianoroll"
        rolls = np.concatenate(rolls, axis=-1)

    # sha1 needs a contiguous buffer.
    return hashlib.sha1(np.ascontiguousarray(rolls)).hexdigest()
def get_note_density(mid):
    """Return the overall note density of ``mid``.

    Args:
        mid: Object exposing ``get_end_time()`` and ``instruments``, where
            every instrument has a ``notes`` sequence.

    Returns:
        Total number of notes over all instruments divided by
        ``mid.get_end_time()``. ``nan`` is returned when the end time is not
        positive (e.g. a file without events), instead of raising
        ``ZeroDivisionError``.
    """
    duration = mid.get_end_time()
    n_notes = sum(len(instrument.notes) for instrument in mid.instruments)
    if duration <= 0:
        # Same "unknown" sentinel that get_drums_note_density uses.
        return float("nan")
    return n_notes / duration
def get_tempo(mid):
    """Derive a tempo value from the last tick-scale entry of ``mid``.

    The last element of the last ``mid._tick_scales`` entry is multiplied by
    ``mid.resolution`` and 60 is divided by that product.
    NOTE(review): presumably the scale is seconds per tick and the resolution
    is ticks per beat, which would make the result beats per minute - confirm.
    """
    last_scale = mid._tick_scales[-1][-1]
    per_beat = last_scale * mid.resolution
    return 60 / per_beat
def get_n_instruments(mid):
    """Count the instruments of ``mid`` whose ``notes`` differ from ``[]``."""
    count = 0
    for track in mid.instruments:
        if track.notes != []:
            count += 1
    return count
def try_multiple(func, *args, **kwargs):
    """Call ``func`` until it succeeds, retrying on any ``Exception``.

    Args:
        func: Callable to invoke.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        The value returned by ``func``, or ``None`` when the raised
        exception's first argument equals ``404`` (treated as "not found",
        so retrying is pointless) or when all 30 attempts failed.
    """
    max_attempts = 30
    for _ in range(max_attempts):
        try:
            # Forward both positional and keyword arguments; this also covers
            # calls that pass neither.
            return func(*args, **kwargs)
        except Exception as e:
            # Guard against exceptions raised without arguments before
            # inspecting the first one.
            if e.args and e.args[0] == 404:
                return None
    return None
def search_spotify(title, artist, album=None):
    """Search Spotify for tracks matching a title, an artist and optionally an album.

    Args:
        title: Track title, inserted in double quotes into the query.
        artist: Artist name for the ``artist:`` filter.
        album: Optional album name for the ``album:`` filter.

    Returns:
        The list found under ``result['tracks']['items']``, or an empty list
        when the query exceeds 250 characters or the search gave no result
        (``try_multiple`` returned ``None``).
    """
    query = '"{}"+artist:"{}"'.format(title, artist)
    if album is not None:
        query += '+album:"{}"'.format(album)

    if len(query) > 250:
        # Spotify does not search with a query longer than 250 characters.
        return []

    result = try_multiple(sp.search, q=query, type='track')
    if result is None:
        # try_multiple gives up with None; report "no match" instead of
        # failing on the subscript below.
        return []
    return result['tracks']['items']
def search_spotify_flexible(title, artist, album):
    """Look up a track on Spotify, relaxing the query step by step.

    The attempts are, in order: title + artist + album, title + artist,
    normalised title (``fix_string``), normalised artist (``fix_string``),
    and finally the artist reduced by ``strip_artist``. Each later attempt
    only runs while the previous ones returned an empty list.

    Returns:
        ``None`` when every attempt came back empty, the only item when
        exactly one was found, otherwise the item with the highest
        ``"popularity"`` (the first item when none exceeds 0).
    """
    candidates = search_spotify(title, artist, album)
    if candidates == []:
        candidates = search_spotify(title, artist)
    if candidates == []:
        title = fix_string(title)
        candidates = search_spotify(title, artist)
    if candidates == []:
        artist = fix_string(artist)
        candidates = search_spotify(title, artist)
    if candidates == []:
        artist = strip_artist(artist)
        candidates = search_spotify(title, artist)

    if candidates == []:
        return None
    if len(candidates) == 1:
        return candidates[0]

    # Several hits: keep the index of the most popular non-None candidate.
    top_index = 0
    top_popularity = 0
    for index, candidate in enumerate(candidates):
        if candidate is None:
            continue
        if candidate["popularity"] > top_popularity:
            top_popularity = candidate["popularity"]
            top_index = index
    return candidates[top_index]
def matching_strings_flexible(a, b):
    """Return the fraction of positions at which two normalised strings agree.

    Both strings are normalised with ``fix_string`` and stripped of
    apostrophes, then compared character by character over the length of
    the shorter one.

    Args:
        a: First string.
        b: Second string.

    Returns:
        Number of equal positions divided by the shorter length. ``0.0`` is
        returned when either input is empty or becomes empty after
        normalisation (which previously raised ``ZeroDivisionError``).
    """
    if a == "" or b == "":
        return 0.0

    a = fix_string(a).replace("'", "")
    b = fix_string(b).replace("'", "")

    min_len = min(len(a), len(b))
    if min_len == 0:
        # Normalisation can strip a string down to nothing.
        return 0.0

    # zip stops at the shorter string, i.e. after min_len positions.
    matches = sum(1 for char_a, char_b in zip(a, b) if char_a == char_b)
    return matches / min_len
def get_spotify_features(uri_list):
    """Return the result of ``sp.audio_features`` for ``uri_list``.

    The call goes through ``try_multiple``, so ``None`` is returned when it
    gives up.
    """
    return try_multiple(sp.audio_features, uri_list)
def get_spotify_tracks(uri_list):
    """Fetch track objects for ``uri_list`` through ``sp.tracks``.

    Args:
        uri_list: Sequence of track identifiers. Only the first 50 entries
            are requested; any further entries are silently ignored.

    Returns:
        The value stored under ``"tracks"`` in the response, or ``None``
        when ``try_multiple`` gave up.
    """
    if len(uri_list) > 50:
        uri_list = uri_list[:50]
    tracks = try_multiple(sp.tracks, uri_list)
    if tracks is None:
        return None
    return tracks["tracks"]
def strip_artist(s):
    """Reduce an artist string to the part before any separator.

    The string is lower-cased, every occurrence of ``"the "`` is removed and
    the result is cut at the first occurrence of each separator in turn.
    Surrounding whitespace is not trimmed.
    """
    cleaned = s.lower().replace("the ", "")
    separators = (' - ', '/', ' ft', 'feat', 'featuring', ' and ', ' with ', '_', ' vs', '&', ';', '+')
    for separator in separators:
        # partition() yields the whole string as head when nothing matches.
        cleaned = cleaned.partition(separator)[0]
    return cleaned
def fix_string(s):
    """Normalise a title or artist string for fuzzy matching.

    Lower-cases the text, removes every ``'s``, turns underscores into
    spaces, deletes anything enclosed in parentheses or square brackets and
    finally drops one trailing space if present.

    Args:
        s: Input string.

    Returns:
        The normalised string. An empty input is returned unchanged, and an
        input that becomes empty during normalisation (e.g. ``"(Live)"``)
        yields ``""`` instead of raising ``IndexError``.
    """
    if s == "":
        return s

    s = s.lower()  # lowercase
    s = s.replace('\'s', '')  # remove 's
    s = s.replace('_', ' ')  # remove _
    # Raw string avoids invalid-escape warnings; the pattern is unchanged.
    s = re.sub(r"[\(\[].*?[\)\]]", "", s)  # remove everything in parentheses
    if s.endswith(" "):  # remove one space at the end; safe on empty strings
        s = s[:-1]
    return s
def logprint(s, f):
    """Write ``s`` followed by a newline to the writable object ``f``."""
    line = s + '\n'
    f.write(line)
def get_spotify_ids(json_path):
    """Collect Spotify track ids from a JSON metadata file.

    The file must contain ``{"response": {"songs": [...]}}``. Only the first
    song is inspected: for each of its ``"tracks"`` whose ``"catalog"`` is
    ``"spotify"`` and which has a ``"foreign_id"``, the part of that id after
    the last ``":"`` is collected.

    Returns:
        List of id strings; empty when the file lists no songs.
    """
    with open(json_path) as handle:
        songs = json.load(handle)["response"]["songs"]
        if len(songs) == 0:
            return []
        first_song = songs[0]
        return [
            entry["foreign_id"].split(":")[-1]
            for entry in first_song["tracks"]
            if entry["catalog"] == "spotify" and "foreign_id" in entry
        ]
def read_csv(input_file_path, delimiter=","):
    """Read a delimited text file with a header row into a list of dicts.

    Args:
        input_file_path: Path of the file to read.
        delimiter: Field separator passed to ``csv.DictReader``.

    Returns:
        One plain ``dict`` per data row, keyed by the header fields.
    """
    # newline="" is what the csv module asks for: it lets the reader handle
    # line endings itself, so newlines embedded in quoted fields survive.
    with open(input_file_path, "r", newline="") as f_in:
        reader = csv.DictReader(f_in, delimiter=delimiter)
        return [dict(row) for row in reader]