Spaces:
Runtime error
Runtime error
File size: 4,573 Bytes
734d5ec 69a36c6 734d5ec 69a36c6 734d5ec 69a36c6 0dc07cf 734d5ec 69a36c6 734d5ec 69a36c6 734d5ec f10361e 734d5ec |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
from __future__ import print_function
import os
import json
import time
import sys
import pandas as pd
import numpy as np
import seaborn as sn
import gradio as gr
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
# Spotify API credentials are read from the environment; either value is
# None when the corresponding variable is not set.
client_id = os.environ.get("SPOTIPY_CLIENT_ID")
client_secret = os.environ.get("SPOTIPY_CLIENT_SECRET")

# Client-credentials auth manager handed to the shared Spotify client `sp`.
credentials_manager = SpotifyClientCredentials(
    client_id=client_id,
    client_secret=client_secret,
)
sp = spotipy.Spotify(client_credentials_manager=credentials_manager)
# Load the track dataset and discard every column that is not used below.
df = pd.read_csv('spotify_data.csv')
df = df.drop(columns=[
    'Unnamed: 0',
    "Unnamed: 0.1",
    "pos",
    "artist_uri",
    "album_uri",
    "duration_ms_x",
    "album_name",
    "name",
    "type",
    "id",
    "track_href",
    "analysis_url",
    "duration_ms_y",
    "time_signature",
    "artist_pop",
    "track_pop",
])

# Keep one row per track URI and renumber the rows from 0, so that row
# positions in `df` match row positions in the processed feature matrix.
df = df.drop_duplicates(subset=['uri']).reset_index(drop=True)

# Only float64/int64 columns are passed to the preprocessing step.
df_num = df.select_dtypes(include=['float64', 'int64'])

numeric_cols = [
    'acousticness',
    'danceability',
    'energy',
    'instrumentalness',
    'liveness',
    'loudness',
    'speechiness',
    'tempo',
    'valence',
]
categorical_cols = ['key', 'mode']

# Preprocessing: standardise the numeric features, one-hot encode the
# categorical ones (categories unseen at fit time are ignored on transform).
preprocessing_pipeline = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
    ]
)

# Fit on the dataset and transform it in one step.
processed_values = preprocessing_pipeline.fit_transform(df_num)

# Column names of the transformed matrix: the numeric names are unchanged,
# the categorical names come from the fitted one-hot encoder.
num_cols_transformed = numeric_cols
cat_cols_transformed = (
    preprocessing_pipeline
    .named_transformers_['cat']
    .get_feature_names_out(categorical_cols)
)
all_cols_transformed = [*num_cols_transformed, *cat_cols_transformed.tolist()]

# Wrap the transformed values in a DataFrame carrying those column names.
df_processed = pd.DataFrame(processed_values, columns=all_cols_transformed)
def transform_query(track_uri):
    """Fetch one track's audio features from Spotify as a one-row DataFrame.

    Args:
        track_uri: Track identifier passed unchanged to
            ``sp.audio_features``.

    Returns:
        A single-row ``pandas.DataFrame`` with the columns acousticness,
        danceability, energy, instrumentalness, liveness, loudness,
        speechiness, tempo, valence, key and mode.

    Raises:
        ValueError: If Spotify returned no audio features for the track.
    """
    feature_names = (
        'acousticness',
        'danceability',
        'energy',
        'instrumentalness',
        'liveness',
        'loudness',
        'speechiness',
        'tempo',
        'valence',
        'key',
        'mode',
    )

    features = sp.audio_features(track_uri)
    # The response is a list with one entry per requested track; the entry
    # can be None when no features are available, which would otherwise
    # surface as an opaque "'NoneType' object is not subscriptable" error.
    audio_features = features[0] if features else None
    if audio_features is None:
        raise ValueError(
            f"No audio features available for track: {track_uri!r}"
        )

    track_dict = {name: audio_features[name] for name in feature_names}
    return pd.DataFrame([track_dict])
# Nearest-neighbour index over the processed features. It is fitted on the
# whole dataset (no train/test split), so any row can be returned as a match.
knn_model = NearestNeighbors(
    n_neighbors=10,
    algorithm='auto',
    metric='euclidean',
).fit(df_processed)
# Function to find similar songs to the input URI
def find_similar_songs(track_uri):
    """Return the ten dataset tracks most similar to a Spotify track.

    Args:
        track_uri: Spotify track URL or URI. ``None``, an empty string or a
            whitespace-only string yields an empty result instead of being
            sent to the Spotify API.

    Returns:
        A list of ``(artist_name, track_name, url)`` tuples in the order
        ``knn_model.kneighbors`` returns the neighbours, where ``url`` is
        an ``https://open.spotify.com/track/<id>`` link. The list is empty
        for blank input.
    """
    # The live textbox can submit blank text; there is nothing to look up.
    if not track_uri or not track_uri.strip():
        return []

    query_data = transform_query(track_uri.strip())

    # Apply the already-fitted preprocessing so the query lives in the same
    # feature space (and column layout) as the rows the model was fitted on.
    query_data_scaled = preprocessing_pipeline.transform(query_data)
    query_data_scaled_df = pd.DataFrame(
        query_data_scaled, columns=all_cols_transformed
    )

    # Only the neighbour positions are needed; the distances are discarded.
    _, indices = knn_model.kneighbors(query_data_scaled_df, n_neighbors=10)

    similar_songs = []
    for index in indices[0]:
        # One positional row lookup per neighbour instead of one per field.
        row = df.iloc[index]
        # The track id is the last ':'-separated component of the stored URI.
        track_id = row['uri'].split(":")[-1]
        full_url = f"https://open.spotify.com/track/{track_id}"
        similar_songs.append((row['artist_name'], row['track_name'], full_url))
    return similar_songs
# Sample query executed once when the module is loaded.
sample_track_url = 'https://open.spotify.com/track/6rDaCGqcQB1urhpCrrD599?si=2ac7add2ea054ab2'
similar_songs = find_similar_songs(sample_track_url)
def format_output(similar_songs):
    """Turn (artist, song, url) entries into a display DataFrame.

    Args:
        similar_songs: Iterable of indexable entries whose first three
            items are the artist name, the song name and the track URL.

    Returns:
        A ``pandas.DataFrame`` with one row per entry and the columns
        "Artist Name", "Song Name" and "Spotify Track URL".
    """
    rows = [
        {
            "Artist Name": entry[0],
            "Song Name": entry[1],
            "Spotify Track URL": entry[2],
        }
        for entry in similar_songs
    ]
    return pd.DataFrame(rows)
# Create the Gradio interface: one textbox for the track URL, and a table
# whose headers match the (artist, song, url) tuples that
# find_similar_songs returns.
iface = gr.Interface(
    fn=find_similar_songs,
    inputs=gr.Textbox(label="Enter Spotify Track URL"),
    outputs=gr.Dataframe(headers=["Artist Name", "Song Name", "Spotify Track URL"]),
    live=True,
)

# `share` has to be given as a keyword argument. The previous call passed
# the string "share=True" as the first positional argument, which never
# set the `share` option.
iface.launch(share=True)
|