Spaces:

yusufc
/

Spotify-SimilarSongFinder

Runtime error

File size: 4,573 Bytes


from __future__ import print_function

import os
import json
import time
import sys


import pandas as pd
import numpy as np  
import seaborn as sn
import gradio as gr

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors



import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

client_id = os.getenv("SPOTIPY_CLIENT_ID")
client_secret = os.getenv("SPOTIPY_CLIENT_SECRET")

sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(client_id=client_id, client_secret=client_secret))

df = pd.read_csv('spotify_data.csv')


df = df.drop(columns=['Unnamed: 0', "Unnamed: 0.1", "pos", "artist_uri", "album_uri", "duration_ms_x", "album_name", "name", "type", "id", "track_href", "analysis_url", "duration_ms_y", "time_signature", "artist_pop", "track_pop"])

df.drop_duplicates(subset=['uri'], inplace=True)
df.reset_index(drop=True, inplace=True)
df_num = df.select_dtypes(include = ['float64', 'int64'])


numeric_cols = ['acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'valence']
categorical_cols = ['key', 'mode']



# Create the preprocessing pipeline
preprocessing_pipeline = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ])

# Apply the preprocessing pipeline to your DataFrame
df_processed = preprocessing_pipeline.fit_transform(df_num)

num_cols_transformed = numeric_cols
cat_cols_transformed = preprocessing_pipeline.named_transformers_['cat'].get_feature_names_out(categorical_cols)

# Combine the transformed column names

all_cols_transformed = num_cols_transformed + cat_cols_transformed.tolist()

# Convert the processed NumPy array back to a DataFrame
df_processed = pd.DataFrame(df_processed, columns=all_cols_transformed)


def transform_query(track_uri):
    audio_features = sp.audio_features(track_uri)[0]
    track_data = []
    track_dict = {
        'acousticness': audio_features['acousticness'],
        'danceability': audio_features['danceability'],
        'energy': audio_features['energy'],
        'instrumentalness': audio_features['instrumentalness'],
        'liveness': audio_features['liveness'],
        'loudness': audio_features['loudness'],
        'speechiness': audio_features['speechiness'],
        'tempo': audio_features['tempo'],
        'valence': audio_features['valence'],
        'key': audio_features['key'],
        'mode': audio_features['mode']
    }
    
    track_data.append(track_dict)
    query_data = pd.DataFrame(track_data)
    return query_data


knn_model = NearestNeighbors(n_neighbors=10, algorithm='auto', metric='euclidean')
knn_model.fit(df_processed) # I'm using all the data for KNN

# Function to find similar songs to the input URI
def find_similar_songs(track_uri):

    query_data = transform_query(track_uri)
    
    # Scale the query data using the same scaler
    query_data_scaled = preprocessing_pipeline.transform(query_data)
    query_data_scaled_df = pd.DataFrame(query_data_scaled, columns=all_cols_transformed)

    # Find the most similar songs using the KNN model
    distances, indices = knn_model.kneighbors(query_data_scaled_df, n_neighbors=10)

    # Retrieve the Artist Name, Song Name, and Track URI of the most similar songs
    similar_songs = []
    for index in indices[0]:
        artist_name = df.iloc[index]['artist_name']
        song_name = df.iloc[index]['track_name']
        similar_uri = df.iloc[index]['uri']
        
        track_id = similar_uri.split(":")[-1]
        full_url = f"https://open.spotify.com/track/{track_id}"

        similar_songs.append((artist_name, song_name, full_url))
        
    return similar_songs


similar_songs = find_similar_songs('https://open.spotify.com/track/6rDaCGqcQB1urhpCrrD599?si=2ac7add2ea054ab2')


def format_output(similar_songs):
    output = []
    for song in similar_songs:
        output.append({"Artist Name": song[0], "Song Name": song[1], "Spotify Track URL": song[2]})
    return pd.DataFrame(output)

# Create the Gradio interface
iface = gr.Interface(
    fn=find_similar_songs,  # Your find_similar_songs function
    inputs=gr.Textbox(label="Enter Spotify Track URL"),
    outputs=gr.Dataframe(headers=["Artist Name", "Song Name", "Spotify Track URL"]),
    live=True
)


iface.launch("share=True")