yusufc's picture
Update app.py
f10361e
from __future__ import print_function
import os
import json
import time
import sys
import pandas as pd
import numpy as np
import seaborn as sn
import gradio as gr
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
# Spotify Web API client. Both credentials are read from the environment and
# are None when the corresponding variable is unset.
client_id = os.environ.get("SPOTIPY_CLIENT_ID")
client_secret = os.environ.get("SPOTIPY_CLIENT_SECRET")
sp = spotipy.Spotify(
    client_credentials_manager=SpotifyClientCredentials(
        client_id=client_id,
        client_secret=client_secret,
    ),
)
# Load the track table, drop the columns listed below, keep the first row for
# each track URI, and renumber the rows from zero so positional lookups
# (df.iloc) line up with the rows of the processed feature matrix.
df = (
    pd.read_csv('spotify_data.csv')
    .drop(
        columns=[
            'Unnamed: 0',
            "Unnamed: 0.1",
            "pos",
            "artist_uri",
            "album_uri",
            "duration_ms_x",
            "album_name",
            "name",
            "type",
            "id",
            "track_href",
            "analysis_url",
            "duration_ms_y",
            "time_signature",
            "artist_pop",
            "track_pop",
        ]
    )
    .drop_duplicates(subset=['uri'])
    .reset_index(drop=True)
)

# Only the float64 / int64 columns; this is the frame the preprocessing
# pipeline is fitted on.
df_num = df.select_dtypes(include=['float64', 'int64'])
# Columns fed to the model: the first group is standardised, the second is
# one-hot encoded.
numeric_cols = [
    'acousticness',
    'danceability',
    'energy',
    'instrumentalness',
    'liveness',
    'loudness',
    'speechiness',
    'tempo',
    'valence',
]
categorical_cols = ['key', 'mode']

# Preprocessing applied per column group. Categories not seen during fitting
# are ignored at transform time rather than raising.
preprocessing_pipeline = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
    ]
)

# Fit the transformer on the numeric frame and transform it in the same pass.
df_processed = preprocessing_pipeline.fit_transform(df_num)

# Output column names: numeric names are kept as-is, one-hot names come from
# the fitted encoder.
num_cols_transformed = numeric_cols
_fitted_encoder = preprocessing_pipeline.named_transformers_['cat']
cat_cols_transformed = _fitted_encoder.get_feature_names_out(categorical_cols)
all_cols_transformed = [*num_cols_transformed, *cat_cols_transformed.tolist()]

# Wrap the transformed array in a DataFrame carrying those column names.
df_processed = pd.DataFrame(data=df_processed, columns=all_cols_transformed)
def transform_query(track_uri):
    """Fetch one track's audio features from Spotify as a one-row DataFrame.

    Args:
        track_uri: Track identifier handed unchanged to
            ``sp.audio_features`` (elsewhere in this file it is called with
            an ``open.spotify.com/track/...`` URL).

    Returns:
        A single-row ``pandas.DataFrame`` with the columns acousticness,
        danceability, energy, instrumentalness, liveness, loudness,
        speechiness, tempo, valence, key and mode, in that order.

    Raises:
        ValueError: If Spotify returns no audio features for the track.
    """
    feature_names = (
        'acousticness',
        'danceability',
        'energy',
        'instrumentalness',
        'liveness',
        'loudness',
        'speechiness',
        'tempo',
        'valence',
        'key',
        'mode',
    )

    results = sp.audio_features(track_uri)
    audio_features = results[0] if results else None
    # The API reports a missing track as a null entry; fail with a clear
    # message instead of "'NoneType' object is not subscriptable".
    if audio_features is None:
        raise ValueError(f"No audio features available for track: {track_uri!r}")

    return pd.DataFrame([{name: audio_features[name] for name in feature_names}])
# Nearest-neighbour index (Euclidean distance, 10 neighbours by default)
# fitted on every processed track -- no train/test split is used.
knn_model = NearestNeighbors(
    n_neighbors=10,
    algorithm='auto',
    metric='euclidean',
).fit(df_processed)
# Function to find similar songs to the input URI
def find_similar_songs(track_uri):
    """Return the ten tracks in the dataset closest to the given track.

    Args:
        track_uri: Track identifier forwarded to ``transform_query``.
            Surrounding whitespace is stripped from string input.

    Returns:
        A list of ``(artist_name, track_name, url)`` tuples, nearest first,
        where ``url`` is an ``https://open.spotify.com/track/<id>`` link.
        An empty list is returned for ``None`` or blank input.
    """
    # The interface runs with live=True, so this is also called when the
    # textbox is empty; answer that without hitting the Spotify API.
    if isinstance(track_uri, str):
        track_uri = track_uri.strip()
    if not track_uri:
        return []

    query_data = transform_query(track_uri)

    # Apply the already-fitted preprocessing so the query lives in the same
    # feature space as the indexed tracks.
    query_data_scaled = preprocessing_pipeline.transform(query_data)
    query_data_scaled_df = pd.DataFrame(query_data_scaled, columns=all_cols_transformed)

    # Only the neighbour positions are needed; distances are discarded.
    _, indices = knn_model.kneighbors(query_data_scaled_df, n_neighbors=10)

    # One positional lookup for all neighbours instead of three per row.
    matches = df.iloc[indices[0]]
    return [
        # The track id is the last ':'-separated field of the stored URI.
        (artist, title, f"https://open.spotify.com/track/{uri.split(':')[-1]}")
        for artist, title, uri in zip(
            matches['artist_name'], matches['track_name'], matches['uri']
        )
    ]
# Runs one lookup for a hard-coded track URL every time this module is loaded,
# which triggers a live Spotify request before the interface is built. The
# result is not referenced again in the code visible here -- presumably a
# smoke test; confirm before removing.
similar_songs = find_similar_songs('https://open.spotify.com/track/6rDaCGqcQB1urhpCrrD599?si=2ac7add2ea054ab2')
def format_output(similar_songs):
    """Convert song tuples into a three-column DataFrame.

    Args:
        similar_songs: Iterable of indexable records whose first three items
            are the artist name, the song name and the Spotify track URL.

    Returns:
        A ``pandas.DataFrame`` with the columns "Artist Name", "Song Name"
        and "Spotify Track URL", one row per input record. The columns are
        present even when ``similar_songs`` is empty.
    """
    columns = ["Artist Name", "Song Name", "Spotify Track URL"]
    rows = [(song[0], song[1], song[2]) for song in similar_songs]
    # Passing the columns explicitly keeps the header when there are no rows.
    return pd.DataFrame(rows, columns=columns)
# Create the Gradio interface: one textbox in, one three-column table out.
iface = gr.Interface(
    fn=find_similar_songs,
    inputs=gr.Textbox(label="Enter Spotify Track URL"),
    outputs=gr.Dataframe(headers=["Artist Name", "Song Name", "Spotify Track URL"]),
    # live=True re-runs fn whenever the textbox content changes.
    live=True,
)

# share has to be passed as a keyword argument. A positional string such as
# "share=True" is bound to launch()'s first parameter (inline) and leaves
# sharing disabled.
iface.launch(share=True)