Spaces: Runtime error

Upload 3 files

- .gitattributes +1 -0
- app.py +133 -0
- requirements.txt +0 -0
- spotify_data.csv +3 -0

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+spotify_data.csv filter=lfs diff=lfs merge=lfs -text

app.py ADDED
@@ -0,0 +1,133 @@
from __future__ import print_function

import os
import json
import time
import sys

import pandas as pd
import numpy as np
import seaborn as sn
import gradio as gr

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# SpotifyClientCredentials() with no arguments reads the client id and secret
# from the SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET environment variables.
sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials())

df = pd.read_csv('spotify_data.csv')

# Drop index, metadata and popularity columns; keep the audio features,
# the artist/track names and the track URI.
df = df.drop(columns=['Unnamed: 0', "Unnamed: 0.1", "pos", "artist_uri", "album_uri", "duration_ms_x", "album_name", "name", "type", "id", "track_href", "analysis_url", "duration_ms_y", "time_signature", "artist_pop", "track_pop"])

df.drop_duplicates(subset=['uri'], inplace=True)
df.reset_index(drop=True, inplace=True)
df_num = df.select_dtypes(include=['float64', 'int64'])

numeric_cols = ['acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'valence']
categorical_cols = ['key', 'mode']

# Create the preprocessing pipeline
preprocessing_pipeline = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ])

# Apply the preprocessing pipeline to the DataFrame
df_processed = preprocessing_pipeline.fit_transform(df_num)

num_cols_transformed = numeric_cols
cat_cols_transformed = preprocessing_pipeline.named_transformers_['cat'].get_feature_names_out(categorical_cols)

# Combine the transformed column names
all_cols_transformed = num_cols_transformed + cat_cols_transformed.tolist()

# Convert the processed NumPy array back to a DataFrame
df_processed = pd.DataFrame(df_processed, columns=all_cols_transformed)


def transform_query(track_uri):
    # Fetch the track's audio features from the Spotify API and return them
    # as a one-row DataFrame with the same columns used for fitting.
    audio_features = sp.audio_features(track_uri)[0]
    track_data = []
    track_dict = {
        'acousticness': audio_features['acousticness'],
        'danceability': audio_features['danceability'],
        'energy': audio_features['energy'],
        'instrumentalness': audio_features['instrumentalness'],
        'liveness': audio_features['liveness'],
        'loudness': audio_features['loudness'],
        'speechiness': audio_features['speechiness'],
        'tempo': audio_features['tempo'],
        'valence': audio_features['valence'],
        'key': audio_features['key'],
        'mode': audio_features['mode']
    }

    track_data.append(track_dict)
    query_data = pd.DataFrame(track_data)
    return query_data


knn_model = NearestNeighbors(n_neighbors=10, algorithm='auto', metric='euclidean')
knn_model.fit(df_processed)  # using all the data for KNN (no train/test split)


# Function to find songs similar to the input URI
def find_similar_songs(track_uri):
    query_data = transform_query(track_uri)

    # Scale the query data using the same preprocessing pipeline
    query_data_scaled = preprocessing_pipeline.transform(query_data)
    query_data_scaled_df = pd.DataFrame(query_data_scaled, columns=all_cols_transformed)

    # Find the most similar songs using the KNN model
    distances, indices = knn_model.kneighbors(query_data_scaled_df, n_neighbors=10)

    # Retrieve the Artist Name, Song Name, and Track URL of the most similar songs
    similar_songs = []
    for index in indices[0]:
        artist_name = df.iloc[index]['artist_name']
        song_name = df.iloc[index]['track_name']
        similar_uri = df.iloc[index]['uri']

        track_id = similar_uri.split(":")[-1]
        full_url = f"https://open.spotify.com/track/{track_id}"

        similar_songs.append((artist_name, song_name, full_url))

    return similar_songs


# One-off sanity check that the pipeline works end to end at startup.
similar_songs = find_similar_songs('https://open.spotify.com/track/6rDaCGqcQB1urhpCrrD599?si=2ac7add2ea054ab2')


def format_output(similar_songs):
    output = []
    for song in similar_songs:
        output.append({"Artist Name": song[0], "Song Name": song[1], "Spotify Track URL": song[2]})
    return pd.DataFrame(output)


# Create the Gradio interface
iface = gr.Interface(
    # Format the results so the Dataframe component receives a pandas DataFrame
    fn=lambda track_url: format_output(find_similar_songs(track_url)),
    inputs=gr.Textbox(label="Enter Spotify Track URL"),
    outputs=gr.Dataframe(headers=["Artist Name", "Song Name", "Spotify Track URL"]),
    live=True
)


iface.launch(share=True)
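Note: SpotifyClientCredentials() is constructed without arguments above, so it relies on credentials supplied through the environment and fails at startup if none are set. A minimal sketch of the setup app.py assumes (the values are placeholders, not real keys); on a Hugging Face Space these would typically be added as repository secrets rather than hard-coded:

# Sketch only: spotipy looks these up when SpotifyClientCredentials() is given
# no explicit client_id/client_secret. Replace the placeholders with real values.
import os
os.environ["SPOTIPY_CLIENT_ID"] = "<your-spotify-client-id>"
os.environ["SPOTIPY_CLIENT_SECRET"] = "<your-spotify-client-secret>"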
requirements.txt ADDED

Binary file (4.3 kB)
spotify_data.csv ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:867146aa2b2922a59168d1e79f13c490fe5d4c401551285e980a7c7108f56d2f
size 33742074
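spotify_data.csv is committed as a Git LFS pointer, so a plain clone only contains the three lines above. Assuming git-lfs is installed, the ~34 MB CSV can be fetched locally with, for example, git lfs pull --include spotify_data.csv.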