yusufc committed on
Commit
734d5ec
·
1 Parent(s): 0f3888c

Upload 3 files

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. app.py +133 -0
  3. requirements.txt +0 -0
  4. spotify_data.csv +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ spotify_data.csv filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from __future__ import print_function
3
+
4
+ import os
5
+ import json
6
+ import time
7
+ import sys
8
+
9
+
10
+ import pandas as pd
11
+ import numpy as np
12
+ import seaborn as sn
13
+ import gradio as gr
14
+
15
+ from sklearn.compose import ColumnTransformer
16
+ from sklearn.preprocessing import StandardScaler, OneHotEncoder
17
+ from sklearn.pipeline import Pipeline
18
+ from sklearn.model_selection import train_test_split
19
+ from sklearn.neighbors import NearestNeighbors
20
+
21
+
22
+
23
+ import spotipy
24
+ from spotipy.oauth2 import SpotifyClientCredentials
25
+
26
+
27
# Spotify Web API client; no credentials are passed explicitly here.
sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials())

# Columns that are discarded from the raw table right after loading.
_UNUSED_COLUMNS = [
    'Unnamed: 0',
    'Unnamed: 0.1',
    'pos',
    'artist_uri',
    'album_uri',
    'duration_ms_x',
    'album_name',
    'name',
    'type',
    'id',
    'track_href',
    'analysis_url',
    'duration_ms_y',
    'time_signature',
    'artist_pop',
    'track_pop',
]

df = pd.read_csv('spotify_data.csv').drop(columns=_UNUSED_COLUMNS)

# Keep a single row per track URI and renumber the index from zero.
df = df.drop_duplicates(subset=['uri']).reset_index(drop=True)

# Only the float64/int64 columns are handed to the preprocessing step.
df_num = df.select_dtypes(include=['float64', 'int64'])

numeric_cols = [
    'acousticness',
    'danceability',
    'energy',
    'instrumentalness',
    'liveness',
    'loudness',
    'speechiness',
    'tempo',
    'valence',
]
categorical_cols = ['key', 'mode']

# Preprocessing: standardise the numeric features, one-hot encode the
# categorical ones (categories unseen during fit are ignored at transform).
_scaler_step = ('num', StandardScaler(), numeric_cols)
_encoder_step = ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
preprocessing_pipeline = ColumnTransformer(transformers=[_scaler_step, _encoder_step])

# Fit on the whole table and keep the transformed feature matrix.
_feature_matrix = preprocessing_pipeline.fit_transform(df_num)

# Output column names: numeric names are unchanged, categorical names come
# from the fitted encoder.
num_cols_transformed = numeric_cols
_fitted_encoder = preprocessing_pipeline.named_transformers_['cat']
cat_cols_transformed = _fitted_encoder.get_feature_names_out(categorical_cols)
all_cols_transformed = num_cols_transformed + cat_cols_transformed.tolist()

# Wrap the transformed matrix in a DataFrame with the combined column names.
df_processed = pd.DataFrame(_feature_matrix, columns=all_cols_transformed)
61
+
62
+
63
def transform_query(track_uri):
    """Fetch one track's audio features from Spotify as a one-row DataFrame.

    Args:
        track_uri: Track identifier passed straight to ``sp.audio_features``.

    Returns:
        A single-row ``pandas.DataFrame`` with the columns acousticness,
        danceability, energy, instrumentalness, liveness, loudness,
        speechiness, tempo, valence, key and mode.

    Raises:
        ValueError: If Spotify returns no audio features for the track.
    """
    # Features copied from the API response, in the column order produced.
    feature_names = (
        'acousticness',
        'danceability',
        'energy',
        'instrumentalness',
        'liveness',
        'loudness',
        'speechiness',
        'tempo',
        'valence',
        'key',
        'mode',
    )

    results = sp.audio_features(track_uri)
    audio_features = results[0] if results else None
    if audio_features is None:
        # Fail with a clear message instead of a TypeError from subscripting None.
        raise ValueError(f"No audio features available for track: {track_uri!r}")

    track_dict = {name: audio_features[name] for name in feature_names}
    return pd.DataFrame([track_dict])
83
+
84
+
85
# Nearest-neighbour index over every preprocessed track (all the data is used,
# there is no train/test split); fit() returns the fitted estimator itself.
knn_model = NearestNeighbors(
    n_neighbors=10,
    algorithm='auto',
    metric='euclidean',
).fit(df_processed)
87
+
88
# Function to find similar songs to the input URI
def find_similar_songs(track_uri):
    """Return the ten dataset tracks closest to the given Spotify track.

    Args:
        track_uri: Spotify track URL/URI typed by the user.

    Returns:
        A list of ``(artist_name, song_name, spotify_track_url)`` tuples, one
        per neighbour. Blank or missing input yields an empty list.
    """
    # Blank input (e.g. an empty textbox) has nothing to look up; return an
    # empty result instead of sending a bad request to Spotify.
    if not track_uri or not track_uri.strip():
        return []

    query_data = transform_query(track_uri.strip())

    # Apply the already-fitted preprocessing to the query row.
    query_data_scaled = preprocessing_pipeline.transform(query_data)
    query_data_scaled_df = pd.DataFrame(query_data_scaled, columns=all_cols_transformed)

    # Positions of the nearest rows; the distances are not used.
    _, indices = knn_model.kneighbors(query_data_scaled_df, n_neighbors=10)

    # Fetch all neighbour rows in one positional lookup.
    neighbours = df.iloc[indices[0]]

    similar_songs = []
    for artist_name, song_name, similar_uri in zip(
        neighbours['artist_name'], neighbours['track_name'], neighbours['uri']
    ):
        # The last colon-separated field of the stored URI is the track id.
        track_id = similar_uri.split(":")[-1]
        full_url = f"https://open.spotify.com/track/{track_id}"
        similar_songs.append((artist_name, song_name, full_url))

    return similar_songs
113
+
114
+
115
# Run one lookup at import time against a fixed example track.
_EXAMPLE_TRACK_URL = 'https://open.spotify.com/track/6rDaCGqcQB1urhpCrrD599?si=2ac7add2ea054ab2'
similar_songs = find_similar_songs(_EXAMPLE_TRACK_URL)
116
+
117
+
118
def format_output(similar_songs):
    """Convert ``(artist, song, url)`` entries into a display DataFrame.

    Args:
        similar_songs: Iterable of indexable entries whose first three items
            are the artist name, the song name and the Spotify track URL.

    Returns:
        A ``pandas.DataFrame`` with the columns "Artist Name", "Song Name"
        and "Spotify Track URL". The columns are present even when
        ``similar_songs`` is empty.
    """
    columns = ["Artist Name", "Song Name", "Spotify Track URL"]
    records = [
        {"Artist Name": song[0], "Song Name": song[1], "Spotify Track URL": song[2]}
        for song in similar_songs
    ]
    # Passing the columns explicitly keeps the headers for an empty result.
    return pd.DataFrame(records, columns=columns)
123
+
124
# Create the Gradio interface: one textbox in, one three-column table out.
iface = gr.Interface(
    fn=find_similar_songs,
    inputs=gr.Textbox(label="Enter Spotify Track URL"),
    outputs=gr.Dataframe(headers=["Artist Name", "Song Name", "Spotify Track URL"]),
    live=True,
)

# `share` has to be passed as a keyword argument; the string "share=True"
# was being passed positionally and never reached the `share` option.
iface.launch(share=True)
requirements.txt ADDED
Binary file (4.3 kB). View file
 
spotify_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:867146aa2b2922a59168d1e79f13c490fe5d4c401551285e980a7c7108f56d2f
3
+ size 33742074