frorozcol committed on
Commit
86a3a86
·
1 Parent(s): e90e7b9
app.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import librosa
3
+ from src.preprosecing import preprosecing
4
+ st.write("Clasificación de canciones")
5
+ #st.set_page_config(page_title="Upload Music", page_icon=":musical_note:", layout="wide")
6
+
7
def main():
    """Render the upload widget and, once a file is provided, show the results.

    An uploaded MP3 is handed to ``preprosecing``, which returns a playable
    audio stream and the classifier output. Both are displayed on the page.
    When nothing has been uploaded yet, a hint is shown instead.
    """
    uploaded_file = st.file_uploader("Choose a music file", type=["mp3"])

    if uploaded_file is None:
        # The uploader above only accepts MP3, so the hint must not advertise
        # WAV as an option.
        st.warning("Please upload a file of type: mp3")
        return

    # The second value is the classifier output returned by ``preprosecing``
    # (not raw features), so it is named accordingly.
    audio_snippet, prediction = preprosecing(uploaded_file)
    st.audio(audio_snippet, format='audio/wav')
    st.success("30 secs audio snippet")
    st.success("File uploaded successfully")
    st.write("This is the features from the audio")
    st.write(prediction)
19
+
20
+ if __name__ == "__main__":
21
+ main()
requierements.in ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ scikit-learn
2
+ pandas
3
+ librosa
4
+ matplotlib
5
+ lightgbm
6
+ catboost
7
+ xgboost
8
+ streamlit
9
+ seaborn
10
+ pydub
requierements.txt ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # This file is autogenerated by pip-compile with Python 3.10
3
+ # by the following command:
4
+ #
5
+ # pip-compile requierements.in
6
+ #
7
+ altair==4.2.0
8
+ # via streamlit
9
+ appdirs==1.4.4
10
+ # via pooch
11
+ attrs==22.2.0
12
+ # via jsonschema
13
+ audioread==3.0.0
14
+ # via librosa
15
+ blinker==1.5
16
+ # via streamlit
17
+ cachetools==5.2.1
18
+ # via streamlit
19
+ catboost==1.1.1
20
+ # via -r requierements.in
21
+ certifi==2022.12.7
22
+ # via requests
23
+ cffi==1.15.1
24
+ # via soundfile
25
+ charset-normalizer==2.1.1
26
+ # via requests
27
+ click==8.1.3
28
+ # via streamlit
29
+ commonmark==0.9.1
30
+ # via rich
31
+ contourpy==1.0.6
32
+ # via matplotlib
33
+ cycler==0.11.0
34
+ # via matplotlib
35
+ decorator==5.1.1
36
+ # via
37
+ # librosa
38
+ # validators
39
+ entrypoints==0.4
40
+ # via altair
41
+ fonttools==4.38.0
42
+ # via matplotlib
43
+ gitdb==4.0.10
44
+ # via gitpython
45
+ gitpython==3.1.30
46
+ # via streamlit
47
+ graphviz==0.20.1
48
+ # via catboost
49
+ idna==3.4
50
+ # via requests
51
+ importlib-metadata==6.0.0
52
+ # via streamlit
53
+ jinja2==3.1.2
54
+ # via
55
+ # altair
56
+ # pydeck
57
+ joblib==1.2.0
58
+ # via
59
+ # librosa
60
+ # scikit-learn
61
+ jsonschema==4.17.3
62
+ # via altair
63
+ kiwisolver==1.4.4
64
+ # via matplotlib
65
+ librosa==0.9.2
66
+ # via -r requierements.in
67
+ lightgbm==3.3.4
68
+ # via -r requierements.in
69
+ llvmlite==0.39.1
70
+ # via numba
71
+ markupsafe==2.1.1
72
+ # via jinja2
73
+ matplotlib==3.6.2
74
+ # via
75
+ # -r requierements.in
76
+ # catboost
77
+ # seaborn
78
+ numba==0.56.4
79
+ # via
80
+ # librosa
81
+ # resampy
82
+ numpy==1.23.5
83
+ # via
84
+ # altair
85
+ # catboost
86
+ # contourpy
87
+ # librosa
88
+ # lightgbm
89
+ # matplotlib
90
+ # numba
91
+ # pandas
92
+ # pyarrow
93
+ # pydeck
94
+ # resampy
95
+ # scikit-learn
96
+ # scipy
97
+ # seaborn
98
+ # streamlit
99
+ # xgboost
100
+ packaging==23.0
101
+ # via
102
+ # librosa
103
+ # matplotlib
104
+ # pooch
105
+ # streamlit
106
+ pandas==1.5.2
107
+ # via
108
+ # -r requierements.in
109
+ # altair
110
+ # catboost
111
+ # seaborn
112
+ # streamlit
113
+ pillow==9.4.0
114
+ # via
115
+ # matplotlib
116
+ # streamlit
117
+ plotly==5.11.0
118
+ # via catboost
119
+ pooch==1.6.0
120
+ # via librosa
121
+ protobuf==3.20.3
122
+ # via streamlit
123
+ pyarrow==10.0.1
124
+ # via streamlit
125
+ pycparser==2.21
126
+ # via cffi
127
+ pydeck==0.8.0
128
+ # via streamlit
129
+ pydub==0.25.1
130
+ # via -r requierements.in
131
+ pygments==2.14.0
132
+ # via rich
133
+ pympler==1.0.1
134
+ # via streamlit
135
+ pyparsing==3.0.9
136
+ # via matplotlib
137
+ pyrsistent==0.19.3
138
+ # via jsonschema
139
+ python-dateutil==2.8.2
140
+ # via
141
+ # matplotlib
142
+ # pandas
143
+ # streamlit
144
+ pytz==2022.7
145
+ # via pandas
146
+ pytz-deprecation-shim==0.1.0.post0
147
+ # via tzlocal
148
+ requests==2.28.1
149
+ # via
150
+ # pooch
151
+ # streamlit
152
+ resampy==0.4.2
153
+ # via librosa
154
+ rich==13.0.1
155
+ # via streamlit
156
+ scikit-learn==1.2.0
157
+ # via
158
+ # librosa
159
+ # lightgbm
160
+ scipy==1.10.0
161
+ # via
162
+ # catboost
163
+ # librosa
164
+ # lightgbm
165
+ # scikit-learn
166
+ # xgboost
167
+ seaborn==0.12.2
168
+ # via -r requierements.in
169
+ semver==2.13.0
170
+ # via streamlit
171
+ six==1.16.0
172
+ # via
173
+ # catboost
174
+ # python-dateutil
175
+ sklearn==0.0.post1
176
+ # via -r requierements.in
177
+ smmap==5.0.0
178
+ # via gitdb
179
+ soundfile==0.11.0
180
+ # via librosa
181
+ streamlit==1.16.0
182
+ # via -r requierements.in
183
+ tenacity==8.1.0
184
+ # via plotly
185
+ threadpoolctl==3.1.0
186
+ # via scikit-learn
187
+ toml==0.10.2
188
+ # via streamlit
189
+ toolz==0.12.0
190
+ # via altair
191
+ tornado==6.2
192
+ # via streamlit
193
+ typing-extensions==4.4.0
194
+ # via streamlit
195
+ tzdata==2022.7
196
+ # via pytz-deprecation-shim
197
+ tzlocal==4.2
198
+ # via streamlit
199
+ urllib3==1.26.14
200
+ # via requests
201
+ validators==0.20.0
202
+ # via streamlit
203
+ watchdog==2.2.1
204
+ # via streamlit
205
+ wheel==0.38.4
206
+ # via lightgbm
207
+ xgboost==1.7.3
208
+ # via -r requierements.in
209
+ zipp==3.11.0
210
+ # via importlib-metadata
211
+
212
+ # The following packages are considered to be unsafe in a requirements file:
213
+ # setuptools
src/.gitkeep ADDED
File without changes
src/__init__.py ADDED
File without changes
src/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (178 Bytes). View file
 
src/__pycache__/preprosecing.cpython-310.pyc ADDED
Binary file (8.33 kB). View file
 
src/models/model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b625bc7ac843c7f18ea2aacbce537b35d07051b75c1c8d84f2ef11c474a84d0b
3
+ size 56772779
src/models/model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5b7546f95b16cf021c16f5ff5d3b3e7b91cc294a126e5477a827c4be1dba4b8
3
+ size 56775478
src/output.png ADDED
src/preprosecing.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import librosa
4
+ import soundfile as sf
5
+
6
+
7
+ import statistics as st
8
+ from joblib import load
9
+ from pydub import AudioSegment
10
+
11
+ CLASSES = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']
12
+
13
class Features:
    """Compute a fixed-layout feature vector for each of ten audio segments.

    The input signal is cut into ten equal-length segments (3 s each for a
    30 s clip) and the same set of librosa descriptors is extracted from
    every segment.
    """

    # Number of equal-length segments the signal is divided into.
    _N_SEGMENTS = 10

    def __init__(self, y, sr, hop_length=5000):
        """
        Initialize the class with audio signal, sr and hop_length
        :param y: audio signal
        :param sr: sample rate of audio signal
        :param hop_length: hop_length parameter used while calculating the chroma_stft feature
        """
        y = np.asarray(y)
        # np.split requires an exact division; drop the (at most nine)
        # trailing samples so clips shorter than 30 s no longer raise
        # ValueError while every segment keeps the same length.
        usable = len(y) - len(y) % self._N_SEGMENTS
        self.y = np.split(y[:usable], self._N_SEGMENTS)
        self.sr = sr
        self.hop_length = hop_length

    def get_mean_var(self, y):
        """
        Helper function to get mean and variance of feature
        :param y: audio feature
        :return: mean, variance
        """
        return y.mean(), y.var()

    def zero_crossing_rate(self, y):
        """
        Returns the zero-crossing rate of the audio signal
        :param y: audio signal
        :return: mean and variance of zero-crossing rate
        """
        values = librosa.feature.zero_crossing_rate(y)
        return self.get_mean_var(values)

    def harmonic_and_per(self, y):
        """
        Separates the harmonic and percussive components of the audio signal
        :param y: audio signal
        :return: (mean, variance) of the harmonic part, (mean, variance) of the percussive part
        """
        y_harm, y_perc = librosa.effects.hpss(y)
        return self.get_mean_var(y_harm), self.get_mean_var(y_perc)

    def tempo(self, y):
        """
        Estimates the tempo (beats per minute) of an audio signal.

        Parameters:
        y (ndarray): The audio signal represented as an numpy array.

        Returns:
        The value returned by ``librosa.beat.tempo`` (an array of tempo
        estimates, not a plain float).
        """
        # The signal is passed by keyword: positional use is deprecated in
        # librosa 0.9 and rejected from 0.10 on.
        return librosa.beat.tempo(y=y, sr=self.sr)

    def centroid(self, y):
        """
        Extracts the spectral centroid of an audio signal.

        Parameters:
        y (ndarray): The audio signal represented as an numpy array.

        Returns:
        tuple: A tuple containing the mean and variance of the spectral centroid.
        """
        centroid = librosa.feature.spectral_centroid(y=y, sr=self.sr)
        return self.get_mean_var(centroid)

    def mfccs(self, y):
        """
        Extracts the Mel-Frequency Cepstral Coefficients (MFCCs) of an audio signal.

        Parameters:
        y (ndarray): The audio signal represented as an numpy array.

        Returns:
        ndarray: A flat array ``[mean_0, var_0, mean_1, var_1, ...]`` with one
        mean/variance pair per MFCC coefficient.
        """
        mfccs = librosa.feature.mfcc(y=y, sr=self.sr)
        # Stack means and variances as two columns, then flatten row-wise so
        # each coefficient's mean is immediately followed by its variance.
        return np.stack([mfccs.mean(axis=1), mfccs.var(axis=1)], axis=1).reshape(-1)

    def chroma_stft(self, y):
        """
        Extracts the chroma feature of an audio signal.

        Parameters:
        y (ndarray): The audio signal represented as an numpy array.

        Returns:
        tuple: A tuple containing the mean and variance of the chroma feature.
        """
        chroma = librosa.feature.chroma_stft(y=y, sr=self.sr, hop_length=self.hop_length)
        return self.get_mean_var(chroma)

    def spectral_bandwidth(self, y):
        """
        Extracts the spectral bandwidth of an audio signal.

        Parameters:
        y (ndarray): The audio signal represented as an numpy array.

        Returns:
        tuple: A tuple containing the mean and variance of the spectral bandwidth.
        """
        spd = librosa.feature.spectral_bandwidth(y=y, sr=self.sr)
        return self.get_mean_var(spd)

    def rollof(self, y):
        """
        Extracts the spectral rolloff of an audio signal.

        Parameters:
        y (ndarray): The audio signal represented as an numpy array.

        Returns:
        tuple: A tuple containing the mean and variance of the spectral rolloff.
        """
        rollof = librosa.feature.spectral_rolloff(y=y, sr=self.sr)[0]
        return self.get_mean_var(rollof)

    def rms(self, y):
        """
        Extracts the root mean square (RMS) of an audio signal.

        Parameters:
        y (ndarray): The audio signal represented as an numpy array.

        Returns:
        tuple: A tuple containing the mean and variance of the RMS.
        """
        rms = librosa.feature.rms(y=y)
        return self.get_mean_var(rms)

    def features(self, y):
        """
        Extracts various audio features from an audio signal.

        Parameters:
        y (ndarray): The audio signal represented as an numpy array.

        Returns:
        ndarray: ``[length, chroma, rms, centroid, bandwidth, rolloff, zcr,
        harmonic, percussive (mean and variance each), tempo]`` followed by
        the interleaved MFCC means and variances.
        """
        # ``tempo`` yields an array; take its first entry so the list below
        # holds scalars only and converts cleanly to a 1-D float32 array.
        tempo = np.ravel(self.tempo(y))[0]
        centroid_mean, centroid_var = self.centroid(y)
        chroma_mean, chroma_var = self.chroma_stft(y)
        zcr_mean, zcr_var = self.zero_crossing_rate(y)
        spd_mean, spd_var = self.spectral_bandwidth(y)
        rollof_mean, rollof_var = self.rollof(y)
        rms_mean, rms_var = self.rms(y)
        (harm_mean, harm_var), (perc_mean, perc_var) = self.harmonic_and_per(y)
        mfccs = self.mfccs(y)

        scalars = np.array(
            [
                y.shape[0],
                chroma_mean, chroma_var,
                rms_mean, rms_var,
                centroid_mean, centroid_var,
                spd_mean, spd_var,
                rollof_mean, rollof_var,
                zcr_mean, zcr_var,
                harm_mean, harm_var,
                perc_mean, perc_var,
                tempo,
            ],
            dtype=np.float32,
        )
        return np.concatenate([scalars, mfccs])

    def splits_3sec(self):
        """
        Extracts audio features from each of the stored sub-sequences.

        Returns:
        ndarray: An array with one row of extracted audio features per sub-sequence.
        """
        return np.array([self.features(segment) for segment in self.y])
201
+
202
+
203
def load_model():
    """Load and return the serialized model stored at ``models/model.pkl``.

    The path is resolved relative to the directory containing this module.
    """
    module_dir = os.path.dirname(__file__)
    return load(os.path.join(module_dir, 'models', "model.pkl"))
208
+
209
def predict(features):
    """Classify every feature row and return the majority class.

    :param features: feature rows passed unchanged to the model's ``predict``
    :return: tuple of (name from ``CLASSES`` for the most common predicted
        index, the raw per-row predictions)
    """
    per_segment = load_model().predict(features)
    # ``st`` is the ``statistics`` module in this file.
    winner = st.mode(per_segment)
    return CLASSES[winner], per_segment
214
+
215
def cuts_silence(audio):
    """Return ``audio`` with leading and trailing silence trimmed.

    Only the trimmed signal from ``librosa.effects.trim`` is kept; the
    accompanying index interval is discarded.
    """
    trimmed = librosa.effects.trim(audio)[0]
    return trimmed
218
+
219
def convert_mp3_to_wav(music_file):
    """Decode an MP3 and export it as ``music_file.wav``.

    :param music_file: MP3 source handed to ``AudioSegment.from_mp3``
    :return: the name of the WAV file that was written
    """
    wav_path = "music_file.wav"
    AudioSegment.from_mp3(music_file).export(wav_path, format="wav")
    return wav_path
224
+
225
+
226
def preprosecing(uploaded_file):
    """Convert an uploaded MP3 into a trimmed snippet of at most 30 s and classify it.

    Steps: convert to WAV, load with librosa, trim silence, keep the first
    30 seconds, write the snippet back to the WAV file, extract per-segment
    features and run the classifier.

    :param uploaded_file: MP3 source accepted by ``convert_mp3_to_wav``
    :return: tuple of (binary stream holding the WAV snippet, result of
        ``predict`` on the extracted features)
    """
    import io

    name_file = convert_mp3_to_wav(uploaded_file)
    y, sr = librosa.load(name_file)
    audio_file = cuts_silence(y)[:sr * 30]
    sf.write(file=name_file, data=audio_file, samplerate=sr)

    # Read the snippet into memory and close the file right away. The original
    # returned an open handle that was never closed, on a fixed-name file that
    # the next upload overwrites.
    with open(name_file, 'rb') as wav_handle:
        file = io.BytesIO(wav_handle.read())

    features = Features(audio_file, sr).splits_3sec()
    prediction = predict(features)
    return file, prediction