Update app.py
Browse files
app.py
CHANGED
@@ -71,7 +71,7 @@ class AudioAnalyzer:
|
|
71 |
return None, f"Download failed: {e.stderr}"
|
72 |
except Exception as e:
|
73 |
logger.error(f"Unexpected error during download: {str(e)}")
|
74 |
-
return None, f"
|
75 |
|
76 |
def extract_basic_features(self, audio_path: str, sr: int = 16000, max_duration: float = 60.0,
|
77 |
progress=gr.Progress()) -> Tuple[Optional[str], Optional[str], Optional[str]]:
|
@@ -93,14 +93,14 @@ class AudioAnalyzer:
|
|
93 |
'duration': duration,
|
94 |
'sample_rate': sr,
|
95 |
'samples': len(y),
|
96 |
-
'tempo': librosa.beat.beat_track(y=y, sr=sr)[0],
|
97 |
'mfcc': librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13),
|
98 |
'spectral_centroid': librosa.feature.spectral_centroid(y=y, sr=sr)[0],
|
99 |
'spectral_rolloff': librosa.feature.spectral_rolloff(y=y, sr=sr)[0],
|
100 |
'zero_crossing_rate': librosa.feature.zero_crossing_rate(y)[0]
|
101 |
}
|
102 |
|
103 |
-
progress(0.5, desc="Computing
|
104 |
hop_length = 512
|
105 |
S_mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length, n_mels=80)
|
106 |
S_dB = librosa.power_to_db(S_mel, ref=np.max)
|
@@ -108,7 +108,7 @@ class AudioAnalyzer:
|
|
108 |
progress(0.8, desc="Creating visualizations...")
|
109 |
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
|
110 |
|
111 |
-
time_axis =
|
112 |
axes[0, 0].plot(time_axis, y)
|
113 |
axes[0, 0].set_title('Waveform')
|
114 |
axes[0, 0].set_xlabel('Time (s)')
|
@@ -130,15 +130,21 @@ class AudioAnalyzer:
|
|
130 |
|
131 |
plt.tight_layout()
|
132 |
plot_path = self.temp_dir / f"basic_features_{np.random.randint(10000)}.png"
|
133 |
-
plt.savefig(plot_path, dpi=
|
134 |
plt.close()
|
135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
summary = f"""
|
137 |
**Audio Summary:**
|
138 |
- Duration: {duration:.2f} seconds
|
139 |
- Sample Rate: {sr} Hz
|
140 |
- Estimated Tempo: {features['tempo']:.1f} BPM
|
141 |
-
- Number of Samples: {
|
142 |
|
143 |
**Feature Shapes:**
|
144 |
- MFCC: {features['mfcc'].shape}
|
@@ -171,9 +177,9 @@ class AudioAnalyzer:
|
|
171 |
y_harm = librosa.effects.harmonic(y=y, margin=8)
|
172 |
chroma_harm = librosa.feature.chroma_cqt(y=y_harm, sr=sr)
|
173 |
chroma_filter = np.minimum(chroma_harm,
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
chroma_smooth = scipy.ndimage.median_filter(chroma_filter, size=(1, 9))
|
178 |
chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
|
179 |
chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)
|
@@ -195,7 +201,7 @@ class AudioAnalyzer:
|
|
195 |
|
196 |
plt.tight_layout()
|
197 |
plot_path = self.temp_dir / f"chroma_features_{np.random.randint(10000)}.png"
|
198 |
-
plt.savefig(plot_path, dpi=
|
199 |
plt.close()
|
200 |
|
201 |
summary = "Chroma feature analysis complete! Visualizations show different chroma extraction methods for harmonic analysis."
|
@@ -216,7 +222,7 @@ class AudioAnalyzer:
|
|
216 |
progress(0.1, desc="Loading audio...")
|
217 |
y, sr = librosa.load(audio_path, sr=sr)
|
218 |
|
219 |
-
progress(0.3, desc="Computing
|
220 |
hop_length = 512
|
221 |
S_mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length, n_mels=80)
|
222 |
S_dB = librosa.power_to_db(S_mel, ref=np.max)
|
@@ -233,7 +239,7 @@ class AudioAnalyzer:
|
|
233 |
|
234 |
for i in range(num_patches_to_show):
|
235 |
librosa.display.specshow(patches[..., i], y_axis='mel', x_axis='time',
|
236 |
-
|
237 |
axes[i].set_title(f'Patch {i+1}')
|
238 |
|
239 |
for i in range(num_patches_to_show, len(axes)):
|
@@ -241,14 +247,14 @@ class AudioAnalyzer:
|
|
241 |
|
242 |
plt.tight_layout()
|
243 |
plot_path = self.temp_dir / f"patches_{np.random.randint(10000)}.png"
|
244 |
-
plt.savefig(plot_path, dpi=
|
245 |
plt.close()
|
246 |
|
247 |
summary = f"""
|
248 |
**Patch Generation Summary:**
|
249 |
- Total patches generated: {patches.shape[-1]}
|
250 |
-
- Patch duration: {patch_duration} seconds
|
251 |
-
- Hop duration: {hop_duration} seconds
|
252 |
- Patch shape (mels, time, patches): {patches.shape}
|
253 |
- Each patch covers {patch_frames} time frames
|
254 |
"""
|
@@ -275,7 +281,7 @@ def create_gradio_interface() -> gr.Blocks:
|
|
275 |
- 🎼 **Chroma Features**: Harmonic content analysis with multiple extraction methods
|
276 |
- 🧩 **Transformer Patches**: Fixed-duration patches for deep learning
|
277 |
|
278 |
-
**Requirements**:
|
279 |
""")
|
280 |
|
281 |
with gr.Row():
|
|
|
71 |
return None, f"Download failed: {e.stderr}"
|
72 |
except Exception as e:
|
73 |
logger.error(f"Unexpected error during download: {str(e)}")
|
74 |
+
return None, f"Error: {str(e)}"
|
75 |
|
76 |
def extract_basic_features(self, audio_path: str, sr: int = 16000, max_duration: float = 60.0,
|
77 |
progress=gr.Progress()) -> Tuple[Optional[str], Optional[str], Optional[str]]:
|
|
|
93 |
'duration': duration,
|
94 |
'sample_rate': sr,
|
95 |
'samples': len(y),
|
96 |
+
'tempo': float(librosa.beat.beat_track(y=y, sr=sr)[0]), # Convert to float
|
97 |
'mfcc': librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13),
|
98 |
'spectral_centroid': librosa.feature.spectral_centroid(y=y, sr=sr)[0],
|
99 |
'spectral_rolloff': librosa.feature.spectral_rolloff(y=y, sr=sr)[0],
|
100 |
'zero_crossing_rate': librosa.feature.zero_crossing_rate(y)[0]
|
101 |
}
|
102 |
|
103 |
+
progress(0.5, desc="Computing Mel spectrogram...")
|
104 |
hop_length = 512
|
105 |
S_mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length, n_mels=80)
|
106 |
S_dB = librosa.power_to_db(S_mel, ref=np.max)
|
|
|
108 |
progress(0.8, desc="Creating visualizations...")
|
109 |
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
|
110 |
|
111 |
+
time_axis = np.linspace(0, duration, len(y))
|
112 |
axes[0, 0].plot(time_axis, y)
|
113 |
axes[0, 0].set_title('Waveform')
|
114 |
axes[0, 0].set_xlabel('Time (s)')
|
|
|
130 |
|
131 |
plt.tight_layout()
|
132 |
plot_path = self.temp_dir / f"basic_features_{np.random.randint(10000)}.png"
|
133 |
+
plt.savefig(plot_path, dpi=300, bbox_inches='tight')
|
134 |
plt.close()
|
135 |
|
136 |
+
# Validate feature shapes
|
137 |
+
for key in ['mfcc', 'spectral_centroid', 'spectral_rolloff', 'zero_crossing_rate']:
|
138 |
+
if not isinstance(features[key].shape, tuple):
|
139 |
+
logger.error(f"Invalid shape for {key}: {features[key].shape}")
|
140 |
+
return None, None, f"Invalid feature shape for {key}"
|
141 |
+
|
142 |
summary = f"""
|
143 |
**Audio Summary:**
|
144 |
- Duration: {duration:.2f} seconds
|
145 |
- Sample Rate: {sr} Hz
|
146 |
- Estimated Tempo: {features['tempo']:.1f} BPM
|
147 |
+
- Number of Samples: {features['samples']:,}
|
148 |
|
149 |
**Feature Shapes:**
|
150 |
- MFCC: {features['mfcc'].shape}
|
|
|
177 |
y_harm = librosa.effects.harmonic(y=y, margin=8)
|
178 |
chroma_harm = librosa.feature.chroma_cqt(y=y_harm, sr=sr)
|
179 |
chroma_filter = np.minimum(chroma_harm,
|
180 |
+
librosa.decompose.nn_filter(chroma_harm,
|
181 |
+
aggregate=np.median,
|
182 |
+
metric='cosine'))
|
183 |
chroma_smooth = scipy.ndimage.median_filter(chroma_filter, size=(1, 9))
|
184 |
chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
|
185 |
chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)
|
|
|
201 |
|
202 |
plt.tight_layout()
|
203 |
plot_path = self.temp_dir / f"chroma_features_{np.random.randint(10000)}.png"
|
204 |
+
plt.savefig(plot_path, dpi=300, bbox_inches='tight')
|
205 |
plt.close()
|
206 |
|
207 |
summary = "Chroma feature analysis complete! Visualizations show different chroma extraction methods for harmonic analysis."
|
|
|
222 |
progress(0.1, desc="Loading audio...")
|
223 |
y, sr = librosa.load(audio_path, sr=sr)
|
224 |
|
225 |
+
progress(0.3, desc="Computing Mel spectrogram...")
|
226 |
hop_length = 512
|
227 |
S_mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length, n_mels=80)
|
228 |
S_dB = librosa.power_to_db(S_mel, ref=np.max)
|
|
|
239 |
|
240 |
for i in range(num_patches_to_show):
|
241 |
librosa.display.specshow(patches[..., i], y_axis='mel', x_axis='time',
|
242 |
+
ax=axes[i], sr=sr, hop_length=hop_length)
|
243 |
axes[i].set_title(f'Patch {i+1}')
|
244 |
|
245 |
for i in range(num_patches_to_show, len(axes)):
|
|
|
247 |
|
248 |
plt.tight_layout()
|
249 |
plot_path = self.temp_dir / f"patches_{np.random.randint(10000)}.png"
|
250 |
+
plt.savefig(plot_path, dpi=300, bbox_inches='tight')
|
251 |
plt.close()
|
252 |
|
253 |
summary = f"""
|
254 |
**Patch Generation Summary:**
|
255 |
- Total patches generated: {patches.shape[-1]}
|
256 |
+
- Patch duration: {patch_duration:.1f} seconds
|
257 |
+
- Hop duration: {hop_duration:.1f} seconds
|
258 |
- Patch shape (mels, time, patches): {patches.shape}
|
259 |
- Each patch covers {patch_frames} time frames
|
260 |
"""
|
|
|
281 |
- 🎼 **Chroma Features**: Harmonic content analysis with multiple extraction methods
|
282 |
- 🧩 **Transformer Patches**: Fixed-duration patches for deep learning
|
283 |
|
284 |
+
**Requirements**: Dependencies are automatically installed in Hugging Face Spaces via `requirements.txt`.
|
285 |
""")
|
286 |
|
287 |
with gr.Row():
|