latterworks committed on
Commit
4d9af98
·
verified ·
1 Parent(s): 1902030

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -16
app.py CHANGED
@@ -71,7 +71,7 @@ class AudioAnalyzer:
71
  return None, f"Download failed: {e.stderr}"
72
  except Exception as e:
73
  logger.error(f"Unexpected error during download: {str(e)}")
74
- return None, f"Unexpected error: {str(e)}"
75
 
76
  def extract_basic_features(self, audio_path: str, sr: int = 16000, max_duration: float = 60.0,
77
  progress=gr.Progress()) -> Tuple[Optional[str], Optional[str], Optional[str]]:
@@ -93,14 +93,14 @@ class AudioAnalyzer:
93
  'duration': duration,
94
  'sample_rate': sr,
95
  'samples': len(y),
96
- 'tempo': librosa.beat.beat_track(y=y, sr=sr)[0],
97
  'mfcc': librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13),
98
  'spectral_centroid': librosa.feature.spectral_centroid(y=y, sr=sr)[0],
99
  'spectral_rolloff': librosa.feature.spectral_rolloff(y=y, sr=sr)[0],
100
  'zero_crossing_rate': librosa.feature.zero_crossing_rate(y)[0]
101
  }
102
 
103
- progress(0.5, desc="Computing mel spectrogram...")
104
  hop_length = 512
105
  S_mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length, n_mels=80)
106
  S_dB = librosa.power_to_db(S_mel, ref=np.max)
@@ -108,7 +108,7 @@ class AudioAnalyzer:
108
  progress(0.8, desc="Creating visualizations...")
109
  fig, axes = plt.subplots(2, 2, figsize=(15, 10))
110
 
111
- time_axis = librosa.frames_to_time(range(len(y)), sr=sr)
112
  axes[0, 0].plot(time_axis, y)
113
  axes[0, 0].set_title('Waveform')
114
  axes[0, 0].set_xlabel('Time (s)')
@@ -130,15 +130,21 @@ class AudioAnalyzer:
130
 
131
  plt.tight_layout()
132
  plot_path = self.temp_dir / f"basic_features_{np.random.randint(10000)}.png"
133
- plt.savefig(plot_path, dpi=150, bbox_inches='tight')
134
  plt.close()
135
 
 
 
 
 
 
 
136
  summary = f"""
137
  **Audio Summary:**
138
  - Duration: {duration:.2f} seconds
139
  - Sample Rate: {sr} Hz
140
  - Estimated Tempo: {features['tempo']:.1f} BPM
141
- - Number of Samples: {len(y):,}
142
 
143
  **Feature Shapes:**
144
  - MFCC: {features['mfcc'].shape}
@@ -171,9 +177,9 @@ class AudioAnalyzer:
171
  y_harm = librosa.effects.harmonic(y=y, margin=8)
172
  chroma_harm = librosa.feature.chroma_cqt(y=y_harm, sr=sr)
173
  chroma_filter = np.minimum(chroma_harm,
174
- librosa.decompose.nn_filter(chroma_harm,
175
- aggregate=np.median,
176
- metric='cosine'))
177
  chroma_smooth = scipy.ndimage.median_filter(chroma_filter, size=(1, 9))
178
  chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
179
  chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)
@@ -195,7 +201,7 @@ class AudioAnalyzer:
195
 
196
  plt.tight_layout()
197
  plot_path = self.temp_dir / f"chroma_features_{np.random.randint(10000)}.png"
198
- plt.savefig(plot_path, dpi=150, bbox_inches='tight')
199
  plt.close()
200
 
201
  summary = "Chroma feature analysis complete! Visualizations show different chroma extraction methods for harmonic analysis."
@@ -216,7 +222,7 @@ class AudioAnalyzer:
216
  progress(0.1, desc="Loading audio...")
217
  y, sr = librosa.load(audio_path, sr=sr)
218
 
219
- progress(0.3, desc="Computing mel spectrogram...")
220
  hop_length = 512
221
  S_mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length, n_mels=80)
222
  S_dB = librosa.power_to_db(S_mel, ref=np.max)
@@ -233,7 +239,7 @@ class AudioAnalyzer:
233
 
234
  for i in range(num_patches_to_show):
235
  librosa.display.specshow(patches[..., i], y_axis='mel', x_axis='time',
236
- ax=axes[i], sr=sr, hop_length=hop_length)
237
  axes[i].set_title(f'Patch {i+1}')
238
 
239
  for i in range(num_patches_to_show, len(axes)):
@@ -241,14 +247,14 @@ class AudioAnalyzer:
241
 
242
  plt.tight_layout()
243
  plot_path = self.temp_dir / f"patches_{np.random.randint(10000)}.png"
244
- plt.savefig(plot_path, dpi=150, bbox_inches='tight')
245
  plt.close()
246
 
247
  summary = f"""
248
  **Patch Generation Summary:**
249
  - Total patches generated: {patches.shape[-1]}
250
- - Patch duration: {patch_duration} seconds
251
- - Hop duration: {hop_duration} seconds
252
  - Patch shape (mels, time, patches): {patches.shape}
253
  - Each patch covers {patch_frames} time frames
254
  """
@@ -275,7 +281,7 @@ def create_gradio_interface() -> gr.Blocks:
275
  - 🎼 **Chroma Features**: Harmonic content analysis with multiple extraction methods
276
  - 🧩 **Transformer Patches**: Fixed-duration patches for deep learning
277
 
278
- **Requirements**: Install `yt-dlp` with `pip install yt-dlp`.
279
  """)
280
 
281
  with gr.Row():
 
71
  return None, f"Download failed: {e.stderr}"
72
  except Exception as e:
73
  logger.error(f"Unexpected error during download: {str(e)}")
74
+ return None, f"Error: {str(e)}"
75
 
76
  def extract_basic_features(self, audio_path: str, sr: int = 16000, max_duration: float = 60.0,
77
  progress=gr.Progress()) -> Tuple[Optional[str], Optional[str], Optional[str]]:
 
93
  'duration': duration,
94
  'sample_rate': sr,
95
  'samples': len(y),
96
+ 'tempo': float(librosa.beat.beat_track(y=y, sr=sr)[0]), # Convert to float
97
  'mfcc': librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13),
98
  'spectral_centroid': librosa.feature.spectral_centroid(y=y, sr=sr)[0],
99
  'spectral_rolloff': librosa.feature.spectral_rolloff(y=y, sr=sr)[0],
100
  'zero_crossing_rate': librosa.feature.zero_crossing_rate(y)[0]
101
  }
102
 
103
+ progress(0.5, desc="Computing Mel spectrogram...")
104
  hop_length = 512
105
  S_mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length, n_mels=80)
106
  S_dB = librosa.power_to_db(S_mel, ref=np.max)
 
108
  progress(0.8, desc="Creating visualizations...")
109
  fig, axes = plt.subplots(2, 2, figsize=(15, 10))
110
 
111
+ time_axis = np.linspace(0, duration, len(y))
112
  axes[0, 0].plot(time_axis, y)
113
  axes[0, 0].set_title('Waveform')
114
  axes[0, 0].set_xlabel('Time (s)')
 
130
 
131
  plt.tight_layout()
132
  plot_path = self.temp_dir / f"basic_features_{np.random.randint(10000)}.png"
133
+ plt.savefig(plot_path, dpi=300, bbox_inches='tight')
134
  plt.close()
135
 
136
+ # Validate feature shapes
137
+ for key in ['mfcc', 'spectral_centroid', 'spectral_rolloff', 'zero_crossing_rate']:
138
+ if not isinstance(features[key].shape, tuple):
139
+ logger.error(f"Invalid shape for {key}: {features[key].shape}")
140
+ return None, None, f"Invalid feature shape for {key}"
141
+
142
  summary = f"""
143
  **Audio Summary:**
144
  - Duration: {duration:.2f} seconds
145
  - Sample Rate: {sr} Hz
146
  - Estimated Tempo: {features['tempo']:.1f} BPM
147
+ - Number of Samples: {features['samples']:,}
148
 
149
  **Feature Shapes:**
150
  - MFCC: {features['mfcc'].shape}
 
177
  y_harm = librosa.effects.harmonic(y=y, margin=8)
178
  chroma_harm = librosa.feature.chroma_cqt(y=y_harm, sr=sr)
179
  chroma_filter = np.minimum(chroma_harm,
180
+ librosa.decompose.nn_filter(chroma_harm,
181
+ aggregate=np.median,
182
+ metric='cosine'))
183
  chroma_smooth = scipy.ndimage.median_filter(chroma_filter, size=(1, 9))
184
  chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
185
  chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)
 
201
 
202
  plt.tight_layout()
203
  plot_path = self.temp_dir / f"chroma_features_{np.random.randint(10000)}.png"
204
+ plt.savefig(plot_path, dpi=300, bbox_inches='tight')
205
  plt.close()
206
 
207
  summary = "Chroma feature analysis complete! Visualizations show different chroma extraction methods for harmonic analysis."
 
222
  progress(0.1, desc="Loading audio...")
223
  y, sr = librosa.load(audio_path, sr=sr)
224
 
225
+ progress(0.3, desc="Computing Mel spectrogram...")
226
  hop_length = 512
227
  S_mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length, n_mels=80)
228
  S_dB = librosa.power_to_db(S_mel, ref=np.max)
 
239
 
240
  for i in range(num_patches_to_show):
241
  librosa.display.specshow(patches[..., i], y_axis='mel', x_axis='time',
242
+ ax=axes[i], sr=sr, hop_length=hop_length)
243
  axes[i].set_title(f'Patch {i+1}')
244
 
245
  for i in range(num_patches_to_show, len(axes)):
 
247
 
248
  plt.tight_layout()
249
  plot_path = self.temp_dir / f"patches_{np.random.randint(10000)}.png"
250
+ plt.savefig(plot_path, dpi=300, bbox_inches='tight')
251
  plt.close()
252
 
253
  summary = f"""
254
  **Patch Generation Summary:**
255
  - Total patches generated: {patches.shape[-1]}
256
+ - Patch duration: {patch_duration:.1f} seconds
257
+ - Hop duration: {hop_duration:.1f} seconds
258
  - Patch shape (mels, time, patches): {patches.shape}
259
  - Each patch covers {patch_frames} time frames
260
  """
 
281
  - 🎼 **Chroma Features**: Harmonic content analysis with multiple extraction methods
282
  - 🧩 **Transformer Patches**: Fixed-duration patches for deep learning
283
 
284
+ **Requirements**: Dependencies are automatically installed in Hugging Face Spaces via `requirements.txt`.
285
  """)
286
 
287
  with gr.Row():