latterworks committed on
Commit
daf4d3a
·
verified ·
1 Parent(s): 6310c31

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +222 -402
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import gradio as gr
2
  import subprocess
3
- import os
4
  import tempfile
5
  import librosa
6
  import librosa.display
@@ -8,447 +7,268 @@ import matplotlib.pyplot as plt
8
  import numpy as np
9
  import scipy.ndimage
10
  from pathlib import Path
11
-
12
  import logging
13
  import warnings
14
-
15
-
16
-
17
  import shutil
18
- from typing import Tuple, Optional, Dict, Any
19
 
20
# Render figures off-screen: plots are written to PNG files, never shown in a window.
plt.switch_backend("Agg")
# Suppress all Python warnings process-wide.
warnings.filterwarnings("ignore")

# Send INFO-and-above records to the console with a timestamped format.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
    handlers=[logging.StreamHandler()],
)
logger = logging.getLogger(__name__)
31
 
32
class AudioAnalyzer:
    """Core class for audio analysis with modular feature extraction methods.

    Downloads and plots are written below ``self.temp_dir``. The three
    analysis methods return ``(plot_path, summary, error)``: on success
    ``error`` is ``None``; on failure the first two items are ``None``.
    """

    def __init__(self, temp_dir: Optional[str] = None):
        """Initialize with a temporary directory for file storage.

        Args:
            temp_dir: Directory for downloads and plots. When omitted, a new
                directory from ``tempfile.mkdtemp()`` is used.
        """
        self.temp_dir = Path(temp_dir or tempfile.mkdtemp())
        # parents=True: a caller-supplied nested path may not exist yet.
        self.temp_dir.mkdir(parents=True, exist_ok=True)
        self.plot_files = []  # Paths of saved plots, tracked for cleanup
        logger.info(f"Initialized temporary directory: {self.temp_dir}")

    def _ensure_temp_dir(self) -> Path:
        """Return the working directory, recreating it if it was removed."""
        self.temp_dir.mkdir(parents=True, exist_ok=True)
        return self.temp_dir

    def cleanup(self) -> None:
        """Remove tracked plot files and the temporary directory.

        The instance stays usable afterwards: the plot list is emptied and
        the directory is recreated on demand by the next download or plot.
        """
        for plot_file in self.plot_files:
            try:
                Path(plot_file).unlink()
                logger.info(f"Removed plot file: {plot_file}")
            except FileNotFoundError:
                pass  # Already gone; nothing to do.
            except OSError as e:
                logger.warning(f"Failed to remove plot file {plot_file}: {str(e)}")
        self.plot_files.clear()
        if self.temp_dir.exists():
            shutil.rmtree(self.temp_dir, ignore_errors=True)
            logger.info(f"Cleaned up temporary directory: {self.temp_dir}")

    def download_youtube_audio(self, video_url: str, progress=gr.Progress()) -> Tuple[Optional[str], str]:
        """Download audio from YouTube using yt-dlp and convert it to MP3.

        Args:
            video_url: URL handed to ``yt-dlp``.
            progress: Progress callback, called as ``progress(fraction, desc=...)``.

        Returns:
            ``(path_to_mp3, status_message)``; the path is ``None`` on failure.
        """
        if not video_url:
            return None, "Please provide a valid YouTube URL"

        progress(0.1, desc="Initializing download...")
        # A fresh directory per download: with a fixed output path yt-dlp may
        # find the previous file and skip the download, returning stale audio.
        output_dir = Path(tempfile.mkdtemp(prefix="downloaded_audio_", dir=self._ensure_temp_dir()))
        output_file = output_dir / "audio.mp3"

        command = [
            "yt-dlp",
            "-x",
            "--audio-format", "mp3",
            # %(ext)s lets yt-dlp name the raw download and the converted
            # MP3 differently; the final file is audio.mp3.
            "-o", str(output_dir / "audio.%(ext)s"),
            "--no-playlist",
            "--restrict-filenames",
            # End of options: a URL starting with "-" must not be parsed as a flag.
            "--",
            video_url,
        ]

        try:
            progress(0.3, desc="Downloading audio...")
            subprocess.run(command, check=True, capture_output=True, text=True)
            if not output_file.exists():
                return None, "Download failed: yt-dlp did not produce an MP3 file"
            progress(1.0, desc="Download complete!")
            return str(output_file), f"Successfully downloaded audio: {output_file.name}"
        except FileNotFoundError:
            return None, "yt-dlp not found. Install it with: pip install yt-dlp"
        except subprocess.CalledProcessError as e:
            return None, f"Download failed: {e.stderr}"
        except Exception as e:
            logger.error(f"Unexpected error during download: {str(e)}")
            return None, f"Error: {str(e)}"

    def save_plot(self, fig, filename: str) -> Optional[str]:
        """Save a matplotlib figure to a uniquely named PNG and verify it exists.

        Args:
            fig: Figure to save; it is closed before returning in every case.
            filename: Label for the plot, used as file-name prefix and in logs.

        Returns:
            Path of the PNG, or ``None`` when saving failed.
        """
        try:
            with tempfile.NamedTemporaryFile(
                prefix=f"{Path(filename).name}_",
                suffix='.png',
                delete=False,
                dir=self._ensure_temp_dir(),
            ) as tmp_file:
                plot_path = tmp_file.name
            # Write only after the reserving handle is closed, so the file is
            # never opened twice at the same time.
            fig.savefig(plot_path, dpi=300, bbox_inches='tight', format='png')
            if not Path(plot_path).exists():
                logger.error(f"Plot file not created: {plot_path}")
                return None
            self.plot_files.append(plot_path)
            logger.info(f"Saved plot: {plot_path}")
            return str(plot_path)
        except Exception as e:
            logger.error(f"Error saving plot {filename}: {str(e)}")
            return None
        finally:
            plt.close(fig)

    def extract_basic_features(self, audio_path: str, sr: int = 16000, max_duration: float = 60.0,
                               progress=gr.Progress()) -> Tuple[Optional[str], Optional[str], Optional[str]]:
        """Extract basic audio features and generate a 2x2 visualization.

        Plots the waveform, mel spectrogram, MFCCs and spectral
        centroid/rolloff of at most ``max_duration`` seconds of audio.

        Args:
            audio_path: Path of the audio file to analyze.
            sr: Sample rate the audio is resampled to.
            max_duration: Maximum number of seconds analyzed.
            progress: Progress callback, called as ``progress(fraction, desc=...)``.

        Returns:
            ``(plot_path, markdown_summary, error_message)``.
        """
        if not audio_path or not Path(audio_path).exists():
            return None, None, "Invalid or missing audio file"

        try:
            progress(0.1, desc="Loading audio...")
            # duration= stops decoding early instead of loading a long file
            # completely and discarding most of it.
            y, sr = librosa.load(audio_path, sr=sr, duration=max_duration)
            duration = librosa.get_duration(y=y, sr=sr)
            if duration > max_duration:
                y = y[:int(sr * max_duration)]
                duration = max_duration

            progress(0.3, desc="Computing features...")
            tempo_estimate = librosa.beat.beat_track(y=y, sr=sr)[0]
            features: Dict[str, Any] = {
                'duration': duration,
                'sample_rate': sr,
                'samples': len(y),
                # beat_track may return the tempo as a scalar or a 1-element array.
                'tempo': float(np.atleast_1d(tempo_estimate)[0]),
                'mfcc': librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13),
                'spectral_centroid': librosa.feature.spectral_centroid(y=y, sr=sr)[0],
                'spectral_rolloff': librosa.feature.spectral_rolloff(y=y, sr=sr)[0],
                'zero_crossing_rate': librosa.feature.zero_crossing_rate(y)[0]
            }

            progress(0.5, desc="Computing mel spectrogram...")
            hop_length = 512
            S_mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length, n_mels=80)
            S_dB = librosa.power_to_db(S_mel, ref=np.max)

            progress(0.8, desc="Creating visualizations...")
            fig, axes = plt.subplots(2, 2, figsize=(15, 10))

            time_axis = np.linspace(0, duration, len(y))
            axes[0, 0].plot(time_axis, y)
            axes[0, 0].set_title('Waveform')
            axes[0, 0].set_xlabel('Time (s)')
            axes[0, 0].set_ylabel('Amplitude')

            librosa.display.specshow(S_dB, sr=sr, hop_length=hop_length,
                                     x_axis='time', y_axis='mel', ax=axes[0, 1])
            axes[0, 1].set_title('Mel Spectrogram')

            librosa.display.specshow(features['mfcc'], sr=sr, x_axis='time', ax=axes[1, 0])
            axes[1, 0].set_title('MFCC')

            times = librosa.frames_to_time(
                np.arange(len(features['spectral_centroid'])), sr=sr, hop_length=hop_length)
            axes[1, 1].plot(times, features['spectral_centroid'], label='Spectral Centroid')
            axes[1, 1].plot(times, features['spectral_rolloff'], label='Spectral Rolloff')
            axes[1, 1].set_title('Spectral Features')
            axes[1, 1].set_xlabel('Time (s)')
            axes[1, 1].legend()

            plt.tight_layout()
            plot_path = self.save_plot(fig, "basic_features")
            if not plot_path:
                return None, None, "Failed to save feature visualizations"

            # Assembled line by line so no source indentation ends up in the Markdown.
            summary = "\n".join([
                "**Audio Summary:**",
                f"- Duration: {duration:.2f} seconds",
                f"- Sample Rate: {sr} Hz",
                f"- Estimated Tempo: {features['tempo']:.1f} BPM",
                f"- Number of Samples: {features['samples']:,}",
                "",
                "**Feature Shapes:**",
                f"- MFCC: {features['mfcc'].shape}",
                f"- Spectral Centroid: {features['spectral_centroid'].shape}",
                f"- Spectral Rolloff: {features['spectral_rolloff'].shape}",
                f"- Zero Crossing Rate: {features['zero_crossing_rate'].shape}",
            ])

            progress(1.0, desc="Analysis complete!")
            return plot_path, summary, None

        except Exception as e:
            logger.error(f"Error processing audio: {str(e)}")
            return None, None, f"Error processing audio: {str(e)}"

    def extract_chroma_features(self, audio_path: str, sr: int = 16000, max_duration: float = 30.0,
                                progress=gr.Progress()) -> Tuple[Optional[str], Optional[str], Optional[str]]:
        """Extract six chroma variants and plot them on a 3x2 grid.

        Args:
            audio_path: Path of the audio file to analyze.
            sr: Sample rate the audio is resampled to.
            max_duration: Maximum number of seconds analyzed.
            progress: Progress callback, called as ``progress(fraction, desc=...)``.

        Returns:
            ``(plot_path, summary, error_message)``.
        """
        if not audio_path or not Path(audio_path).exists():
            return None, None, "Invalid or missing audio file"

        try:
            progress(0.1, desc="Loading audio...")
            y, sr = librosa.load(audio_path, sr=sr, duration=max_duration)
            if len(y) > sr * max_duration:
                y = y[:int(sr * max_duration)]

            progress(0.3, desc="Computing chroma variants...")
            chroma_orig = librosa.feature.chroma_cqt(y=y, sr=sr)
            y_harm = librosa.effects.harmonic(y=y, margin=8)
            chroma_harm = librosa.feature.chroma_cqt(y=y_harm, sr=sr)
            # Element-wise minimum of the harmonic chroma and its
            # nearest-neighbour (cosine, median) filtered version.
            chroma_filter = np.minimum(
                chroma_harm,
                librosa.decompose.nn_filter(chroma_harm, aggregate=np.median, metric='cosine'),
            )
            # Median filter of width 9 along the second (frame) axis only.
            chroma_smooth = scipy.ndimage.median_filter(chroma_filter, size=(1, 9))
            chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
            chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)

            progress(0.8, desc="Creating visualizations...")
            fig, axes = plt.subplots(3, 2, figsize=(15, 12))
            panels = [
                (chroma_orig, 'Original Chroma (CQT)'),
                (chroma_harm, 'Harmonic Chroma'),
                (chroma_filter, 'Non-local Filtered'),
                (chroma_smooth, 'Median Filtered'),
                (chroma_stft, 'Chroma (STFT)'),
                (chroma_cens, 'CENS Features'),
            ]
            for ax, (chroma, title) in zip(axes.flatten(), panels):
                librosa.display.specshow(chroma, y_axis='chroma', x_axis='time', ax=ax)
                ax.set_title(title)

            plt.tight_layout()
            plot_path = self.save_plot(fig, "chroma_features")
            if not plot_path:
                return None, None, "Failed to save chroma visualizations"

            summary = "Chroma feature analysis complete! Visualizations show different chroma extraction methods for harmonic analysis."
            progress(1.0, desc="Chroma analysis complete!")
            return plot_path, summary, None

        except Exception as e:
            logger.error(f"Error processing chroma features: {str(e)}")
            return None, None, f"Error processing chroma features: {str(e)}"

    def generate_patches(self, audio_path: str, sr: int = 16000, patch_duration: float = 5.0,
                         hop_duration: float = 1.0, progress=gr.Progress()) -> Tuple[Optional[str], Optional[str], Optional[str]]:
        """Slice the mel spectrogram into fixed-duration, overlapping patches.

        Up to six patches are plotted on a 2x3 grid.

        Args:
            audio_path: Path of the audio file to analyze.
            sr: Sample rate the audio is resampled to.
            patch_duration: Length of each patch in seconds.
            hop_duration: Offset between consecutive patch starts in seconds.
            progress: Progress callback, called as ``progress(fraction, desc=...)``.

        Returns:
            ``(plot_path, markdown_summary, error_message)``.
        """
        if not audio_path or not Path(audio_path).exists():
            return None, None, "Invalid or missing audio file"

        try:
            progress(0.1, desc="Loading audio...")
            y, sr = librosa.load(audio_path, sr=sr)

            progress(0.3, desc="Computing mel spectrogram...")
            hop_length = 512
            S_mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length, n_mels=80)
            S_dB = librosa.power_to_db(S_mel, ref=np.max)

            progress(0.5, desc="Generating patches...")
            patch_frames = int(librosa.time_to_frames(patch_duration, sr=sr, hop_length=hop_length))
            # A very small hop_duration rounds down to 0 frames; framing needs at least 1.
            hop_frames = max(1, int(librosa.time_to_frames(hop_duration, sr=sr, hop_length=hop_length)))
            if patch_frames < 1 or S_dB.shape[-1] < patch_frames:
                # Report the cause directly instead of a library framing error.
                return None, None, (
                    f"Audio is too short for {patch_duration:.1f} second patches"
                )
            patches = librosa.util.frame(S_dB, frame_length=patch_frames, hop_length=hop_frames)

            progress(0.8, desc="Creating visualizations...")
            num_patches_to_show = min(6, patches.shape[-1])
            fig, axes = plt.subplots(2, 3, figsize=(18, 8))
            axes = axes.flatten()

            for i in range(num_patches_to_show):
                librosa.display.specshow(patches[..., i], y_axis='mel', x_axis='time',
                                         ax=axes[i], sr=sr, hop_length=hop_length)
                axes[i].set_title(f'Patch {i+1}')

            # Hide grid cells that have no patch to show.
            for unused_ax in axes[num_patches_to_show:]:
                unused_ax.set_visible(False)

            plt.tight_layout()
            plot_path = self.save_plot(fig, "patches")
            if not plot_path:
                return None, None, "Failed to save patch visualizations"

            # Assembled line by line so no source indentation ends up in the Markdown.
            summary = "\n".join([
                "**Patch Generation Summary:**",
                f"- Total patches generated: {patches.shape[-1]}",
                f"- Patch duration: {patch_duration:.1f} seconds",
                f"- Hop duration: {hop_duration:.1f} seconds",
                f"- Patch shape (mels, time, patches): {patches.shape}",
                f"- Each patch covers {patch_frames} time frames",
            ])

            progress(1.0, desc="Patch generation complete!")
            return plot_path, summary, None

        except Exception as e:
            logger.error(f"Error generating patches: {str(e)}")
            return None, None, f"Error generating patches: {str(e)}"
299
-
300
def create_gradio_interface() -> gr.Blocks:
    """Build the Gradio Blocks UI and wire it to a single ``AudioAnalyzer``.

    Layout: an input column (YouTube URL + download button, audio upload) and
    a results column with one tab per analysis, followed by an error box and
    a usage-tips section.

    Returns:
        The assembled, not yet launched, ``gr.Blocks`` application.
    """
    analyzer = AudioAnalyzer()

    intro_markdown = """
        # 🎵 Audio Analysis Suite

        Analyze audio from YouTube videos or uploaded files. Extract features or generate transformer patches for deep learning applications.

        **Features:**
        - 📊 **Basic Features**: Waveform, Mel Spectrogram, MFCC, Spectral Analysis, Tempo Detection
        - 🎼 **Chroma Features**: Harmonic content analysis with multiple extraction methods
        - 🧩 **Transformer Patches**: Fixed-duration patches for deep learning

        **Requirements**: Dependencies are automatically installed in Hugging Face Spaces via `requirements.txt`.
        """
    tips_markdown = """
        ### ℹ️ Usage Tips
        - **Processing Limits**: 60s for basic features, 30s for chroma features for fast response
        - **YouTube Downloads**: Ensure URLs are valid and respect YouTube's terms of service
        - **Visualizations**: High-quality, suitable for research and education
        - **Storage**: Temporary files are cleaned up when the interface closes
        - **Support**: For issues, check the [GitHub repository](https://github.com/your-repo)
        """

    with gr.Blocks(title="🎵 Audio Analysis Suite", theme=gr.themes.Soft()) as demo:
        gr.Markdown(intro_markdown)

        with gr.Row():
            # Left column: where the audio comes from.
            with gr.Column(scale=1):
                gr.Markdown("### 📁 Audio Input")
                with gr.Group():
                    gr.Markdown("**Download from YouTube** (Supported formats: MP3, WAV, etc.)")
                    youtube_url = gr.Textbox(
                        placeholder="https://www.youtube.com/watch?v=...",
                        label="YouTube URL",
                    )
                    download_btn = gr.Button("📥 Download Audio", variant="primary")
                    download_status = gr.Textbox(interactive=False, label="Download Status")

                with gr.Group():
                    gr.Markdown("**Or upload audio file** (Supported formats: MP3, WAV, FLAC, etc.)")
                    audio_file = gr.Audio(type="filepath", label="Upload Audio File")

            # Right column: one tab per analysis.
            with gr.Column(scale=2):
                gr.Markdown("### 🔍 Analysis Results")
                with gr.Tabs():
                    with gr.Tab("📊 Basic Features"):
                        basic_plot = gr.Image(label="Feature Visualizations")
                        basic_summary = gr.Markdown(label="Feature Summary")
                        basic_btn = gr.Button("🔍 Analyze Basic Features", variant="secondary")

                    with gr.Tab("🎼 Chroma Features"):
                        chroma_plot = gr.Image(label="Chroma Visualizations")
                        chroma_summary = gr.Markdown(label="Chroma Summary")
                        chroma_btn = gr.Button("🎼 Analyze Chroma Features", variant="secondary")

                    with gr.Tab("🧩 Transformer Patches"):
                        with gr.Row():
                            patch_duration = gr.Slider(
                                minimum=1.0, maximum=10.0, value=5.0, step=0.5,
                                label="Patch Duration (seconds)",
                            )
                            hop_duration = gr.Slider(
                                minimum=0.1, maximum=5.0, value=1.0, step=0.1,
                                label="Hop Duration (seconds)",
                            )
                        patches_plot = gr.Image(label="Generated Patches")
                        patches_summary = gr.Markdown(label="Patch Summary")
                        patches_btn = gr.Button("🧩 Generate Patches", variant="secondary")

                error_output = gr.Textbox(label="Error Messages", interactive=False)

        gr.Markdown(tips_markdown)

        # Event handlers
        download_btn.click(
            fn=analyzer.download_youtube_audio,
            inputs=[youtube_url],
            outputs=[audio_file, download_status],
        )

        # The three analysis buttons share one wiring shape:
        # (button, handler, inputs, [plot, summary]) plus the common error box.
        analysis_wiring = (
            (basic_btn, analyzer.extract_basic_features,
             [audio_file], [basic_plot, basic_summary]),
            (chroma_btn, analyzer.extract_chroma_features,
             [audio_file], [chroma_plot, chroma_summary]),
            (patches_btn, analyzer.generate_patches,
             [audio_file, patch_duration, hop_duration], [patches_plot, patches_summary]),
        )
        for button, handler, handler_inputs, result_views in analysis_wiring:
            button.click(
                fn=handler,
                inputs=handler_inputs,
                outputs=[*result_views, error_output],
            )

        # Re-run the basic analysis whenever the audio component's value changes.
        audio_file.change(
            fn=analyzer.extract_basic_features,
            inputs=[audio_file],
            outputs=[basic_plot, basic_summary, error_output],
        )

        demo.unload(fn=analyzer.cleanup)

    return demo
426
 
427
-
428
-
429
-
430
-
431
-
432
-
433
-
434
-
435
-
436
-
437
-
438
-
439
-
440
-
441
-
442
-
443
-
444
-
445
-
446
-
447
-
448
-
449
-
450
-
451
-
452
if __name__ == "__main__":
    # Script entry point: build the Blocks app, then start the Gradio server.
    demo = create_gradio_interface()
    demo.launch()
 
1
  import gradio as gr
2
  import subprocess
 
3
  import tempfile
4
  import librosa
5
  import librosa.display
 
7
  import numpy as np
8
  import scipy.ndimage
9
  from pathlib import Path
 
10
  import logging
11
  import warnings
 
 
 
12
  import shutil
13
+ from typing import Tuple, Optional
14
 
15
# Render figures off-screen: plots are written to PNG files, never shown in a window.
plt.switch_backend("Agg")
# Suppress all Python warnings process-wide.
warnings.filterwarnings("ignore")
# Log INFO-and-above records with a timestamped format.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
20
 
21
class AudioAnalyzer:
    """Download audio and render librosa feature plots.

    Downloads and plots are written below ``self.temp_dir``. ``analyze_audio``
    is the public entry point for analysis; the ``_*_analysis`` helpers each
    produce one figure and return ``(plot_path, markdown_summary)``.
    """

    def __init__(self):
        """Create a private temporary directory and an empty plot registry."""
        self.temp_dir = Path(tempfile.mkdtemp())
        self.plot_files = []  # Paths of saved plots, tracked for cleanup

    def cleanup(self):
        """Delete all tracked plots and the temporary directory.

        The instance stays usable afterwards: the plot list is emptied and
        the directory is recreated on demand by the next download or plot.
        """
        for plot_file in self.plot_files:
            Path(plot_file).unlink(missing_ok=True)
        self.plot_files.clear()
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def download_youtube_audio(self, video_url: str, progress=gr.Progress()) -> Tuple[Optional[str], str]:
        """Download a video's audio track with yt-dlp and convert it to MP3.

        Args:
            video_url: URL handed to ``yt-dlp``.
            progress: Progress callback, called as ``progress(fraction, desc=...)``.

        Returns:
            ``(path_to_mp3, status_message)``; the path is ``None`` on failure.
        """
        if not video_url:
            return None, "Please provide a valid YouTube URL"

        progress(0.1, desc="Downloading...")
        self.temp_dir.mkdir(parents=True, exist_ok=True)
        # A fresh directory per download: with a fixed output path yt-dlp may
        # find the previous file and skip the download, returning stale audio.
        download_dir = Path(tempfile.mkdtemp(prefix="download_", dir=self.temp_dir))
        output_file = download_dir / "audio.mp3"

        try:
            subprocess.run([
                "yt-dlp", "-x", "--audio-format", "mp3",
                # Only the single video, even when the URL also names a playlist.
                "--no-playlist",
                # %(ext)s lets yt-dlp name the raw download and the converted
                # MP3 differently; the final file is audio.mp3.
                "-o", str(download_dir / "audio.%(ext)s"),
                # End of options: a URL starting with "-" must not be parsed as a flag.
                "--", video_url,
            ], check=True, capture_output=True, text=True)  # text=True: stderr as str, not bytes
        except FileNotFoundError:
            return None, "yt-dlp not found. Install with: pip install yt-dlp"
        except subprocess.CalledProcessError as e:
            return None, f"Download failed: {e.stderr}"

        if not output_file.exists():
            return None, "Download failed: yt-dlp did not produce an MP3 file"
        progress(1.0, desc="Complete!")
        return str(output_file), "Download successful"

    def save_plot(self, fig) -> str:
        """Write ``fig`` to a uniquely named PNG and return its path.

        The figure is closed even when saving raises, so a failed save does
        not leave the figure open.
        """
        self.temp_dir.mkdir(parents=True, exist_ok=True)
        # NamedTemporaryFile reserves a unique name, so concurrent calls
        # cannot overwrite each other's plot.
        with tempfile.NamedTemporaryFile(
            prefix="plot_", suffix=".png", dir=self.temp_dir, delete=False
        ) as handle:
            plot_path = handle.name
        try:
            fig.savefig(plot_path, dpi=150, bbox_inches='tight')
        finally:
            plt.close(fig)
        self.plot_files.append(plot_path)
        return plot_path

    def analyze_audio(self, audio_path: str, analysis_type: str = "basic",
                      patch_duration: float = 5.0, progress=gr.Progress()) -> Tuple[Optional[str], str]:
        """Load an audio file and run the selected analysis.

        Args:
            audio_path: Path of the audio file to analyze.
            analysis_type: ``"basic"``, ``"chroma"`` or ``"patches"``.
            patch_duration: Patch length in seconds (``"patches"`` only).
            progress: Progress callback, called as ``progress(fraction, desc=...)``.

        Returns:
            ``(plot_path, message)``. ``plot_path`` is ``None`` when the
            analysis could not be run; ``message`` then explains why.
        """
        if not audio_path or not Path(audio_path).exists():
            return None, "No audio file provided"
        if analysis_type not in ("basic", "chroma", "patches"):
            # Always return a 2-tuple; falling through would return None.
            return None, f"Unknown analysis type: {analysis_type}"

        try:
            progress(0.1, desc="Loading audio...")
            # Limit duration for processing
            max_duration = 60 if analysis_type == "basic" else 30
            # duration= stops decoding early instead of loading a long file
            # completely and discarding most of it.
            y, sr = librosa.load(audio_path, sr=22050, duration=max_duration)
            if len(y) == 0:
                return None, "Analysis failed: audio file contains no samples"

            duration = len(y) / sr
            if duration > max_duration:
                y = y[:int(sr * max_duration)]
                duration = max_duration

            if analysis_type == "basic":
                return self._basic_analysis(y, sr, duration, progress)
            if analysis_type == "chroma":
                return self._chroma_analysis(y, sr, progress)
            return self._patch_analysis(y, sr, patch_duration, progress)

        except Exception as e:
            # logger.exception records the traceback along with the message.
            logger.exception("Analysis error")
            return None, f"Analysis failed: {str(e)}"

    def _basic_analysis(self, y, sr, duration, progress):
        """Plot waveform, mel spectrogram, MFCCs and spectral centroid/rolloff."""
        progress(0.3, desc="Computing features...")

        # Extract features
        tempo_estimate = librosa.beat.beat_track(y=y, sr=sr)[0]
        # beat_track may return the tempo as a scalar or a 1-element array.
        tempo = float(np.atleast_1d(tempo_estimate)[0])
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
        spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]

        progress(0.6, desc="Creating visualizations...")

        # Create mel spectrogram
        S_mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=80)
        S_dB = librosa.power_to_db(S_mel, ref=np.max)

        fig, axes = plt.subplots(2, 2, figsize=(12, 8))

        # Waveform
        sample_times = np.linspace(0, duration, len(y))
        axes[0, 0].plot(sample_times, y, alpha=0.8)
        axes[0, 0].set_title('Waveform', fontweight='bold')
        axes[0, 0].set_xlabel('Time (s)')

        # Mel Spectrogram
        librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel', ax=axes[0, 1])
        axes[0, 1].set_title('Mel Spectrogram', fontweight='bold')

        # MFCC
        librosa.display.specshow(mfcc, sr=sr, x_axis='time', ax=axes[1, 0])
        axes[1, 0].set_title('MFCC Features', fontweight='bold')

        # Spectral features
        frame_times = librosa.frames_to_time(np.arange(len(spectral_centroid)), sr=sr)
        axes[1, 1].plot(frame_times, spectral_centroid, label='Centroid', linewidth=2)
        axes[1, 1].plot(frame_times, spectral_rolloff, label='Rolloff', linewidth=2)
        axes[1, 1].set_title('Spectral Features', fontweight='bold')
        axes[1, 1].legend()
        axes[1, 1].set_xlabel('Time (s)')

        plt.tight_layout()
        plot_path = self.save_plot(fig)

        # Assembled line by line so no source indentation ends up in the Markdown.
        summary = "\n".join([
            "**Audio Analysis Results**",
            f"- Duration: {duration:.1f}s | Sample Rate: {sr:,} Hz",
            f"- Tempo: {tempo:.1f} BPM | Samples: {len(y):,}",
            f"- MFCC shape: {mfcc.shape} | Features extracted successfully",
        ])

        progress(1.0, desc="Complete!")
        return plot_path, summary

    def _chroma_analysis(self, y, sr, progress):
        """Plot CQT, STFT and harmonic chroma plus the CQT-minus-harmonic difference."""
        progress(0.3, desc="Computing chroma features...")

        # Different chroma extraction methods
        chroma_cqt = librosa.feature.chroma_cqt(y=y, sr=sr)
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)

        # Harmonic separation
        y_harm = librosa.effects.harmonic(y=y)
        chroma_harm = librosa.feature.chroma_cqt(y=y_harm, sr=sr)

        progress(0.7, desc="Creating visualizations...")

        fig, axes = plt.subplots(2, 2, figsize=(12, 8))
        panels = [
            (chroma_cqt, 'Chroma (CQT)'),
            (chroma_stft, 'Chroma (STFT)'),
            (chroma_harm, 'Harmonic Chroma'),
            (chroma_cqt - chroma_harm, 'Chroma Difference'),
        ]
        # axes.flat walks the grid row by row, matching the panel order above.
        for ax, (chroma, title) in zip(axes.flat, panels):
            librosa.display.specshow(chroma, y_axis='chroma', x_axis='time', ax=ax)
            ax.set_title(title, fontweight='bold')

        plt.tight_layout()
        plot_path = self.save_plot(fig)

        summary = "\n".join([
            "**Chroma Analysis Results**",
            "- Multiple chroma extraction methods compared",
            "- CQT vs STFT analysis | Harmonic separation applied",
            f"- Chroma shape: {chroma_cqt.shape}",
        ])

        progress(1.0, desc="Complete!")
        return plot_path, summary

    def _patch_analysis(self, y, sr, patch_duration, progress):
        """Cut the mel spectrogram into half-overlapping patches and plot up to six."""
        progress(0.3, desc="Generating patches...")

        # Create mel spectrogram
        hop_length = 512
        S_mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length, n_mels=80)
        S_dB = librosa.power_to_db(S_mel, ref=np.max)

        # Generate patches
        patch_frames = int(librosa.time_to_frames(patch_duration, sr=sr, hop_length=hop_length))
        if patch_frames < 1 or S_dB.shape[-1] < patch_frames:
            # Report the cause directly instead of a library framing error.
            return None, f"Analysis failed: audio is too short for {patch_duration}s patches"
        hop_frames = max(1, patch_frames // 2)  # 50% overlap, never a zero hop
        patches = librosa.util.frame(S_dB, frame_length=patch_frames, hop_length=hop_frames)

        progress(0.7, desc="Creating visualizations...")

        # Show first 6 patches
        num_show = min(6, patches.shape[-1])
        fig, axes = plt.subplots(2, 3, figsize=(15, 8))
        axes = axes.flatten()

        for i in range(num_show):
            librosa.display.specshow(patches[..., i], y_axis='mel', x_axis='time',
                                     ax=axes[i], sr=sr, hop_length=hop_length)
            axes[i].set_title(f'Patch {i+1}', fontweight='bold')

        # Hide unused subplots
        for unused_ax in axes[num_show:]:
            unused_ax.set_visible(False)

        plt.tight_layout()
        plot_path = self.save_plot(fig)

        summary = "\n".join([
            "**Patch Generation Results**",
            f"- Total patches: {patches.shape[-1]} | Duration: {patch_duration}s each",
            f"- Patch shape: {patches.shape} | 50% overlap between patches",
            "- Ready for transformer input",
        ])

        progress(1.0, desc="Complete!")
        return plot_path, summary
+
214
def create_interface():
    """Build the Gradio Blocks UI around a single ``AudioAnalyzer``.

    Left column: YouTube URL + download button, audio upload, analysis-type
    selector, patch-duration slider (hidden unless "patches" is selected) and
    the analyze button. Right column: plot, Markdown summary and status box.

    Returns:
        The assembled, not yet launched, Blocks application.
    """
    analyzer = AudioAnalyzer()

    def _patch_slider_visibility(selected_type):
        # The slider only applies to the "patches" analysis.
        return gr.update(visible=(selected_type == "patches"))

    with gr.Blocks(title="Audio Analysis Suite") as demo:
        gr.Markdown("# 🎵 Audio Analysis Suite")

        with gr.Row():
            with gr.Column():
                # Audio source
                gr.Markdown("### Input")
                url_box = gr.Textbox(
                    label="YouTube URL",
                    placeholder="https://youtube.com/watch?v=...",
                )
                fetch_button = gr.Button("Download Audio")

                audio_input = gr.Audio(label="Or upload audio file", type="filepath")

                # What to compute
                gr.Markdown("### Analysis Options")
                mode_radio = gr.Radio(
                    label="Analysis Type",
                    choices=["basic", "chroma", "patches"],
                    value="basic",
                )
                patch_slider = gr.Slider(
                    minimum=1,
                    maximum=10,
                    value=5,
                    step=0.5,
                    label="Patch Duration (s)",
                    visible=False,
                )

                run_button = gr.Button("Analyze Audio", variant="primary")

            with gr.Column():
                # Output widgets
                gr.Markdown("### Results")
                plot_view = gr.Image(label="Visualizations")
                summary_view = gr.Markdown()
                status_box = gr.Textbox(label="Status", interactive=False)

        # Event wiring
        fetch_button.click(
            analyzer.download_youtube_audio,
            inputs=[url_box],
            outputs=[audio_input, status_box],
        )

        run_button.click(
            analyzer.analyze_audio,
            inputs=[audio_input, mode_radio, patch_slider],
            outputs=[plot_view, summary_view],
        )

        # Show the patch-duration slider only for the "patches" analysis.
        mode_radio.change(
            _patch_slider_visibility,
            inputs=[mode_radio],
            outputs=[patch_slider],
        )

        demo.unload(analyzer.cleanup)

    return demo
271
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
272
if __name__ == "__main__":
    # Script entry point: build the Blocks app, then start the Gradio server.
    demo = create_interface()
    demo.launch()