SingA

Sleeping

App Files Files Community

latterworks committed on May 25

Commit

1902030

verified ·

1 Parent(s): d7fc5bc

Update app.py

Browse files

Files changed (1) hide show

app.py +252 -327

app.py CHANGED Viewed

@@ -8,443 +8,368 @@ import matplotlib.pyplot as plt
 import numpy as np
 import scipy.ndimage
 from pathlib import Path
 import warnings
-warnings.filterwarnings('ignore')
-# Set matplotlib backend for web display
 plt.switch_backend('Agg')
 class AudioAnalyzer:
-    def __init__(self):
-        self.temp_dir = tempfile.mkdtemp()
-    def download_youtube_audio(self, video_url, progress=gr.Progress()):
-        """Download audio from YouTube video using yt-dlp."""
         if not video_url:
-            return None, "Please provide a YouTube URL"
         progress(0.1, desc="Initializing download...")
-        output_dir = os.path.join(self.temp_dir, "downloaded_audio")
-        os.makedirs(output_dir, exist_ok=True)
-        # yt-dlp command to extract audio in mp3 format
         command = [
             "yt-dlp",
             "-x",
             "--audio-format", "mp3",
-            "-o", os.path.join(output_dir, "%(title)s.%(ext)s"),
             "--no-playlist",
             "--restrict-filenames",
             video_url
         ]
         try:
             progress(0.3, desc="Downloading audio...")
-            result = subprocess.run(command, check=True, capture_output=True, text=True)
-            # Find the downloaded file
-            for file in os.listdir(output_dir):
-                if file.endswith('.mp3'):
-                    file_path = os.path.join(output_dir, file)
-                    progress(1.0, desc="Download complete!")
-                    return file_path, f"Successfully downloaded: {file}"
-            return None, "Download completed but no audio file found"
         except FileNotFoundError:
-            return None, "yt-dlp not found. Please install it: pip install yt-dlp"
         except subprocess.CalledProcessError as e:
             return None, f"Download failed: {e.stderr}"
         except Exception as e:
             return None, f"Unexpected error: {str(e)}"
-    def extract_basic_features(self, audio_path, sr=16000, progress=gr.Progress()):
-        """Extract basic audio features and create visualizations."""
-        if not audio_path or not os.path.exists(audio_path):
-            return None, None, "Invalid audio file"
         try:
             progress(0.1, desc="Loading audio...")
             y, sr = librosa.load(audio_path, sr=sr)
             duration = librosa.get_duration(y=y, sr=sr)
-            # Limit to first 60 seconds for processing speed
-            max_duration = 60
             if duration > max_duration:
-                y = y[:sr * max_duration]
                 duration = max_duration
             progress(0.3, desc="Computing features...")
-            # Basic features
-            features = {}
-            features['duration'] = duration
-            features['sample_rate'] = sr
-            features['samples'] = len(y)
-            # Mel spectrogram
             progress(0.5, desc="Computing mel spectrogram...")
             hop_length = 512
-            S_mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length)
             S_dB = librosa.power_to_db(S_mel, ref=np.max)
-            # Other features
-            features['tempo'], _ = librosa.beat.beat_track(y=y, sr=sr)
-            features['mfcc'] = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
-            features['spectral_centroid'] = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
-            features['spectral_rolloff'] = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]
-            features['zero_crossing_rate'] = librosa.feature.zero_crossing_rate(y)[0]
             progress(0.8, desc="Creating visualizations...")
-            # Create visualizations
             fig, axes = plt.subplots(2, 2, figsize=(15, 10))
-            # Waveform
             time_axis = librosa.frames_to_time(range(len(y)), sr=sr)
             axes[0, 0].plot(time_axis, y)
             axes[0, 0].set_title('Waveform')
             axes[0, 0].set_xlabel('Time (s)')
             axes[0, 0].set_ylabel('Amplitude')
-            # Mel spectrogram
             librosa.display.specshow(S_dB, sr=sr, hop_length=hop_length,
-                                   x_axis='time', y_axis='mel', ax=axes[0, 1])
             axes[0, 1].set_title('Mel Spectrogram')
-            # MFCC
             librosa.display.specshow(features['mfcc'], sr=sr, x_axis='time', ax=axes[1, 0])
             axes[1, 0].set_title('MFCC')
-            # Spectral features
             times = librosa.frames_to_time(range(len(features['spectral_centroid'])), sr=sr, hop_length=hop_length)
             axes[1, 1].plot(times, features['spectral_centroid'], label='Spectral Centroid')
             axes[1, 1].plot(times, features['spectral_rolloff'], label='Spectral Rolloff')
             axes[1, 1].set_title('Spectral Features')
             axes[1, 1].set_xlabel('Time (s)')
             axes[1, 1].legend()
             plt.tight_layout()
-            # Save plot
-            plot_path = os.path.join(self.temp_dir, f"basic_features_{np.random.randint(10000)}.png")
             plt.savefig(plot_path, dpi=150, bbox_inches='tight')
             plt.close()
-            # Create summary text
             summary = f"""
-            **Audio Summary:**
-            - Duration: {duration:.2f} seconds
-            - Sample Rate: {sr} Hz
-            - Estimated Tempo: {features['tempo']:.1f} BPM
-            - Number of Samples: {len(y):,}
-            **Feature Shapes:**
-            - MFCC: {features['mfcc'].shape}
-            - Spectral Centroid: {features['spectral_centroid'].shape}
-            - Spectral Rolloff: {features['spectral_rolloff'].shape}
-            - Zero Crossing Rate: {features['zero_crossing_rate'].shape}
             """
             progress(1.0, desc="Analysis complete!")
-            return plot_path, summary, None
         except Exception as e:
             return None, None, f"Error processing audio: {str(e)}"
-    def extract_chroma_features(self, audio_path, sr=16000, progress=gr.Progress()):
         """Extract and visualize enhanced chroma features."""
-        if not audio_path or not os.path.exists(audio_path):
-            return None, "Invalid audio file"
         try:
             progress(0.1, desc="Loading audio...")
             y, sr = librosa.load(audio_path, sr=sr)
-            # Limit to first 30 seconds for processing speed
-            max_duration = 30
             if len(y) > sr * max_duration:
-                y = y[:sr * max_duration]
             progress(0.3, desc="Computing chroma variants...")
-            # Original chroma
             chroma_orig = librosa.feature.chroma_cqt(y=y, sr=sr)
-            # Harmonic-percussive separation
             y_harm = librosa.effects.harmonic(y=y, margin=8)
             chroma_harm = librosa.feature.chroma_cqt(y=y_harm, sr=sr)
-            progress(0.6, desc="Applying filters...")
-            # Non-local filtering
             chroma_filter = np.minimum(chroma_harm,
                                      librosa.decompose.nn_filter(chroma_harm,
                                                                 aggregate=np.median,
                                                                 metric='cosine'))
-            # Median filtering
             chroma_smooth = scipy.ndimage.median_filter(chroma_filter, size=(1, 9))
-            # STFT-based chroma
             chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
-            # CENS features
             chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)
             progress(0.8, desc="Creating visualizations...")
-            # Create comprehensive visualization
             fig, axes = plt.subplots(3, 2, figsize=(15, 12))
-            # Original vs Harmonic
-            librosa.display.specshow(chroma_orig, y_axis='chroma', x_axis='time', ax=axes[0, 0])
-            axes[0, 0].set_title('Original Chroma (CQT)')
-            librosa.display.specshow(chroma_harm, y_axis='chroma', x_axis='time', ax=axes[0, 1])
-            axes[0, 1].set_title('Harmonic Chroma')
-            # Filtered vs Smooth
-            librosa.display.specshow(chroma_filter, y_axis='chroma', x_axis='time', ax=axes[1, 0])
-            axes[1, 0].set_title('Non-local Filtered')
-            librosa.display.specshow(chroma_smooth, y_axis='chroma', x_axis='time', ax=axes[1, 1])
-            axes[1, 1].set_title('Median Filtered')
-            # STFT vs CENS
-            librosa.display.specshow(chroma_stft, y_axis='chroma', x_axis='time', ax=axes[2, 0])
-            axes[2, 0].set_title('Chroma (STFT)')
-            librosa.display.specshow(chroma_cens, y_axis='chroma', x_axis='time', ax=axes[2, 1])
-            axes[2, 1].set_title('CENS Features')
             plt.tight_layout()
-            # Save plot
-            plot_path = os.path.join(self.temp_dir, f"chroma_features_{np.random.randint(10000)}.png")
             plt.savefig(plot_path, dpi=150, bbox_inches='tight')
             plt.close()
             progress(1.0, desc="Chroma analysis complete!")
-            return plot_path, None
         except Exception as e:
-            return None, f"Error processing chroma features: {str(e)}"
-    def generate_patches(self, audio_path, sr=16000, patch_duration=5.0, hop_duration=1.0, progress=gr.Progress()):
         """Generate fixed-duration patches for transformer input."""
-        if not audio_path or not os.path.exists(audio_path):
-            return None, None, "Invalid audio file"
         try:
             progress(0.1, desc="Loading audio...")
             y, sr = librosa.load(audio_path, sr=sr)
             progress(0.3, desc="Computing mel spectrogram...")
             hop_length = 512
             S_mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length, n_mels=80)
             S_dB = librosa.power_to_db(S_mel, ref=np.max)
             progress(0.5, desc="Generating patches...")
-            # Convert time to frames
             patch_frames = librosa.time_to_frames(patch_duration, sr=sr, hop_length=hop_length)
             hop_frames = librosa.time_to_frames(hop_duration, sr=sr, hop_length=hop_length)
-            # Generate patches using librosa.util.frame
             patches = librosa.util.frame(S_dB, frame_length=patch_frames, hop_length=hop_frames)
             progress(0.8, desc="Creating visualizations...")
-            # Visualize patches
             num_patches_to_show = min(6, patches.shape[-1])
             fig, axes = plt.subplots(2, 3, figsize=(18, 8))
             axes = axes.flatten()
             for i in range(num_patches_to_show):
                 librosa.display.specshow(patches[..., i], y_axis='mel', x_axis='time',
                                        ax=axes[i], sr=sr, hop_length=hop_length)
                 axes[i].set_title(f'Patch {i+1}')
-            # Hide unused subplots
             for i in range(num_patches_to_show, len(axes)):
                 axes[i].set_visible(False)
             plt.tight_layout()
-            # Save plot
-            plot_path = os.path.join(self.temp_dir, f"patches_{np.random.randint(10000)}.png")
             plt.savefig(plot_path, dpi=150, bbox_inches='tight')
             plt.close()
-            # Summary
             summary = f"""
-            **Patch Generation Summary:**
-            - Total patches generated: {patches.shape[-1]}
-            - Patch duration: {patch_duration} seconds
-            - Hop duration: {hop_duration} seconds
-            - Patch shape (mels, time, patches): {patches.shape}
-            - Each patch covers {patch_frames} time frames
             """
             progress(1.0, desc="Patch generation complete!")
-            return plot_path, summary, None
         except Exception as e:
             return None, None, f"Error generating patches: {str(e)}"
-# Initialize analyzer
-analyzer = AudioAnalyzer()
-# Gradio interface functions
-def process_youtube_url(url):
-    """Process YouTube URL and return audio file."""
-    file_path, message = analyzer.download_youtube_audio(url)
-    if file_path:
-        return file_path, message, gr.update(visible=True)
-    else:
-        return None, message, gr.update(visible=False)
-def analyze_audio_basic(audio_file):
-    """Analyze audio file and return basic features."""
-    if audio_file is None:
-        return None, "Please upload an audio file or download from YouTube first."
-    plot_path, summary, error = analyzer.extract_basic_features(audio_file)
-    if error:
-        return None, error
-    return plot_path, summary
-def analyze_audio_chroma(audio_file):
-    """Analyze audio file for chroma features."""
-    if audio_file is None:
-        return None, "Please upload an audio file or download from YouTube first."
-    plot_path, error = analyzer.extract_chroma_features(audio_file)
-    if error:
-        return None, error
-    return plot_path, "Chroma feature analysis complete! This shows different chroma extraction methods for harmonic analysis."
-def analyze_audio_patches(audio_file, patch_duration, hop_duration):
-    """Generate transformer patches from audio."""
-    if audio_file is None:
-        return None, None, "Please upload an audio file or download from YouTube first."
-    plot_path, summary, error = analyzer.generate_patches(audio_file, patch_duration=patch_duration, hop_duration=hop_duration)
-    if error:
-        return None, None, error
-    return plot_path, summary
-# Create Gradio interface
-with gr.Blocks(title="🎵 Audio Analysis Suite", theme=gr.themes.Soft()) as app:
-    gr.Markdown("""
-    # 🎵 Audio Analysis Suite
-    A comprehensive tool for audio feature extraction and analysis. Upload an audio file or download from YouTube to get started!
-    **Features:**
-    - 📊 **Basic Features**: Waveform, Mel Spectrogram, MFCC, Spectral Analysis, Tempo Detection
-    - 🎼 **Chroma Features**: Advanced harmonic content analysis with multiple extraction methods
-    - 🧩 **Transformer Patches**: Generate fixed-duration patches for deep learning applications
-    """)
-    with gr.Row():
-        with gr.Column(scale=1):
-            gr.Markdown("### 📁 Audio Input")
-            # YouTube downloader
-            with gr.Group():
-                gr.Markdown("**Download from YouTube:**")
-                youtube_url = gr.Textbox(
-                    label="YouTube URL",
-                    placeholder="https://www.youtube.com/watch?v=...",
-                    info="Paste a YouTube video URL to extract audio"
-                )
-                download_btn = gr.Button("📥 Download Audio", variant="primary")
-                download_status = gr.Textbox(label="Download Status", interactive=False)
-            # File upload
-            with gr.Group():
-                gr.Markdown("**Or upload audio file:**")
-                audio_file = gr.Audio(
-                    label="Upload Audio File",
-                    type="filepath",
-                    info="Supported formats: MP3, WAV, FLAC, etc."
-                )
-        with gr.Column(scale=2):
-            gr.Markdown("### 🔍 Analysis Results")
-            with gr.Tabs():
-                with gr.Tab("📊 Basic Features"):
-                    basic_plot = gr.Image(label="Feature Visualizations")
-                    basic_summary = gr.Markdown()
-                    basic_analyze_btn = gr.Button("🔍 Analyze Basic Features", variant="secondary")
-                with gr.Tab("🎼 Chroma Features"):
-                    chroma_plot = gr.Image(label="Chroma Visualizations")
-                    chroma_summary = gr.Markdown()
-                    chroma_analyze_btn = gr.Button("🎼 Analyze Chroma Features", variant="secondary")
-                with gr.Tab("🧩 Transformer Patches"):
-                    with gr.Row():
-                        patch_duration = gr.Slider(
-                            label="Patch Duration (seconds)",
-                            minimum=1.0, maximum=10.0, value=5.0, step=0.5,
-                            info="Duration of each patch"
-                        )
-                        hop_duration = gr.Slider(
-                            label="Hop Duration (seconds)",
-                            minimum=0.1, maximum=5.0, value=1.0, step=0.1,
-                            info="Time between patch starts"
-                        )
-                    patches_plot = gr.Image(label="Generated Patches")
-                    patches_summary = gr.Markdown()
-                    patches_analyze_btn = gr.Button("🧩 Generate Patches", variant="secondary")
-    gr.Markdown("""
-    ### ℹ️ Usage Tips
-    - **Processing is limited to 60 seconds** for basic features and 30 seconds for chroma analysis to ensure fast response times
-    - **YouTube downloads** respect platform terms of service
-    - **Visualizations** are high-quality and suitable for research/educational use
-    - **All processing** is done locally in your browser session
-    """)
-    # Event handlers
-    download_btn.click(
-        process_youtube_url,
-        inputs=[youtube_url],
-        outputs=[audio_file, download_status, basic_analyze_btn]
-    )
-    basic_analyze_btn.click(
-        analyze_audio_basic,
-        inputs=[audio_file],
-        outputs=[basic_plot, basic_summary]
-    )
-    chroma_analyze_btn.click(
-        analyze_audio_chroma,
-        inputs=[audio_file],
-        outputs=[chroma_plot, chroma_summary]
-    )
-    patches_analyze_btn.click(
-        analyze_audio_patches,
-        inputs=[audio_file, patch_duration, hop_duration],
-        outputs=[patches_plot, patches_summary]
-    )
-    # Auto-analyze when file is uploaded
-    audio_file.change(
-        analyze_audio_basic,
-        inputs=[audio_file],
-        outputs=[basic_plot, basic_summary]
-    )
-if __name__ == "__main__":
-    app.launch()

 import numpy as np
 import scipy.ndimage
 from pathlib import Path
+import logging
 import warnings
+import shutil
+from typing import Tuple, Optional, Dict, Any
+# Use the non-interactive Agg backend: figures in this file are only written
+# to image files with savefig, never shown in a window.
 plt.switch_backend('Agg')
+# Silence every warning category process-wide.
+warnings.filterwarnings('ignore')
+# Log INFO and above through a single stream handler, with a timestamp and
+# the level name in front of each message.
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+    handlers=[logging.StreamHandler()]
+)
+# Module-level logger used by AudioAnalyzer below.
+logger = logging.getLogger(__name__)
 class AudioAnalyzer:
+    """Core class for audio analysis with modular feature extraction methods.
+
+    All artifacts (downloaded audio and rendered plots) are written below
+    ``self.temp_dir``. ``cleanup`` may run while the object is still in use,
+    so every method that writes a file re-creates the directory on demand
+    instead of assuming it still exists.
+    """
+    def __init__(self, temp_dir: Optional[str] = None):
+        """Initialize with a temporary directory for file storage.
+
+        Args:
+            temp_dir: Directory to store files in. A fresh directory is
+                created with ``tempfile.mkdtemp`` when omitted.
+        """
+        self.temp_dir = Path(temp_dir or tempfile.mkdtemp())
+        # parents=True so a caller-supplied nested path works as well.
+        self.temp_dir.mkdir(parents=True, exist_ok=True)
+        logger.info("Initialized temporary directory: %s", self.temp_dir)
+    def cleanup(self) -> None:
+        """Remove temporary directory and its contents."""
+        if self.temp_dir.exists():
+            shutil.rmtree(self.temp_dir)
+            logger.info("Cleaned up temporary directory: %s", self.temp_dir)
+    def _new_plot_path(self, prefix: str) -> Path:
+        """Reserve a unique ``<prefix>_*.png`` file inside the temp directory.
+
+        The file is created empty so that concurrent requests can never pick
+        the same name; the caller overwrites it with the rendered figure.
+        """
+        self.temp_dir.mkdir(parents=True, exist_ok=True)
+        with tempfile.NamedTemporaryFile(
+            dir=self.temp_dir, prefix=f"{prefix}_", suffix=".png", delete=False
+        ) as handle:
+            return Path(handle.name)
+    def download_youtube_audio(self, video_url: str, progress=gr.Progress()) -> Tuple[Optional[str], str]:
+        """Download audio from YouTube using yt-dlp.
+
+        Args:
+            video_url: URL of the video whose audio track is extracted.
+            progress: Gradio progress tracker.
+
+        Returns:
+            ``(path, message)``. ``path`` is the MP3 file on success and
+            ``None`` on failure; ``message`` describes the outcome.
+        """
+        video_url = (video_url or "").strip()
         if not video_url:
+            return None, "Please provide a valid YouTube URL"
         progress(0.1, desc="Initializing download...")
+        output_dir = self.temp_dir / "downloaded_audio"
+        output_dir.mkdir(parents=True, exist_ok=True)
+        # One fresh directory per download: with a single fixed output path
+        # yt-dlp can treat the previous file as "already downloaded", so a
+        # new URL would hand back the old audio.
+        download_dir = Path(tempfile.mkdtemp(dir=output_dir))
+        output_file = download_dir / "audio.mp3"
         command = [
             "yt-dlp",
             "-x",
             "--audio-format", "mp3",
+            # %(ext)s names the intermediate download by its real extension;
+            # after the -x conversion the result is audio.mp3.
+            "-o", str(download_dir / "audio.%(ext)s"),
             "--no-playlist",
             "--restrict-filenames",
+            # End of options: user text starting with "-" must not be
+            # parsed as a yt-dlp flag.
+            "--",
             video_url
         ]
         try:
             progress(0.3, desc="Downloading audio...")
+            subprocess.run(command, check=True, capture_output=True, text=True)
         except FileNotFoundError:
+            return None, "yt-dlp not found. Install it with: pip install yt-dlp"
         except subprocess.CalledProcessError as e:
             return None, f"Download failed: {e.stderr}"
         except Exception as e:
+            logger.exception("Unexpected error during download: %s", e)
             return None, f"Unexpected error: {str(e)}"
+        # A zero exit status does not guarantee that the MP3 was produced.
+        if not output_file.exists():
+            return None, "Download completed but no audio file found"
+        progress(1.0, desc="Download complete!")
+        return str(output_file), f"Successfully downloaded audio: {output_file.name}"
+    def extract_basic_features(self, audio_path: str, sr: int = 16000, max_duration: float = 60.0,
+                             progress=gr.Progress()) -> Tuple[Optional[str], Optional[str], Optional[str]]:
+        """Extract basic audio features and generate visualizations.
+
+        Args:
+            audio_path: Path of the audio file to analyze.
+            sr: Sample rate the audio is resampled to on load.
+            max_duration: Only the first ``max_duration`` seconds are analyzed.
+            progress: Gradio progress tracker.
+
+        Returns:
+            ``(plot_path, summary, error)``. On success ``error`` is ``None``;
+            on failure the first two items are ``None``.
+        """
+        if not audio_path or not Path(audio_path).exists():
+            return None, None, "Invalid or missing audio file"
         try:
             progress(0.1, desc="Loading audio...")
             y, sr = librosa.load(audio_path, sr=sr)
             duration = librosa.get_duration(y=y, sr=sr)
             if duration > max_duration:
+                y = y[:int(sr * max_duration)]
                 duration = max_duration
             progress(0.3, desc="Computing features...")
+            # Newer librosa releases return the tempo as a one-element array,
+            # which the ":.1f" format in the summary cannot handle; reduce it
+            # to a plain float either way.
+            tempo = float(np.atleast_1d(librosa.beat.beat_track(y=y, sr=sr)[0])[0])
+            features: Dict[str, Any] = {
+                'duration': duration,
+                'sample_rate': sr,
+                'samples': len(y),
+                'tempo': tempo,
+                'mfcc': librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13),
+                'spectral_centroid': librosa.feature.spectral_centroid(y=y, sr=sr)[0],
+                'spectral_rolloff': librosa.feature.spectral_rolloff(y=y, sr=sr)[0],
+                'zero_crossing_rate': librosa.feature.zero_crossing_rate(y)[0]
+            }
             progress(0.5, desc="Computing mel spectrogram...")
             hop_length = 512
+            S_mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length, n_mels=80)
             S_dB = librosa.power_to_db(S_mel, ref=np.max)
             progress(0.8, desc="Creating visualizations...")
             fig, axes = plt.subplots(2, 2, figsize=(15, 10))
+            try:
+                # The waveform is indexed by sample, not by frame: converting
+                # with frames_to_time would stretch the axis by the hop length.
+                time_axis = np.arange(len(y)) / sr
+                axes[0, 0].plot(time_axis, y)
+                axes[0, 0].set_title('Waveform')
+                axes[0, 0].set_xlabel('Time (s)')
+                axes[0, 0].set_ylabel('Amplitude')
+                librosa.display.specshow(S_dB, sr=sr, hop_length=hop_length,
+                                         x_axis='time', y_axis='mel', ax=axes[0, 1])
+                axes[0, 1].set_title('Mel Spectrogram')
+                librosa.display.specshow(features['mfcc'], sr=sr, x_axis='time', ax=axes[1, 0])
+                axes[1, 0].set_title('MFCC')
+                times = librosa.frames_to_time(range(len(features['spectral_centroid'])), sr=sr, hop_length=hop_length)
+                axes[1, 1].plot(times, features['spectral_centroid'], label='Spectral Centroid')
+                axes[1, 1].plot(times, features['spectral_rolloff'], label='Spectral Rolloff')
+                axes[1, 1].set_title('Spectral Features')
+                axes[1, 1].set_xlabel('Time (s)')
+                axes[1, 1].legend()
+                # Work on this figure explicitly: pyplot's "current figure"
+                # is global state shared by concurrent requests.
+                fig.tight_layout()
+                plot_path = self._new_plot_path("basic_features")
+                fig.savefig(plot_path, dpi=150, bbox_inches='tight')
+            finally:
+                # Always release the figure, even when plotting fails.
+                plt.close(fig)
             summary = f"""
+**Audio Summary:**
+- Duration: {duration:.2f} seconds
+- Sample Rate: {sr} Hz
+- Estimated Tempo: {features['tempo']:.1f} BPM
+- Number of Samples: {len(y):,}
+**Feature Shapes:**
+- MFCC: {features['mfcc'].shape}
+- Spectral Centroid: {features['spectral_centroid'].shape}
+- Spectral Rolloff: {features['spectral_rolloff'].shape}
+- Zero Crossing Rate: {features['zero_crossing_rate'].shape}
             """
             progress(1.0, desc="Analysis complete!")
+            return str(plot_path), summary, None
         except Exception as e:
+            logger.exception("Error processing audio: %s", e)
             return None, None, f"Error processing audio: {str(e)}"
+    def extract_chroma_features(self, audio_path: str, sr: int = 16000, max_duration: float = 30.0,
+                              progress=gr.Progress()) -> Tuple[Optional[str], Optional[str], Optional[str]]:
+        """Extract and visualize enhanced chroma features.
+
+        Six chroma variants are plotted side by side: CQT chroma, chroma of
+        the harmonic component, the non-local filtered and median filtered
+        versions of the latter, STFT chroma and CENS.
+
+        Args:
+            audio_path: Path of the audio file to analyze.
+            sr: Sample rate the audio is resampled to on load.
+            max_duration: Only the first ``max_duration`` seconds are analyzed.
+            progress: Gradio progress tracker.
+
+        Returns:
+            ``(plot_path, summary, error)``. On success ``error`` is ``None``;
+            on failure the first two items are ``None``.
+        """
+        if not audio_path or not Path(audio_path).exists():
+            return None, None, "Invalid or missing audio file"
         try:
             progress(0.1, desc="Loading audio...")
             y, sr = librosa.load(audio_path, sr=sr)
             if len(y) > sr * max_duration:
+                y = y[:int(sr * max_duration)]
             progress(0.3, desc="Computing chroma variants...")
             chroma_orig = librosa.feature.chroma_cqt(y=y, sr=sr)
             y_harm = librosa.effects.harmonic(y=y, margin=8)
             chroma_harm = librosa.feature.chroma_cqt(y=y_harm, sr=sr)
             chroma_filter = np.minimum(chroma_harm,
                                       librosa.decompose.nn_filter(chroma_harm,
                                                                  aggregate=np.median,
                                                                  metric='cosine'))
             chroma_smooth = scipy.ndimage.median_filter(chroma_filter, size=(1, 9))
             chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
             chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)
             progress(0.8, desc="Creating visualizations...")
             fig, axes = plt.subplots(3, 2, figsize=(15, 12))
+            try:
+                panels = [
+                    (chroma_orig, 'Original Chroma (CQT)'),
+                    (chroma_harm, 'Harmonic Chroma'),
+                    (chroma_filter, 'Non-local Filtered'),
+                    (chroma_smooth, 'Median Filtered'),
+                    (chroma_stft, 'Chroma (STFT)'),
+                    (chroma_cens, 'CENS Features'),
+                ]
+                for ax, (chroma, title) in zip(axes.flatten(), panels):
+                    librosa.display.specshow(chroma, y_axis='chroma', x_axis='time', ax=ax)
+                    ax.set_title(title)
+                # Work on this figure explicitly: pyplot's "current figure"
+                # is global state shared by concurrent requests.
+                fig.tight_layout()
+                plot_path = self._new_plot_path("chroma_features")
+                fig.savefig(plot_path, dpi=150, bbox_inches='tight')
+            finally:
+                # Always release the figure, even when plotting fails.
+                plt.close(fig)
+            summary = "Chroma feature analysis complete! Visualizations show different chroma extraction methods for harmonic analysis."
             progress(1.0, desc="Chroma analysis complete!")
+            return str(plot_path), summary, None
         except Exception as e:
+            logger.exception("Error processing chroma features: %s", e)
+            return None, None, f"Error processing chroma features: {str(e)}"
+    def generate_patches(self, audio_path: str, sr: int = 16000, patch_duration: float = 5.0,
+                        hop_duration: float = 1.0, progress=gr.Progress()) -> Tuple[Optional[str], Optional[str], Optional[str]]:
+        """Generate fixed-duration patches for transformer input.
+
+        The mel spectrogram (80 bands, in dB) is cut into overlapping windows
+        of ``patch_duration`` seconds, one every ``hop_duration`` seconds.
+        Up to six patches are plotted.
+
+        Args:
+            audio_path: Path of the audio file to analyze.
+            sr: Sample rate the audio is resampled to on load.
+            patch_duration: Length of each patch in seconds.
+            hop_duration: Time between the starts of consecutive patches.
+            progress: Gradio progress tracker.
+
+        Returns:
+            ``(plot_path, summary, error)``. On success ``error`` is ``None``;
+            on failure the first two items are ``None``.
+        """
+        if not audio_path or not Path(audio_path).exists():
+            return None, None, "Invalid or missing audio file"
         try:
             progress(0.1, desc="Loading audio...")
             y, sr = librosa.load(audio_path, sr=sr)
             progress(0.3, desc="Computing mel spectrogram...")
             hop_length = 512
             S_mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length, n_mels=80)
             S_dB = librosa.power_to_db(S_mel, ref=np.max)
             progress(0.5, desc="Generating patches...")
+            # Durations shorter than one spectrogram frame round down to 0,
+            # which is not a usable frame or hop length; clamp to one frame.
+            patch_frames = max(1, int(librosa.time_to_frames(patch_duration, sr=sr, hop_length=hop_length)))
+            hop_frames = max(1, int(librosa.time_to_frames(hop_duration, sr=sr, hop_length=hop_length)))
+            if S_dB.shape[-1] < patch_frames:
+                return None, None, (
+                    f"Audio is too short for {patch_duration} second patches; "
+                    "choose a shorter patch duration or a longer recording."
+                )
             patches = librosa.util.frame(S_dB, frame_length=patch_frames, hop_length=hop_frames)
             progress(0.8, desc="Creating visualizations...")
             num_patches_to_show = min(6, patches.shape[-1])
             fig, axes = plt.subplots(2, 3, figsize=(18, 8))
+            try:
+                axes = axes.flatten()
+                for i in range(num_patches_to_show):
+                    librosa.display.specshow(patches[..., i], y_axis='mel', x_axis='time',
+                                             ax=axes[i], sr=sr, hop_length=hop_length)
+                    axes[i].set_title(f'Patch {i+1}')
+                # Hide the panels that have no patch to show.
+                for ax in axes[num_patches_to_show:]:
+                    ax.set_visible(False)
+                # Work on this figure explicitly: pyplot's "current figure"
+                # is global state shared by concurrent requests.
+                fig.tight_layout()
+                plot_path = self._new_plot_path("patches")
+                fig.savefig(plot_path, dpi=150, bbox_inches='tight')
+            finally:
+                # Always release the figure, even when plotting fails.
+                plt.close(fig)
             summary = f"""
+**Patch Generation Summary:**
+- Total patches generated: {patches.shape[-1]}
+- Patch duration: {patch_duration} seconds
+- Hop duration: {hop_duration} seconds
+- Patch shape (mels, time, patches): {patches.shape}
+- Each patch covers {patch_frames} time frames
             """
             progress(1.0, desc="Patch generation complete!")
+            return str(plot_path), summary, None
         except Exception as e:
+            logger.exception("Error generating patches: %s", e)
             return None, None, f"Error generating patches: {str(e)}"
+def create_gradio_interface() -> gr.Blocks:
+    """Create a modular Gradio interface for audio analysis.
+
+    One ``AudioAnalyzer`` is shared by every browser session of the returned
+    app; its temporary directory is removed when the Python process exits.
+
+    Returns:
+        The assembled ``gr.Blocks`` app, not yet launched.
+    """
+    import atexit
+    analyzer = AudioAnalyzer()
+    def run_patch_generation(audio_path, patch_seconds, hop_seconds, progress=gr.Progress()):
+        """Forward the slider values to ``generate_patches`` by keyword.
+
+        Gradio passes inputs positionally, and the second positional
+        parameter of ``generate_patches`` is ``sr``: wiring the method
+        directly would use the patch slider as the sample rate and the hop
+        slider as the patch duration.
+        """
+        return analyzer.generate_patches(
+            audio_path,
+            patch_duration=patch_seconds,
+            hop_duration=hop_seconds,
+            progress=progress,
+        )
+    with gr.Blocks(title="🎵 Audio Analysis Suite", theme=gr.themes.Soft()) as demo:
+        gr.Markdown("""
+        # 🎵 Audio Analysis Suite
+        Analyze audio from YouTube videos or uploaded files. Extract features or generate transformer patches for deep learning applications.
+        **Features:**
+        - 📊 **Basic Features**: Waveform, Mel Spectrogram, MFCC, Spectral Analysis, Tempo Detection
+        - 🎼 **Chroma Features**: Harmonic content analysis with multiple extraction methods
+        - 🧩 **Transformer Patches**: Fixed-duration patches for deep learning
+        **Requirements**: Install `yt-dlp` with `pip install yt-dlp`.
+        """)
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.Markdown("### 📁 Audio Input")
+                with gr.Group():
+                    gr.Markdown("**Download from YouTube** (Supported formats: MP3, WAV, etc.)")
+                    youtube_url = gr.Textbox(
+                        label="YouTube URL",
+                        placeholder="https://www.youtube.com/watch?v=...",
+                    )
+                    download_btn = gr.Button("📥 Download Audio", variant="primary")
+                    download_status = gr.Textbox(label="Download Status", interactive=False)
+                with gr.Group():
+                    gr.Markdown("**Or upload audio file** (Supported formats: MP3, WAV, FLAC, etc.)")
+                    audio_file = gr.Audio(
+                        label="Upload Audio File",
+                        type="filepath",
+                    )
+            with gr.Column(scale=2):
+                gr.Markdown("### 🔍 Analysis Results")
+                with gr.Tabs():
+                    with gr.Tab("📊 Basic Features"):
+                        basic_plot = gr.Image(label="Feature Visualizations")
+                        basic_summary = gr.Markdown(label="Feature Summary")
+                        basic_btn = gr.Button("🔍 Analyze Basic Features", variant="secondary")
+                    with gr.Tab("🎼 Chroma Features"):
+                        chroma_plot = gr.Image(label="Chroma Visualizations")
+                        chroma_summary = gr.Markdown(label="Chroma Summary")
+                        chroma_btn = gr.Button("🎼 Analyze Chroma Features", variant="secondary")
+                    with gr.Tab("🧩 Transformer Patches"):
+                        with gr.Row():
+                            patch_duration = gr.Slider(
+                                label="Patch Duration (seconds)",
+                                minimum=1.0, maximum=10.0, value=5.0, step=0.5,
+                            )
+                            hop_duration = gr.Slider(
+                                label="Hop Duration (seconds)",
+                                minimum=0.1, maximum=5.0, value=1.0, step=0.1,
+                            )
+                        patches_plot = gr.Image(label="Generated Patches")
+                        patches_summary = gr.Markdown(label="Patch Summary")
+                        patches_btn = gr.Button("🧩 Generate Patches", variant="secondary")
+                error_output = gr.Textbox(label="Error Messages", interactive=False)
+        gr.Markdown("""
+        ### ℹ️ Usage Tips
+        - **Processing Limits**: 60s for basic features, 30s for chroma features to ensure fast response times
+        - **YouTube Downloads**: Ensure URLs are valid and respect YouTube's terms of service
+        - **Visualizations**: High-quality, suitable for research and educational use
+        - **Storage**: Temporary files are automatically cleaned up when the interface closes
+        - **Support**: For issues, check the [GitHub repository](https://github.com/your-repo) or contact the developer
+        """)
+        # Event handlers
+        download_btn.click(
+            fn=analyzer.download_youtube_audio,
+            inputs=[youtube_url],
+            outputs=[audio_file, download_status]
+        )
+        basic_btn.click(
+            fn=analyzer.extract_basic_features,
+            inputs=[audio_file],
+            outputs=[basic_plot, basic_summary, error_output]
+        )
+        chroma_btn.click(
+            fn=analyzer.extract_chroma_features,
+            inputs=[audio_file],
+            outputs=[chroma_plot, chroma_summary, error_output]
+        )
+        patches_btn.click(
+            fn=run_patch_generation,
+            inputs=[audio_file, patch_duration, hop_duration],
+            outputs=[patches_plot, patches_summary, error_output]
+        )
+        # Setting or replacing the audio re-runs the basic analysis.
+        audio_file.change(
+            fn=analyzer.extract_basic_features,
+            inputs=[audio_file],
+            outputs=[basic_plot, basic_summary, error_output]
+        )
+    # The analyzer is shared by all sessions, so its files must outlive any
+    # single browser tab: clean up at process exit rather than on the
+    # per-session unload event, which would delete other users' files.
+    atexit.register(analyzer.cleanup)
+    return demo
+# Build and launch the interface only when this file is run as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    demo = create_gradio_interface()
+    demo.launch()