# Hugging Face Spaces page header (scrape residue): "Spaces: Sleeping"
# Standard library
import logging
import shutil
import subprocess
import tempfile
import warnings
from pathlib import Path
from typing import Tuple, Optional

# Third party
import gradio as gr
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import scipy.ndimage

# Headless backend: figures are saved to files, never shown interactively.
plt.switch_backend('Agg')
# librosa/matplotlib emit noisy deprecation warnings; silence them for the UI.
warnings.filterwarnings('ignore')
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
class AudioAnalyzer:
    """Download audio and render librosa analyses as PNG plots for the Gradio UI.

    Each instance owns a temporary directory where the downloaded audio and
    generated plot images live; call cleanup() to remove everything.
    """

    def __init__(self):
        # Scratch space for the downloaded audio file and saved plots.
        self.temp_dir = Path(tempfile.mkdtemp())
        self.plot_files = []  # paths of PNGs produced by save_plot()

    def cleanup(self):
        """Delete every generated plot and the temp directory. Idempotent."""
        for plot_file in self.plot_files:
            Path(plot_file).unlink(missing_ok=True)
        # Clear the list so a second cleanup() call is a no-op.
        self.plot_files.clear()
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def download_youtube_audio(self, video_url: str, progress=gr.Progress()) -> Tuple[Optional[str], str]:
        """Download a YouTube video's audio track as MP3 using yt-dlp.

        Returns:
            (path_to_mp3, status_message) on success,
            (None, error_message) on any failure.
        """
        if not video_url:
            return None, "Please provide a valid YouTube URL"
        progress(0.1, desc="Downloading...")
        output_file = self.temp_dir / "audio.mp3"
        try:
            # text=True decodes stdout/stderr, so the error message below is a
            # readable str instead of a bytes repr (fixes garbled messages).
            subprocess.run(
                [
                    "yt-dlp", "-x", "--audio-format", "mp3",
                    "-o", str(output_file), video_url,
                ],
                check=True, capture_output=True, text=True,
            )
        except FileNotFoundError:
            return None, "yt-dlp not found. Install with: pip install yt-dlp"
        except subprocess.CalledProcessError as e:
            return None, f"Download failed: {e.stderr}"
        if not output_file.exists():
            # yt-dlp exited 0 but wrote nothing we can use (e.g. unexpected
            # output-template expansion) — report instead of returning a
            # dangling path.
            return None, "Download failed: no audio file was produced"
        progress(1.0, desc="Complete!")
        return str(output_file), "Download successful"

    def save_plot(self, fig) -> str:
        """Save a matplotlib figure into the temp dir and track it for cleanup."""
        plot_path = self.temp_dir / f"plot_{len(self.plot_files)}.png"
        fig.savefig(plot_path, dpi=150, bbox_inches='tight')
        plt.close(fig)  # release the figure; Agg keeps them alive otherwise
        self.plot_files.append(str(plot_path))
        return str(plot_path)

    def analyze_audio(self, audio_path: str, analysis_type: str = "basic",
                      patch_duration: float = 5.0, progress=gr.Progress()) -> Tuple[Optional[str], str]:
        """Run the selected analysis and return (plot_path, markdown_summary).

        Returns (None, error_message) on any failure. Audio longer than a
        per-analysis cap is truncated to keep processing time bounded.
        """
        if not audio_path or not Path(audio_path).exists():
            return None, "No audio file provided"
        try:
            progress(0.1, desc="Loading audio...")
            y, sr = librosa.load(audio_path, sr=22050)
            duration = len(y) / sr
            # Cap processing time: chroma/patch analyses are heavier than basic.
            max_duration = 60 if analysis_type == "basic" else 30
            if duration > max_duration:
                y = y[:int(sr * max_duration)]
                duration = max_duration
            if analysis_type == "basic":
                return self._basic_analysis(y, sr, duration, progress)
            elif analysis_type == "chroma":
                return self._chroma_analysis(y, sr, progress)
            elif analysis_type == "patches":
                return self._patch_analysis(y, sr, patch_duration, progress)
            else:
                # BUG FIX: the original fell through here and implicitly
                # returned a bare None, breaking the caller's 2-value unpack.
                return None, f"Unknown analysis type: {analysis_type}"
        except Exception as e:
            logger.error(f"Analysis error: {e}")
            return None, f"Analysis failed: {str(e)}"

    def _basic_analysis(self, y, sr, duration, progress):
        """Waveform, mel spectrogram, MFCC and spectral-feature overview plot."""
        progress(0.3, desc="Computing features...")
        # beat_track may return the tempo as a 1-element ndarray depending on
        # the librosa version; normalise to a plain float either way.
        tempo = float(np.atleast_1d(librosa.beat.beat_track(y=y, sr=sr)[0])[0])
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
        spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]
        progress(0.6, desc="Creating visualizations...")
        # Mel spectrogram in dB, referenced to the peak power.
        S_mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=80)
        S_dB = librosa.power_to_db(S_mel, ref=np.max)
        fig, axes = plt.subplots(2, 2, figsize=(12, 8))
        # Waveform
        time = np.linspace(0, duration, len(y))
        axes[0, 0].plot(time, y, alpha=0.8)
        axes[0, 0].set_title('Waveform', fontweight='bold')
        axes[0, 0].set_xlabel('Time (s)')
        # Mel Spectrogram
        librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel', ax=axes[0, 1])
        axes[0, 1].set_title('Mel Spectrogram', fontweight='bold')
        # MFCC
        librosa.display.specshow(mfcc, sr=sr, x_axis='time', ax=axes[1, 0])
        axes[1, 0].set_title('MFCC Features', fontweight='bold')
        # Spectral centroid / rolloff over time
        times = librosa.frames_to_time(range(len(spectral_centroid)), sr=sr)
        axes[1, 1].plot(times, spectral_centroid, label='Centroid', linewidth=2)
        axes[1, 1].plot(times, spectral_rolloff, label='Rolloff', linewidth=2)
        axes[1, 1].set_title('Spectral Features', fontweight='bold')
        axes[1, 1].legend()
        axes[1, 1].set_xlabel('Time (s)')
        plt.tight_layout()
        plot_path = self.save_plot(fig)
        summary = f"""**Audio Analysis Results**
- Duration: {duration:.1f}s | Sample Rate: {sr:,} Hz
- Tempo: {tempo:.1f} BPM | Samples: {len(y):,}
- MFCC shape: {mfcc.shape} | Features extracted successfully"""
        progress(1.0, desc="Complete!")
        return plot_path, summary

    def _chroma_analysis(self, y, sr, progress):
        """Compare CQT, STFT and harmonic-only chroma representations."""
        progress(0.3, desc="Computing chroma features...")
        chroma_cqt = librosa.feature.chroma_cqt(y=y, sr=sr)
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
        # Harmonic separation removes percussive energy before chroma.
        y_harm = librosa.effects.harmonic(y=y)
        chroma_harm = librosa.feature.chroma_cqt(y=y_harm, sr=sr)
        progress(0.7, desc="Creating visualizations...")
        fig, axes = plt.subplots(2, 2, figsize=(12, 8))
        chromas = [
            (chroma_cqt, 'Chroma (CQT)'),
            (chroma_stft, 'Chroma (STFT)'),
            (chroma_harm, 'Harmonic Chroma'),
            (chroma_cqt - chroma_harm, 'Chroma Difference'),
        ]
        for i, (chroma, title) in enumerate(chromas):
            ax = axes[i // 2, i % 2]
            librosa.display.specshow(chroma, y_axis='chroma', x_axis='time', ax=ax)
            ax.set_title(title, fontweight='bold')
        plt.tight_layout()
        plot_path = self.save_plot(fig)
        summary = f"""**Chroma Analysis Results**
- Multiple chroma extraction methods compared
- CQT vs STFT analysis | Harmonic separation applied
- Chroma shape: {chroma_cqt.shape}"""
        progress(1.0, desc="Complete!")
        return plot_path, summary

    def _patch_analysis(self, y, sr, patch_duration, progress):
        """Cut the mel spectrogram into fixed-length, 50%-overlapping patches."""
        progress(0.3, desc="Generating patches...")
        hop_length = 512
        S_mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length, n_mels=80)
        S_dB = librosa.power_to_db(S_mel, ref=np.max)
        patch_frames = librosa.time_to_frames(patch_duration, sr=sr, hop_length=hop_length)
        hop_frames = patch_frames // 2  # 50% overlap
        if S_dB.shape[-1] < patch_frames:
            # BUG FIX: librosa.util.frame raises when the signal is shorter
            # than one patch; report a usable message instead of crashing.
            return None, (f"Audio too short for {patch_duration}s patches - "
                          f"use a shorter patch duration")
        patches = librosa.util.frame(S_dB, frame_length=patch_frames, hop_length=hop_frames)
        progress(0.7, desc="Creating visualizations...")
        # Show at most the first 6 patches.
        num_show = min(6, patches.shape[-1])
        fig, axes = plt.subplots(2, 3, figsize=(15, 8))
        axes = axes.flatten()
        for i in range(num_show):
            librosa.display.specshow(patches[..., i], y_axis='mel', x_axis='time',
                                     ax=axes[i], sr=sr, hop_length=hop_length)
            axes[i].set_title(f'Patch {i+1}', fontweight='bold')
        # Hide unused subplots
        for i in range(num_show, 6):
            axes[i].set_visible(False)
        plt.tight_layout()
        plot_path = self.save_plot(fig)
        summary = f"""**Patch Generation Results**
- Total patches: {patches.shape[-1]} | Duration: {patch_duration}s each
- Patch shape: {patches.shape} | 50% overlap between patches
- Ready for transformer input"""
        progress(1.0, desc="Complete!")
        return plot_path, summary
def create_interface():
    """Assemble and return the Gradio Blocks app for the audio analysis suite."""
    backend = AudioAnalyzer()
    with gr.Blocks(title="Audio Analysis Suite") as demo:
        gr.Markdown("# 🎵 Audio Analysis Suite")
        with gr.Row():
            with gr.Column():
                # Left column: source selection and analysis options.
                gr.Markdown("### Input")
                url_box = gr.Textbox(label="YouTube URL", placeholder="https://youtube.com/watch?v=...")
                fetch_button = gr.Button("Download Audio")
                audio_input = gr.Audio(label="Or upload audio file", type="filepath")
                gr.Markdown("### Analysis Options")
                mode_selector = gr.Radio(
                    choices=["basic", "chroma", "patches"],
                    value="basic",
                    label="Analysis Type",
                )
                duration_slider = gr.Slider(1, 10, 5, step=0.5, label="Patch Duration (s)",
                                            visible=False)
                run_button = gr.Button("Analyze Audio", variant="primary")
            with gr.Column():
                # Right column: rendered plot, summary text and status line.
                gr.Markdown("### Results")
                image_out = gr.Image(label="Visualizations")
                markdown_out = gr.Markdown()
                status_box = gr.Textbox(label="Status", interactive=False)

        # Wire buttons to the analyzer's methods.
        fetch_button.click(
            backend.download_youtube_audio,
            inputs=[url_box],
            outputs=[audio_input, status_box],
        )
        run_button.click(
            backend.analyze_audio,
            inputs=[audio_input, mode_selector, duration_slider],
            outputs=[image_out, markdown_out],
        )

        # The patch-duration slider only applies to "patches" mode.
        def _toggle_slider(mode):
            return gr.update(visible=(mode == "patches"))

        mode_selector.change(
            _toggle_slider,
            inputs=[mode_selector],
            outputs=[duration_slider],
        )
        # Drop temp files when the browser session ends.
        demo.unload(backend.cleanup)
    return demo
# Build the Gradio app and serve it when executed as a script.
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()