import gradio as gr
import librosa
import numpy as np
import soundfile as sf
import os
import tempfile
import shutil
from pathlib import Path
import warnings
warnings.filterwarnings("ignore")
# Import for advanced features
try:
    from spleeter.separator import Separator
    SPLEETER_AVAILABLE = True
except ImportError:
    SPLEETER_AVAILABLE = False
    print("Spleeter not available - source separation disabled")
try:
    import scipy.signal
    from scipy.spatial.distance import euclidean
    from dtw import dtw
    ADVANCED_FEATURES = True
except ImportError:
    ADVANCED_FEATURES = False
    print("Advanced features not available")


class AudioEngine:
    """Clean, professional audio processing engine"""

    def __init__(self):
        self.temp_dir = tempfile.mkdtemp()
        self.separators = {}  # Cache for loaded Spleeter models

    def analyze_audio(self, audio_path):
        """Extract comprehensive audio features"""
        try:
            # Load audio (librosa resamples to 22050 Hz by default)
            y, sr = librosa.load(audio_path)

            # Basic properties
            duration = len(y) / sr
            tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
            # beat_track may return tempo as an ndarray depending on the librosa version
            tempo = float(np.atleast_1d(tempo)[0])

            # Spectral features
            spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
            spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
            zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y))

            # Energy features
            rms_energy = np.mean(librosa.feature.rms(y=y))

            # Pitch estimation: keep the strongest pitch candidate per frame
            pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
            pitch_values = []
            for t in range(pitches.shape[1]):
                index = magnitudes[:, t].argmax()
                pitch = pitches[index, t]
                if pitch > 0:
                    pitch_values.append(pitch)
            avg_pitch = np.mean(pitch_values) if pitch_values else 0

            return {
                'success': True,
                'duration': round(duration, 2),
                'tempo': round(tempo, 1),
                'sample_rate': sr,
                'spectral_centroid': round(float(spectral_centroid), 2),
                'spectral_rolloff': round(float(spectral_rolloff), 2),
                'zero_crossing_rate': round(float(zero_crossing_rate), 4),
                'rms_energy': round(float(rms_energy), 4),
                'average_pitch': round(float(avg_pitch), 2),
                'pitch_count': len(pitch_values),
                'beats_detected': len(beats)
            }
        except Exception as e:
            return {'success': False, 'error': str(e)}
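
    # Example usage (illustrative; "sample.wav" is a hypothetical local file):
    #   result = AudioEngine().analyze_audio("sample.wav")
    #   if result['success']:
    #       print(result['tempo'], result['average_pitch'])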

    def separate_vocals(self, audio_path, model_type="2stems"):
        """Separate vocals using Spleeter"""
        if not SPLEETER_AVAILABLE:
            return {'success': False, 'error': 'Spleeter not available'}
        try:
            # Load or create separator (models are cached after first use)
            if model_type not in self.separators:
                self.separators[model_type] = Separator(f'spleeter:{model_type}-16kHz')
            separator = self.separators[model_type]

            # Create output directory
            output_dir = os.path.join(self.temp_dir, f"separation_{np.random.randint(10000)}")
            os.makedirs(output_dir, exist_ok=True)

            # Separate
            separator.separate_to_file(audio_path, output_dir)

            # Spleeter writes stems beneath a directory named after the input file
            audio_name = Path(audio_path).stem
            result_dir = os.path.join(output_dir, audio_name)

            if model_type == "2stems":
                vocals_path = os.path.join(result_dir, "vocals.wav")
                accompaniment_path = os.path.join(result_dir, "accompaniment.wav")
                return {
                    'success': True,
                    'vocals': vocals_path if os.path.exists(vocals_path) else None,
                    'accompaniment': accompaniment_path if os.path.exists(accompaniment_path) else None
                }
            elif model_type == "4stems":
                vocals_path = os.path.join(result_dir, "vocals.wav")
                drums_path = os.path.join(result_dir, "drums.wav")
                bass_path = os.path.join(result_dir, "bass.wav")
                other_path = os.path.join(result_dir, "other.wav")
                return {
                    'success': True,
                    'vocals': vocals_path if os.path.exists(vocals_path) else None,
                    'drums': drums_path if os.path.exists(drums_path) else None,
                    'bass': bass_path if os.path.exists(bass_path) else None,
                    'other': other_path if os.path.exists(other_path) else None
                }
            else:
                return {'success': False, 'error': f'Unknown model type: {model_type}'}
        except Exception as e:
            return {'success': False, 'error': str(e)}
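
    # For reference, separate_to_file() produces a layout like this for
    # "song.mp3" with the 2stems model:
    #   <temp_dir>/separation_<n>/song/vocals.wav
    #   <temp_dir>/separation_<n>/song/accompaniment.wav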

    def apply_effects(self, audio_path, pitch_shift=0, reverb=0):
        """Apply vocal effects"""
        try:
            y, sr = librosa.load(audio_path)

            # Apply pitch shift
            if pitch_shift != 0:
                y = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch_shift)

            # Apply reverb (simple convolution with a synthetic impulse response)
            if reverb > 0 and ADVANCED_FEATURES:
                reverb_length = int(0.5 * sr)
                impulse = np.random.randn(reverb_length) * np.exp(-np.arange(reverb_length) / (sr * 0.1))
                y = scipy.signal.convolve(y, impulse * reverb, mode='same')
                peak = np.max(np.abs(y))
                if peak > 0:
                    y = y / peak  # Normalize to prevent clipping

            # Save processed audio
            output_path = os.path.join(self.temp_dir, f"processed_{np.random.randint(10000)}.wav")
            sf.write(output_path, y, sr)
            return {'success': True, 'output': output_path}
        except Exception as e:
            return {'success': False, 'error': str(e)}
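
    # The reverb above convolves the signal with exponentially decaying white
    # noise: h[n] = randn() * exp(-n / (0.1 * sr)), i.e. roughly a 100 ms decay
    # time constant, scaled by the reverb amount.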

    def extract_vocal_features(self, audio_path):
        """Extract features for style coaching"""
        try:
            y, sr = librosa.load(audio_path)

            # Pitch analysis: strongest pitch candidate per frame
            pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
            pitch_values = []
            for t in range(pitches.shape[1]):
                index = magnitudes[:, t].argmax()
                pitch = pitches[index, t]
                if pitch > 0:
                    pitch_values.append(pitch)
            if not pitch_values:
                return {'success': False, 'error': 'No pitch detected'}

            # Basic vocal metrics
            mean_pitch = np.mean(pitch_values)
            pitch_std = np.std(pitch_values)
            pitch_range = max(pitch_values) - min(pitch_values)

            # Tempo (beat_track may return an ndarray depending on the librosa version)
            tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
            tempo = float(np.atleast_1d(tempo)[0])

            # Spectral features
            spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))

            # Energy
            rms_energy = np.mean(librosa.feature.rms(y=y))

            return {
                'success': True,
                'mean_pitch': mean_pitch,
                'pitch_std': pitch_std,
                'pitch_range': pitch_range,
                'tempo': tempo,
                'spectral_centroid': spectral_centroid,
                'rms_energy': rms_energy
            }
        except Exception as e:
            return {'success': False, 'error': str(e)}

    def compare_vocal_styles(self, user_features, reference_features_list):
        """Compare user vocals to reference style"""
        if not ADVANCED_FEATURES:
            return {'success': False, 'error': 'Advanced features not available'}
        try:
            # Average the reference features
            ref_avg = {}
            for key in ['mean_pitch', 'pitch_std', 'pitch_range', 'tempo', 'spectral_centroid', 'rms_energy']:
                values = [ref[key] for ref in reference_features_list if key in ref]
                ref_avg[key] = np.mean(values) if values else 0

            # Calculate differences
            pitch_diff = abs(user_features['mean_pitch'] - ref_avg['mean_pitch'])
            tempo_diff = abs(user_features['tempo'] - ref_avg['tempo'])
            timbre_diff = abs(user_features['spectral_centroid'] - ref_avg['spectral_centroid'])
            energy_diff = abs(user_features['rms_energy'] - ref_avg['rms_energy'])

            # Generate feedback
            feedback = []
            if pitch_diff > 50:
                feedback.append(f"🎵 Pitch: Your average pitch differs by {pitch_diff:.1f} Hz. Practice matching the reference key.")
            else:
                feedback.append("🎵 Pitch: Good pitch accuracy!")
            if tempo_diff > 10:
                feedback.append(f"⏱️ Tempo: Your tempo differs by {tempo_diff:.1f} BPM. Work on timing consistency.")
            else:
                feedback.append("⏱️ Tempo: Good timing!")
            if timbre_diff > 500:
                feedback.append("🗣️ Timbre: Try adjusting your vocal tone to match the reference style.")
            else:
                feedback.append("🗣️ Timbre: Good vocal tone match!")
            if energy_diff > 0.1:
                feedback.append("🔊 Energy: Adjust your vocal intensity to match the reference.")
            else:
                feedback.append("🔊 Energy: Good energy level!")

            # Weighted penalty: each metric is scaled into a roughly comparable
            # range, then subtracted from a perfect score of 100
            overall_score = max(0, 100 - (pitch_diff / 2 + tempo_diff + timbre_diff / 10 + energy_diff * 100))

            return {
                'success': True,
                'score': round(overall_score, 1),
                'feedback': feedback,
                'metrics': {
                    'pitch_diff': round(pitch_diff, 1),
                    'tempo_diff': round(tempo_diff, 1),
                    'timbre_diff': round(timbre_diff, 1),
                    'energy_diff': round(energy_diff, 3)
                }
            }
        except Exception as e:
            return {'success': False, 'error': str(e)}
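
    # Worked example of the scoring formula (illustrative numbers only):
    #   pitch_diff = 30 Hz, tempo_diff = 5 BPM, timbre_diff = 200 Hz, energy_diff = 0.05
    #   score = 100 - (30/2 + 5 + 200/10 + 0.05*100) = 100 - 45 = 55.0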

    def cleanup(self):
        """Clean up temporary files"""
        try:
            if os.path.exists(self.temp_dir):
                shutil.rmtree(self.temp_dir)
        except Exception:
            pass


# Global engine instance
engine = AudioEngine()
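
# cleanup() is defined but never invoked anywhere in the original flow;
# registering it with atexit (an addition, not part of the original design)
# removes the temp directory when the process exits.
import atexit
atexit.register(engine.cleanup)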


def format_analysis_results(analysis):
    """Format analysis results for display"""
    if not analysis['success']:
        return f"❌ Analysis failed: {analysis['error']}"
    return f"""📊 Audio Analysis Results

🎵 Basic Properties:
• Duration: {analysis['duration']} seconds
• Sample Rate: {analysis['sample_rate']} Hz
• Tempo: {analysis['tempo']} BPM

🔊 Audio Characteristics:
• Spectral Centroid: {analysis['spectral_centroid']} Hz
• Spectral Rolloff: {analysis['spectral_rolloff']} Hz
• Zero Crossing Rate: {analysis['zero_crossing_rate']}
• RMS Energy: {analysis['rms_energy']}

🎤 Vocal Information:
• Average Pitch: {analysis['average_pitch']} Hz
• Pitch Points Detected: {analysis['pitch_count']}
• Beats Detected: {analysis['beats_detected']}"""


def process_audio_separation(audio_file, separation_mode):
    """Main audio separation function"""
    if not audio_file:
        return "❌ Please upload an audio file", None, None, None, None, ""
    if not SPLEETER_AVAILABLE:
        return "❌ Spleeter not available for source separation", None, None, None, None, ""
    try:
        # Analyze audio first
        analysis = engine.analyze_audio(audio_file)
        analysis_text = format_analysis_results(analysis)

        # Separate audio
        model_type = "2stems" if "2-stem" in separation_mode else "4stems"
        separation_result = engine.separate_vocals(audio_file, model_type)
        if not separation_result['success']:
            return f"❌ Separation failed: {separation_result['error']}", None, None, None, None, analysis_text

        if model_type == "2stems":
            return (
                "✅ 2-stem separation completed successfully!",
                separation_result.get('vocals'),
                separation_result.get('accompaniment'),
                None,
                None,
                analysis_text
            )
        else:
            return (
                "✅ 4-stem separation completed successfully!",
                separation_result.get('vocals'),
                separation_result.get('drums'),
                separation_result.get('bass'),
                separation_result.get('other'),
                analysis_text
            )
    except Exception as e:
        return f"❌ Processing error: {str(e)}", None, None, None, None, ""


def process_vocal_effects(audio_file, pitch_shift, reverb_amount):
    """Apply vocal effects to audio"""
    if not audio_file:
        return "❌ Please upload an audio file", None, ""
    try:
        # Analyze original
        analysis = engine.analyze_audio(audio_file)
        analysis_text = format_analysis_results(analysis)

        # Apply effects
        effects_result = engine.apply_effects(audio_file, pitch_shift, reverb_amount)
        if not effects_result['success']:
            return f"❌ Effects failed: {effects_result['error']}", None, analysis_text

        effects_applied = []
        if pitch_shift != 0:
            effects_applied.append(f"Pitch: {pitch_shift:+.1f} semitones")
        if reverb_amount > 0:
            effects_applied.append(f"Reverb: {reverb_amount:.2f}")
        status = f"✅ Effects applied: {', '.join(effects_applied)}" if effects_applied else "✅ Audio processed (no effects)"
        return status, effects_result['output'], analysis_text
    except Exception as e:
        return f"❌ Processing error: {str(e)}", None, ""


def process_style_coaching(reference_files, user_audio):
    """Style coaching analysis"""
    if not reference_files or len(reference_files) < 2:
        return "❌ Upload at least 2 reference tracks", "", ""
    if not user_audio:
        return "❌ Please record or upload your performance", "", ""
    if not SPLEETER_AVAILABLE or not ADVANCED_FEATURES:
        return "❌ Style coaching requires advanced features", "", ""
    try:
        # Process up to 5 reference tracks
        ref_features = []
        ref_status = []
        for i, ref_file in enumerate(reference_files[:5]):
            # gr.File may yield tempfile wrappers or plain path strings,
            # depending on the Gradio version
            ref_path = getattr(ref_file, 'name', ref_file)

            # Separate vocals
            separation_result = engine.separate_vocals(ref_path, "2stems")
            if separation_result['success'] and separation_result.get('vocals'):
                # Extract features
                features = engine.extract_vocal_features(separation_result['vocals'])
                if features['success']:
                    ref_features.append(features)
                    ref_status.append(f"✅ Reference {i + 1}: Processed")
                else:
                    ref_status.append(f"❌ Reference {i + 1}: Feature extraction failed")
            else:
                ref_status.append(f"❌ Reference {i + 1}: Vocal separation failed")

        if len(ref_features) < 2:
            return "❌ Need at least 2 valid reference tracks", "\n".join(ref_status), ""

        # Process user audio
        user_separation = engine.separate_vocals(user_audio, "2stems")
        if not user_separation['success'] or not user_separation.get('vocals'):
            return "❌ Could not separate vocals from your performance", "\n".join(ref_status), ""
        user_features = engine.extract_vocal_features(user_separation['vocals'])
        if not user_features['success']:
            return "❌ Could not analyze your vocal features", "\n".join(ref_status), ""

        # Compare styles
        comparison = engine.compare_vocal_styles(user_features, ref_features)
        if not comparison['success']:
            return f"❌ Style comparison failed: {comparison['error']}", "\n".join(ref_status), ""

        # Pick a recommendation tier before building the report
        if comparison['score'] > 80:
            recommendation = "🔥 Excellent! You're very close to the target style."
        elif comparison['score'] > 60:
            recommendation = "📈 Good progress! Focus on the areas mentioned above."
        else:
            recommendation = "💪 Keep practicing! Work on basic vocal technique first."

        # Format feedback
        feedback_text = f"""🎯 Vocal Style Coaching Results

📊 Overall Score: {comparison['score']}/100

🎵 Detailed Feedback:
{chr(10).join(comparison['feedback'])}

📈 Technical Metrics:
• Pitch Difference: {comparison['metrics']['pitch_diff']} Hz
• Tempo Difference: {comparison['metrics']['tempo_diff']} BPM
• Timbre Difference: {comparison['metrics']['timbre_diff']} Hz
• Energy Difference: {comparison['metrics']['energy_diff']}

🎯 Recommendations:
{recommendation}

References analyzed: {len(ref_features)}/5"""

        return f"✅ Style coaching complete! Score: {comparison['score']}/100", "\n".join(ref_status), feedback_text
    except Exception as e:
        return f"❌ Coaching failed: {str(e)}", "", ""


# Create main interface
def create_app():
    with gr.Blocks(title="Audio Singing Helper") as app:
        gr.HTML("""
        <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 20px;">
            <h1>🎤 Audio Singing Helper</h1>
            <p>Professional audio processing for singers and musicians</p>
        </div>
        """)

        with gr.Tabs():
            # Audio Separation Tab
            with gr.Tab("🎵 Audio Separation"):
                gr.Markdown("### Separate vocals from instrumental tracks")
                with gr.Row():
                    with gr.Column():
                        sep_audio_input = gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"])
                        sep_mode = gr.Dropdown(
                            choices=["2-stem (Vocals + Instrumental)", "4-stem (Vocals + Drums + Bass + Other)"],
                            value="2-stem (Vocals + Instrumental)",
                            label="Separation Mode"
                        )
                        sep_button = gr.Button("🎯 Separate Audio", variant="primary")
                    with gr.Column():
                        sep_status = gr.Textbox(label="Status", lines=2, interactive=False)
                        sep_analysis = gr.Textbox(label="Audio Analysis", lines=12, interactive=False)
                with gr.Row():
                    sep_vocals = gr.Audio(label="🎤 Vocals", show_download_button=True)
                    sep_instrumental = gr.Audio(label="🎼 Instrumental/Drums", show_download_button=True)
                with gr.Row():
                    sep_bass = gr.Audio(label="🎸 Bass", show_download_button=True)
                    sep_other = gr.Audio(label="🎹 Other", show_download_button=True)

            # Vocal Effects Tab
            with gr.Tab("🎛️ Vocal Effects"):
                gr.Markdown("### Apply professional vocal effects")
                with gr.Row():
                    with gr.Column():
                        fx_audio_input = gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"])
                        fx_pitch = gr.Slider(-12, 12, 0, step=0.5, label="Pitch Shift (semitones)")
                        fx_reverb = gr.Slider(0, 0.5, 0, step=0.05, label="Reverb Amount")
                        fx_button = gr.Button("🎵 Apply Effects", variant="primary")
                    with gr.Column():
                        fx_status = gr.Textbox(label="Status", lines=2, interactive=False)
                        fx_analysis = gr.Textbox(label="Audio Analysis", lines=10, interactive=False)
                fx_output = gr.Audio(label="🎧 Processed Audio", show_download_button=True)

            # Live Recording Tab
            with gr.Tab("🎙️ Live Recording"):
                gr.Markdown("### Record and process your voice")
                with gr.Row():
                    with gr.Column():
                        live_audio = gr.Audio(type="filepath", sources=["microphone"], label="Record Your Voice")
                        live_pitch = gr.Slider(-12, 12, 0, step=0.5, label="Pitch Correction")
                        live_reverb = gr.Slider(0, 0.5, 0, step=0.05, label="Reverb")
                        live_button = gr.Button("🎤 Process Recording", variant="primary")
                    with gr.Column():
                        live_status = gr.Textbox(label="Status", lines=2, interactive=False)
                        live_analysis = gr.Textbox(label="Recording Analysis", lines=10, interactive=False)
                live_output = gr.Audio(label="🎧 Processed Recording", show_download_button=True)

            # Style Coaching Tab
            with gr.Tab("🎭 Style Coaching"):
                gr.Markdown("### Get personalized vocal coaching feedback")
                with gr.Row():
                    with gr.Column():
                        coach_refs = gr.File(
                            label="Reference Tracks (2-5 files)",
                            file_count="multiple",
                            file_types=["audio"]
                        )
                        coach_user = gr.Audio(
                            type="filepath",
                            label="Your Performance",
                            sources=["upload", "microphone"]
                        )
                        coach_button = gr.Button("🎯 Get Coaching", variant="primary")
                    with gr.Column():
                        coach_status = gr.Textbox(label="Status", lines=3, interactive=False)
                        coach_refs_status = gr.Textbox(label="Reference Processing", lines=8, interactive=False)
                coach_feedback = gr.Textbox(label="🎯 Coaching Feedback", lines=15, interactive=False)

            # Help Tab
            with gr.Tab("ℹ️ Help"):
                gr.Markdown("""
                # 🎤 Audio Singing Helper - User Guide

                ## Features

                ### 🎵 Audio Separation
                - Upload any song to separate vocals from instruments
                - Choose 2-stem (vocals + instrumental) or 4-stem (vocals + drums + bass + other)
                - Get detailed audio analysis of your tracks

                ### 🎛️ Vocal Effects
                - Apply pitch shifting (-12 to +12 semitones)
                - Add reverb for spatial depth
                - Process any audio file with professional effects

                ### 🎙️ Live Recording
                - Record directly from your microphone
                - Apply pitch correction and reverb to your recording
                - Perfect for vocal practice and experimentation

                ### 🎭 Style Coaching
                - Upload 2-5 reference tracks from artists you want to emulate
                - Record or upload your performance
                - Get AI-powered feedback on pitch, timing, and vocal characteristics
                - Receive a score and specific improvement suggestions

                ## Tips for Best Results
                - **Use high-quality audio files** - better input means better results
                - **Keep files under 5 minutes** for faster processing
                - **For style coaching**: choose references from similar genres
                - **Record in quiet environments** for the most reliable analysis

                ## Supported Formats
                - Input: MP3, WAV, FLAC, M4A, OGG
                - Output: high-quality WAV files

                ## Technical Requirements
                - Some features require additional dependencies
                - Processing time varies with file length and complexity

                ---
                Built for singers and musicians worldwide 🌍
                """)

        # Connect all the event handlers
        sep_button.click(
            process_audio_separation,
            inputs=[sep_audio_input, sep_mode],
            outputs=[sep_status, sep_vocals, sep_instrumental, sep_bass, sep_other, sep_analysis]
        )
        fx_button.click(
            process_vocal_effects,
            inputs=[fx_audio_input, fx_pitch, fx_reverb],
            outputs=[fx_status, fx_output, fx_analysis]
        )
        live_button.click(
            process_vocal_effects,
            inputs=[live_audio, live_pitch, live_reverb],
            outputs=[live_status, live_output, live_analysis]
        )
        coach_button.click(
            process_style_coaching,
            inputs=[coach_refs, coach_user],
            outputs=[coach_status, coach_refs_status, coach_feedback]
        )

    return app


if __name__ == "__main__":
    app = create_app()
    app.launch()
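
    # For long-running Spleeter jobs (e.g. on Hugging Face Spaces), enabling
    # Gradio's request queue is a common option; a suggestion only, not
    # something the original code does:
    #   app.queue().launch()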