import gradio as gr import librosa import numpy as np import soundfile as sf import os import tempfile import shutil from pathlib import Path import warnings warnings.filterwarnings("ignore") # Import for advanced features try: from spleeter.separator import Separator SPLEETER_AVAILABLE = True except ImportError: SPLEETER_AVAILABLE = False print("Spleeter not available - source separation disabled") try: import scipy.signal from scipy.spatial.distance import euclidean from dtw import dtw ADVANCED_FEATURES = True except ImportError: ADVANCED_FEATURES = False print("Advanced features not available") class AudioEngine: """Clean, professional audio processing engine""" def __init__(self): self.temp_dir = tempfile.mkdtemp() self.separators = {} # Cache for Spleeter models def analyze_audio(self, audio_path): """Extract comprehensive audio features""" try: # Load audio y, sr = librosa.load(audio_path) # Basic properties duration = len(y) / sr tempo, beats = librosa.beat.beat_track(y=y, sr=sr) # Spectral features spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)) spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)) zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y)) # Energy features rms_energy = np.mean(librosa.feature.rms(y=y)) # Pitch estimation pitches, magnitudes = librosa.piptrack(y=y, sr=sr) pitch_values = [] for t in range(pitches.shape[1]): index = magnitudes[:, t].argmax() pitch = pitches[index, t] if pitch > 0: pitch_values.append(pitch) avg_pitch = np.mean(pitch_values) if pitch_values else 0 return { 'success': True, 'duration': round(duration, 2), 'tempo': round(tempo, 1), 'sample_rate': sr, 'spectral_centroid': round(spectral_centroid, 2), 'spectral_rolloff': round(spectral_rolloff, 2), 'zero_crossing_rate': round(zero_crossing_rate, 4), 'rms_energy': round(rms_energy, 4), 'average_pitch': round(avg_pitch, 2), 'pitch_count': len(pitch_values), 'beats_detected': len(beats) } except Exception as e: return {'success': False, 'error': str(e)} def separate_vocals(self, audio_path, model_type="2stems"): """Separate vocals using Spleeter""" if not SPLEETER_AVAILABLE: return {'success': False, 'error': 'Spleeter not available'} try: # Load or create separator if model_type not in self.separators: self.separators[model_type] = Separator(f'spleeter:{model_type}-16kHz') separator = self.separators[model_type] # Create output directory output_dir = os.path.join(self.temp_dir, f"separation_{np.random.randint(10000)}") os.makedirs(output_dir, exist_ok=True) # Separate separator.separate_to_file(audio_path, output_dir) # Get results audio_name = Path(audio_path).stem result_dir = os.path.join(output_dir, audio_name) if model_type == "2stems": vocals_path = os.path.join(result_dir, "vocals.wav") accompaniment_path = os.path.join(result_dir, "accompaniment.wav") return { 'success': True, 'vocals': vocals_path if os.path.exists(vocals_path) else None, 'accompaniment': accompaniment_path if os.path.exists(accompaniment_path) else None } elif model_type == "4stems": vocals_path = os.path.join(result_dir, "vocals.wav") drums_path = os.path.join(result_dir, "drums.wav") bass_path = os.path.join(result_dir, "bass.wav") other_path = os.path.join(result_dir, "other.wav") return { 'success': True, 'vocals': vocals_path if os.path.exists(vocals_path) else None, 'drums': drums_path if os.path.exists(drums_path) else None, 'bass': bass_path if os.path.exists(bass_path) else None, 'other': other_path if os.path.exists(other_path) else None } except Exception as e: return {'success': False, 'error': str(e)} def apply_effects(self, audio_path, pitch_shift=0, reverb=0): """Apply vocal effects""" try: y, sr = librosa.load(audio_path) # Apply pitch shift if pitch_shift != 0: y = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch_shift) # Apply reverb (simple convolution) if reverb > 0 and ADVANCED_FEATURES: reverb_length = int(0.5 * sr) impulse = np.random.randn(reverb_length) * np.exp(-np.arange(reverb_length) / (sr * 0.1)) y = scipy.signal.convolve(y, impulse * reverb, mode='same') y = y / np.max(np.abs(y)) # Normalize # Save processed audio output_path = os.path.join(self.temp_dir, f"processed_{np.random.randint(10000)}.wav") sf.write(output_path, y, sr) return {'success': True, 'output': output_path} except Exception as e: return {'success': False, 'error': str(e)} def extract_vocal_features(self, audio_path): """Extract features for style coaching""" try: y, sr = librosa.load(audio_path) # Pitch analysis pitches, magnitudes = librosa.piptrack(y=y, sr=sr) pitch_values = [] for t in range(pitches.shape[1]): index = magnitudes[:, t].argmax() pitch = pitches[index, t] if pitch > 0: pitch_values.append(pitch) if not pitch_values: return {'success': False, 'error': 'No pitch detected'} # Basic vocal metrics mean_pitch = np.mean(pitch_values) pitch_std = np.std(pitch_values) pitch_range = max(pitch_values) - min(pitch_values) # Tempo tempo, _ = librosa.beat.beat_track(y=y, sr=sr) # Spectral features spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)) # Energy rms_energy = np.mean(librosa.feature.rms(y=y)) return { 'success': True, 'mean_pitch': mean_pitch, 'pitch_std': pitch_std, 'pitch_range': pitch_range, 'tempo': tempo, 'spectral_centroid': spectral_centroid, 'rms_energy': rms_energy } except Exception as e: return {'success': False, 'error': str(e)} def compare_vocal_styles(self, user_features, reference_features_list): """Compare user vocals to reference style""" if not ADVANCED_FEATURES: return {'success': False, 'error': 'Advanced features not available'} try: # Average reference features ref_avg = {} for key in ['mean_pitch', 'pitch_std', 'pitch_range', 'tempo', 'spectral_centroid', 'rms_energy']: values = [ref[key] for ref in reference_features_list if key in ref] ref_avg[key] = np.mean(values) if values else 0 # Calculate differences pitch_diff = abs(user_features['mean_pitch'] - ref_avg['mean_pitch']) tempo_diff = abs(user_features['tempo'] - ref_avg['tempo']) timbre_diff = abs(user_features['spectral_centroid'] - ref_avg['spectral_centroid']) energy_diff = abs(user_features['rms_energy'] - ref_avg['rms_energy']) # Generate feedback feedback = [] if pitch_diff > 50: feedback.append(f"đĩ Pitch: Your average pitch differs by {pitch_diff:.1f} Hz. Practice matching the reference key.") else: feedback.append("đĩ Pitch: Good pitch accuracy!") if tempo_diff > 10: feedback.append(f"âąī¸ Tempo: Your tempo differs by {tempo_diff:.1f} BPM. Work on timing consistency.") else: feedback.append("âąī¸ Tempo: Good timing!") if timbre_diff > 500: feedback.append("đŖī¸ Timbre: Try adjusting your vocal tone to match the reference style.") else: feedback.append("đŖī¸ Timbre: Good vocal tone match!") if energy_diff > 0.1: feedback.append("đ Energy: Adjust your vocal intensity to match the reference.") else: feedback.append("đ Energy: Good energy level!") overall_score = max(0, 100 - (pitch_diff/2 + tempo_diff + timbre_diff/10 + energy_diff*100)) return { 'success': True, 'score': round(overall_score, 1), 'feedback': feedback, 'metrics': { 'pitch_diff': round(pitch_diff, 1), 'tempo_diff': round(tempo_diff, 1), 'timbre_diff': round(timbre_diff, 1), 'energy_diff': round(energy_diff, 3) } } except Exception as e: return {'success': False, 'error': str(e)} def cleanup(self): """Clean up temporary files""" try: if os.path.exists(self.temp_dir): shutil.rmtree(self.temp_dir) except Exception: pass # Global engine instance engine = AudioEngine() def format_analysis_results(analysis): """Format analysis results for display""" if not analysis['success']: return f"â Analysis failed: {analysis['error']}" return f"""đ Audio Analysis Results đĩ Basic Properties: âĸ Duration: {analysis['duration']} seconds âĸ Sample Rate: {analysis['sample_rate']} Hz âĸ Tempo: {analysis['tempo']} BPM đ Audio Characteristics: âĸ Spectral Centroid: {analysis['spectral_centroid']} Hz âĸ Spectral Rolloff: {analysis['spectral_rolloff']} Hz âĸ Zero Crossing Rate: {analysis['zero_crossing_rate']} âĸ RMS Energy: {analysis['rms_energy']} đ¤ Vocal Information: âĸ Average Pitch: {analysis['average_pitch']} Hz âĸ Pitch Points Detected: {analysis['pitch_count']} âĸ Beats Detected: {analysis['beats_detected']}""" def process_audio_separation(audio_file, separation_mode): """Main audio separation function""" if not audio_file: return "â Please upload an audio file", None, None, None, None, "" if not SPLEETER_AVAILABLE: return "â Spleeter not available for source separation", None, None, None, None, "" try: # Analyze audio first analysis = engine.analyze_audio(audio_file) analysis_text = format_analysis_results(analysis) # Separate audio model_type = "2stems" if "2-stem" in separation_mode else "4stems" separation_result = engine.separate_vocals(audio_file, model_type) if not separation_result['success']: return f"â Separation failed: {separation_result['error']}", None, None, None, None, analysis_text if model_type == "2stems": return ( "â 2-stem separation completed successfully!", separation_result.get('vocals'), separation_result.get('accompaniment'), None, None, analysis_text ) else: return ( "â 4-stem separation completed successfully!", separation_result.get('vocals'), separation_result.get('drums'), separation_result.get('bass'), separation_result.get('other'), analysis_text ) except Exception as e: return f"â Processing error: {str(e)}", None, None, None, None, "" def process_vocal_effects(audio_file, pitch_shift, reverb_amount): """Apply vocal effects to audio""" if not audio_file: return "â Please upload an audio file", None, "" try: # Analyze original analysis = engine.analyze_audio(audio_file) analysis_text = format_analysis_results(analysis) # Apply effects effects_result = engine.apply_effects(audio_file, pitch_shift, reverb_amount) if not effects_result['success']: return f"â Effects failed: {effects_result['error']}", None, analysis_text effects_applied = [] if pitch_shift != 0: effects_applied.append(f"Pitch: {pitch_shift:+.1f} semitones") if reverb_amount > 0: effects_applied.append(f"Reverb: {reverb_amount:.2f}") status = f"â Effects applied: {', '.join(effects_applied)}" if effects_applied else "â Audio processed (no effects)" return status, effects_result['output'], analysis_text except Exception as e: return f"â Processing error: {str(e)}", None, "" def process_style_coaching(reference_files, user_audio): """Style coaching analysis""" if not reference_files or len(reference_files) < 2: return "â Upload at least 2 reference tracks", "", "" if not user_audio: return "â Please record or upload your performance", "", "" if not SPLEETER_AVAILABLE or not ADVANCED_FEATURES: return "â Style coaching requires advanced features", "", "" try: # Process reference tracks ref_features = [] ref_status = [] for i, ref_file in enumerate(reference_files[:5]): # Separate vocals separation_result = engine.separate_vocals(ref_file.name, "2stems") if separation_result['success'] and separation_result.get('vocals'): # Extract features features = engine.extract_vocal_features(separation_result['vocals']) if features['success']: ref_features.append(features) ref_status.append(f"â Reference {i+1}: Processed") else: ref_status.append(f"â Reference {i+1}: Feature extraction failed") else: ref_status.append(f"â Reference {i+1}: Vocal separation failed") if len(ref_features) < 2: return "â Need at least 2 valid reference tracks", "\n".join(ref_status), "" # Process user audio user_separation = engine.separate_vocals(user_audio, "2stems") if not user_separation['success'] or not user_separation.get('vocals'): return "â Could not separate vocals from your performance", "\n".join(ref_status), "" user_features = engine.extract_vocal_features(user_separation['vocals']) if not user_features['success']: return "â Could not analyze your vocal features", "\n".join(ref_status), "" # Compare styles comparison = engine.compare_vocal_styles(user_features, ref_features) if not comparison['success']: return f"â Style comparison failed: {comparison['error']}", "\n".join(ref_status), "" # Format feedback feedback_text = f"""đ¯ Vocal Style Coaching Results đ Overall Score: {comparison['score']}/100 đĩ Detailed Feedback: {chr(10).join(comparison['feedback'])} đ Technical Metrics: âĸ Pitch Difference: {comparison['metrics']['pitch_diff']} Hz âĸ Tempo Difference: {comparison['metrics']['tempo_diff']} BPM âĸ Timbre Difference: {comparison['metrics']['timbre_diff']} Hz âĸ Energy Difference: {comparison['metrics']['energy_diff']} đ¯ Recommendations: {f"đĨ Excellent! You're very close to the target style." if comparison['score'] > 80 else f"đ Good progress! Focus on the areas mentioned above." if comparison['score'] > 60 else f"đĒ Keep practicing! Work on basic vocal technique first."} References analyzed: {len(ref_features)}/5""" return f"â Style coaching complete! Score: {comparison['score']}/100", "\n".join(ref_status), feedback_text except Exception as e: return f"â Coaching failed: {str(e)}", "", "" # Create main interface def create_app(): with gr.Blocks(title="Audio Singing Helper") as app: gr.HTML("""
Professional audio processing for singers and musicians