Create APP.PY
Browse files
APP.PY
ADDED
@@ -0,0 +1,611 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import librosa
|
3 |
+
import numpy as np
|
4 |
+
import soundfile as sf
|
5 |
+
import os
|
6 |
+
import tempfile
|
7 |
+
import shutil
|
8 |
+
from pathlib import Path
|
9 |
+
import warnings
|
10 |
+
warnings.filterwarnings("ignore")
|
11 |
+
|
12 |
+
# Import for advanced features
|
13 |
+
try:
|
14 |
+
from spleeter.separator import Separator
|
15 |
+
SPLEETER_AVAILABLE = True
|
16 |
+
except ImportError:
|
17 |
+
SPLEETER_AVAILABLE = False
|
18 |
+
print("Spleeter not available - source separation disabled")
|
19 |
+
|
20 |
+
try:
|
21 |
+
import scipy.signal
|
22 |
+
from scipy.spatial.distance import euclidean
|
23 |
+
from dtw import dtw
|
24 |
+
ADVANCED_FEATURES = True
|
25 |
+
except ImportError:
|
26 |
+
ADVANCED_FEATURES = False
|
27 |
+
print("Advanced features not available")
|
28 |
+
|
29 |
+
class AudioEngine:
    """Clean, professional audio processing engine.

    Wraps librosa-based analysis, optional Spleeter source separation, and
    simple vocal effects. All intermediate files are written into a private
    temporary directory that ``cleanup()`` removes.
    """

    def __init__(self):
        # Private scratch directory for all separation/effects output files.
        self.temp_dir = tempfile.mkdtemp()
        # Cache of loaded Spleeter models, keyed by stem configuration
        # ("2stems"/"4stems") so each model is constructed only once.
        self.separators = {}

    @staticmethod
    def _dominant_pitches(y, sr):
        """Return per-frame dominant pitch values (Hz, > 0 only) via piptrack.

        Factored out because analyze_audio() and extract_vocal_features()
        previously duplicated this loop verbatim.
        """
        pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
        values = []
        for t in range(pitches.shape[1]):
            idx = magnitudes[:, t].argmax()
            pitch = pitches[idx, t]
            if pitch > 0:
                values.append(pitch)
        return values

    @staticmethod
    def _tempo_as_float(tempo):
        """Coerce librosa's tempo to a plain float.

        librosa >= 0.10 may return tempo as a 1-element ndarray, on which
        round() raises TypeError; older versions return a scalar.
        """
        return float(np.atleast_1d(tempo)[0])

    def analyze_audio(self, audio_path):
        """Extract comprehensive audio features from the file at *audio_path*.

        Returns:
            dict: ``{'success': True, ...rounded metrics...}`` on success, or
            ``{'success': False, 'error': str}`` on failure.
        """
        try:
            y, sr = librosa.load(audio_path)

            # Basic properties
            duration = len(y) / sr
            tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
            tempo = self._tempo_as_float(tempo)

            # Spectral features (means over all frames)
            spectral_centroid = float(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)))
            spectral_rolloff = float(np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)))
            zero_crossing_rate = float(np.mean(librosa.feature.zero_crossing_rate(y)))

            # Energy
            rms_energy = float(np.mean(librosa.feature.rms(y=y)))

            # Pitch estimation
            pitch_values = self._dominant_pitches(y, sr)
            avg_pitch = float(np.mean(pitch_values)) if pitch_values else 0.0

            return {
                'success': True,
                'duration': round(duration, 2),
                'tempo': round(tempo, 1),
                'sample_rate': sr,
                'spectral_centroid': round(spectral_centroid, 2),
                'spectral_rolloff': round(spectral_rolloff, 2),
                'zero_crossing_rate': round(zero_crossing_rate, 4),
                'rms_energy': round(rms_energy, 4),
                'average_pitch': round(avg_pitch, 2),
                'pitch_count': len(pitch_values),
                'beats_detected': len(beats),
            }

        except Exception as e:
            return {'success': False, 'error': str(e)}

    def separate_vocals(self, audio_path, model_type="2stems"):
        """Separate stems with Spleeter.

        Args:
            audio_path: Path to the input audio file.
            model_type: ``"2stems"`` (vocals + accompaniment) or ``"4stems"``
                (vocals + drums + bass + other).

        Returns:
            dict: ``{'success': True, <stem>: path-or-None, ...}`` or
            ``{'success': False, 'error': str}``.
        """
        if not SPLEETER_AVAILABLE:
            return {'success': False, 'error': 'Spleeter not available'}

        try:
            # Load the model lazily and cache it for subsequent calls.
            if model_type not in self.separators:
                self.separators[model_type] = Separator(f'spleeter:{model_type}-16kHz')
            separator = self.separators[model_type]

            # mkdtemp guarantees a unique directory (the previous
            # np.random.randint(10000) scheme could collide).
            output_dir = tempfile.mkdtemp(prefix="separation_", dir=self.temp_dir)

            separator.separate_to_file(audio_path, output_dir)

            # Spleeter writes stems under <output_dir>/<input-stem-name>/.
            result_dir = os.path.join(output_dir, Path(audio_path).stem)

            def stem_path(stem):
                # Return the stem's wav path, or None if Spleeter did not emit it.
                path = os.path.join(result_dir, f"{stem}.wav")
                return path if os.path.exists(path) else None

            if model_type == "2stems":
                return {
                    'success': True,
                    'vocals': stem_path("vocals"),
                    'accompaniment': stem_path("accompaniment"),
                }
            if model_type == "4stems":
                return {
                    'success': True,
                    'vocals': stem_path("vocals"),
                    'drums': stem_path("drums"),
                    'bass': stem_path("bass"),
                    'other': stem_path("other"),
                }
            # The original implementation fell off the end and returned None
            # for any other model type; report the problem explicitly instead.
            return {'success': False, 'error': f'Unknown model type: {model_type}'}

        except Exception as e:
            return {'success': False, 'error': str(e)}

    def apply_effects(self, audio_path, pitch_shift=0, reverb=0):
        """Apply vocal effects and write the result to a new temp wav file.

        Args:
            audio_path: Path to the input audio file.
            pitch_shift: Pitch shift in semitones (0 = no shift).
            reverb: Reverb wet amount (0 = none); requires scipy.

        Returns:
            dict: ``{'success': True, 'output': path}`` or
            ``{'success': False, 'error': str}``.
        """
        try:
            y, sr = librosa.load(audio_path)

            # Pitch shift
            if pitch_shift != 0:
                y = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch_shift)

            # Reverb via convolution with an exponentially decaying noise
            # impulse (a simple synthetic room response).
            if reverb > 0 and ADVANCED_FEATURES:
                reverb_length = int(0.5 * sr)
                impulse = np.random.randn(reverb_length) * np.exp(-np.arange(reverb_length) / (sr * 0.1))
                y = scipy.signal.convolve(y, impulse * reverb, mode='same')
                peak = np.max(np.abs(y))
                if peak > 0:  # guard: silent input would otherwise divide by zero
                    y = y / peak

            # mkstemp gives a collision-free output name; close the fd since
            # soundfile reopens the path itself.
            fd, output_path = tempfile.mkstemp(prefix="processed_", suffix=".wav", dir=self.temp_dir)
            os.close(fd)
            sf.write(output_path, y, sr)

            return {'success': True, 'output': output_path}

        except Exception as e:
            return {'success': False, 'error': str(e)}

    def extract_vocal_features(self, audio_path):
        """Extract vocal features used by the style-coaching comparison.

        Returns:
            dict: ``{'success': True, 'mean_pitch': ..., 'pitch_std': ...,
            'pitch_range': ..., 'tempo': ..., 'spectral_centroid': ...,
            'rms_energy': ...}`` or ``{'success': False, 'error': str}``.
        """
        try:
            y, sr = librosa.load(audio_path)

            pitch_values = self._dominant_pitches(y, sr)
            if not pitch_values:
                return {'success': False, 'error': 'No pitch detected'}

            # Basic vocal metrics
            mean_pitch = np.mean(pitch_values)
            pitch_std = np.std(pitch_values)
            pitch_range = max(pitch_values) - min(pitch_values)

            # Tempo (coerced: may be an ndarray on newer librosa)
            tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
            tempo = self._tempo_as_float(tempo)

            # Timbre and energy
            spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
            rms_energy = np.mean(librosa.feature.rms(y=y))

            return {
                'success': True,
                'mean_pitch': mean_pitch,
                'pitch_std': pitch_std,
                'pitch_range': pitch_range,
                'tempo': tempo,
                'spectral_centroid': spectral_centroid,
                'rms_energy': rms_energy,
            }

        except Exception as e:
            return {'success': False, 'error': str(e)}

    def compare_vocal_styles(self, user_features, reference_features_list):
        """Compare the user's vocal features against averaged reference features.

        Args:
            user_features: Feature dict from extract_vocal_features().
            reference_features_list: List of such dicts for reference tracks.

        Returns:
            dict with 'score' (0-100), human-readable 'feedback' lines, and
            raw 'metrics'; or ``{'success': False, 'error': str}``.
        """
        if not ADVANCED_FEATURES:
            return {'success': False, 'error': 'Advanced features not available'}

        try:
            # Average each metric over the available references.
            ref_avg = {}
            for key in ['mean_pitch', 'pitch_std', 'pitch_range', 'tempo', 'spectral_centroid', 'rms_energy']:
                values = [ref[key] for ref in reference_features_list if key in ref]
                ref_avg[key] = np.mean(values) if values else 0

            # Absolute differences between the user and the reference average.
            pitch_diff = abs(user_features['mean_pitch'] - ref_avg['mean_pitch'])
            tempo_diff = abs(user_features['tempo'] - ref_avg['tempo'])
            timbre_diff = abs(user_features['spectral_centroid'] - ref_avg['spectral_centroid'])
            energy_diff = abs(user_features['rms_energy'] - ref_avg['rms_energy'])

            # Human-readable feedback; thresholds are heuristic.
            feedback = []

            if pitch_diff > 50:
                feedback.append(f"🎵 Pitch: Your average pitch differs by {pitch_diff:.1f} Hz. Practice matching the reference key.")
            else:
                feedback.append("🎵 Pitch: Good pitch accuracy!")

            if tempo_diff > 10:
                feedback.append(f"⏱️ Tempo: Your tempo differs by {tempo_diff:.1f} BPM. Work on timing consistency.")
            else:
                feedback.append("⏱️ Tempo: Good timing!")

            if timbre_diff > 500:
                feedback.append("🗣️ Timbre: Try adjusting your vocal tone to match the reference style.")
            else:
                feedback.append("🗣️ Timbre: Good vocal tone match!")

            if energy_diff > 0.1:
                feedback.append("🔊 Energy: Adjust your vocal intensity to match the reference.")
            else:
                feedback.append("🔊 Energy: Good energy level!")

            # Heuristic score: start at 100 and penalize each weighted
            # difference, floored at 0.
            overall_score = max(0, 100 - (pitch_diff / 2 + tempo_diff + timbre_diff / 10 + energy_diff * 100))

            return {
                'success': True,
                'score': round(overall_score, 1),
                'feedback': feedback,
                'metrics': {
                    'pitch_diff': round(pitch_diff, 1),
                    'tempo_diff': round(tempo_diff, 1),
                    'timbre_diff': round(timbre_diff, 1),
                    'energy_diff': round(energy_diff, 3),
                },
            }

        except Exception as e:
            return {'success': False, 'error': str(e)}

    def cleanup(self):
        """Remove the engine's temporary directory (best-effort)."""
        try:
            if os.path.exists(self.temp_dir):
                shutil.rmtree(self.temp_dir)
        except Exception:
            # Best-effort cleanup: a leaked temp dir is preferable to crashing
            # at shutdown.
            pass
266 |
+
|
267 |
+
# Global engine instance shared by every request handler below; it owns the
# process-wide temp directory and the cached Spleeter models.
engine = AudioEngine()
|
269 |
+
|
270 |
+
def format_analysis_results(analysis):
    """Format an engine.analyze_audio() result dict for display.

    Args:
        analysis: Dict with 'success' plus the metric keys produced by
            AudioEngine.analyze_audio().

    Returns:
        A human-readable multi-line string; an error line if analysis failed.
    """
    # Note: the emoji below restore the intended characters; the original
    # file's literals were mojibake from a UTF-8/ISO-8859-7 mix-up.
    if not analysis['success']:
        return f"❌ Analysis failed: {analysis['error']}"

    return f"""📊 Audio Analysis Results

🎵 Basic Properties:
• Duration: {analysis['duration']} seconds
• Sample Rate: {analysis['sample_rate']} Hz
• Tempo: {analysis['tempo']} BPM

📊 Audio Characteristics:
• Spectral Centroid: {analysis['spectral_centroid']} Hz
• Spectral Rolloff: {analysis['spectral_rolloff']} Hz
• Zero Crossing Rate: {analysis['zero_crossing_rate']}
• RMS Energy: {analysis['rms_energy']}

🎤 Vocal Information:
• Average Pitch: {analysis['average_pitch']} Hz
• Pitch Points Detected: {analysis['pitch_count']}
• Beats Detected: {analysis['beats_detected']}"""
|
292 |
+
|
293 |
+
def process_audio_separation(audio_file, separation_mode):
    """Gradio handler: analyze an upload, then split it into stems.

    Args:
        audio_file: Filepath from the gr.Audio input (or None/'' if empty).
        separation_mode: Dropdown label; "2-stem" anywhere in it selects the
            2-stem model, otherwise the 4-stem model is used.

    Returns:
        6-tuple matching the outputs wiring:
        (status, vocals, instrumental/drums, bass, other, analysis_text).
    """
    # Emoji literals below restore the intended ✅/❌ characters that were
    # mojibake'd (and even line-split) in the original source.
    if not audio_file:
        return "❌ Please upload an audio file", None, None, None, None, ""

    if not SPLEETER_AVAILABLE:
        return "❌ Spleeter not available for source separation", None, None, None, None, ""

    try:
        # Analyze first so the analysis panel is populated even if
        # separation subsequently fails.
        analysis = engine.analyze_audio(audio_file)
        analysis_text = format_analysis_results(analysis)

        model_type = "2stems" if "2-stem" in separation_mode else "4stems"
        separation_result = engine.separate_vocals(audio_file, model_type)

        # Guard against an unexpected empty result as well as an explicit
        # failure dict (the original crashed on None with a KeyError).
        if not separation_result or not separation_result.get('success'):
            error = (separation_result or {}).get('error', 'unknown error')
            return f"❌ Separation failed: {error}", None, None, None, None, analysis_text

        if model_type == "2stems":
            return (
                "✅ 2-stem separation completed successfully!",
                separation_result.get('vocals'),
                separation_result.get('accompaniment'),
                None,
                None,
                analysis_text,
            )

        return (
            "✅ 4-stem separation completed successfully!",
            separation_result.get('vocals'),
            separation_result.get('drums'),
            separation_result.get('bass'),
            separation_result.get('other'),
            analysis_text,
        )

    except Exception as e:
        return f"❌ Processing error: {str(e)}", None, None, None, None, ""
|
334 |
+
|
335 |
+
def process_vocal_effects(audio_file, pitch_shift, reverb_amount):
    """Gradio handler: apply pitch shift and/or reverb to an upload.

    Args:
        audio_file: Filepath from the gr.Audio input (or None/'' if empty).
        pitch_shift: Semitone shift from the slider (0 = none).
        reverb_amount: Reverb wet amount from the slider (0 = none).

    Returns:
        3-tuple matching the outputs wiring: (status, processed_path, analysis_text).
    """
    # Emoji literals below restore the intended ✅/❌ characters that were
    # mojibake'd (and line-split) in the original source.
    if not audio_file:
        return "❌ Please upload an audio file", None, ""

    try:
        # Analyze the original so the panel shows pre-effect characteristics.
        analysis = engine.analyze_audio(audio_file)
        analysis_text = format_analysis_results(analysis)

        effects_result = engine.apply_effects(audio_file, pitch_shift, reverb_amount)
        if not effects_result['success']:
            return f"❌ Effects failed: {effects_result['error']}", None, analysis_text

        # Summarize which effects actually changed the signal.
        effects_applied = []
        if pitch_shift != 0:
            effects_applied.append(f"Pitch: {pitch_shift:+.1f} semitones")
        if reverb_amount > 0:
            effects_applied.append(f"Reverb: {reverb_amount:.2f}")

        if effects_applied:
            status = f"✅ Effects applied: {', '.join(effects_applied)}"
        else:
            status = "✅ Audio processed (no effects)"

        return status, effects_result['output'], analysis_text

    except Exception as e:
        return f"❌ Processing error: {str(e)}", None, ""
|
363 |
+
|
364 |
+
def process_style_coaching(reference_files, user_audio):
    """Gradio handler: score the user's vocals against reference tracks.

    Separates vocals from up to 5 reference uploads and from the user's
    performance, extracts vocal features from each, and compares them.

    Args:
        reference_files: List from gr.File(file_count="multiple"); items may
            be file objects with .name or plain path strings depending on
            the Gradio version.
        user_audio: Filepath of the user's recording/upload (or None).

    Returns:
        3-tuple matching the outputs wiring:
        (status, reference_processing_log, feedback_text).
    """
    # Emoji literals below restore the intended characters that were
    # mojibake'd in the original source.
    if not reference_files or len(reference_files) < 2:
        return "❌ Upload at least 2 reference tracks", "", ""

    if not user_audio:
        return "❌ Please record or upload your performance", "", ""

    if not SPLEETER_AVAILABLE or not ADVANCED_FEATURES:
        return "❌ Style coaching requires advanced features", "", ""

    try:
        # Process at most 5 reference tracks, collecting features and a
        # per-track status line.
        ref_features = []
        ref_status = []

        for i, ref_file in enumerate(reference_files[:5]):
            # Newer Gradio versions hand back plain path strings instead of
            # tempfile objects; accept both (the original assumed .name).
            ref_path = getattr(ref_file, "name", ref_file)
            separation_result = engine.separate_vocals(ref_path, "2stems")
            if separation_result['success'] and separation_result.get('vocals'):
                features = engine.extract_vocal_features(separation_result['vocals'])
                if features['success']:
                    ref_features.append(features)
                    ref_status.append(f"✅ Reference {i+1}: Processed")
                else:
                    ref_status.append(f"❌ Reference {i+1}: Feature extraction failed")
            else:
                ref_status.append(f"❌ Reference {i+1}: Vocal separation failed")

        if len(ref_features) < 2:
            return "❌ Need at least 2 valid reference tracks", "\n".join(ref_status), ""

        # Process the user's performance the same way.
        user_separation = engine.separate_vocals(user_audio, "2stems")
        if not user_separation['success'] or not user_separation.get('vocals'):
            return "❌ Could not separate vocals from your performance", "\n".join(ref_status), ""

        user_features = engine.extract_vocal_features(user_separation['vocals'])
        if not user_features['success']:
            return "❌ Could not analyze your vocal features", "\n".join(ref_status), ""

        comparison = engine.compare_vocal_styles(user_features, ref_features)
        if not comparison['success']:
            return f"❌ Style comparison failed: {comparison['error']}", "\n".join(ref_status), ""

        # Pick the recommendation outside the f-string (the original nested a
        # multi-branch conditional expression inside the format braces).
        if comparison['score'] > 80:
            recommendation = "🔥 Excellent! You're very close to the target style."
        elif comparison['score'] > 60:
            recommendation = "👍 Good progress! Focus on the areas mentioned above."
        else:
            recommendation = "💪 Keep practicing! Work on basic vocal technique first."

        feedback_text = f"""🎯 Vocal Style Coaching Results

📊 Overall Score: {comparison['score']}/100

🎵 Detailed Feedback:
{chr(10).join(comparison['feedback'])}

📊 Technical Metrics:
• Pitch Difference: {comparison['metrics']['pitch_diff']} Hz
• Tempo Difference: {comparison['metrics']['tempo_diff']} BPM
• Timbre Difference: {comparison['metrics']['timbre_diff']} Hz
• Energy Difference: {comparison['metrics']['energy_diff']}

🎯 Recommendations:
{recommendation}

References analyzed: {len(ref_features)}/5"""

        return f"✅ Style coaching complete! Score: {comparison['score']}/100", "\n".join(ref_status), feedback_text

    except Exception as e:
        return f"❌ Coaching failed: {str(e)}", "", ""
|
436 |
+
|
437 |
+
# Create main interface
|
438 |
+
def create_app():
    """Build and return the Gradio Blocks UI.

    Four feature tabs (separation, effects, live recording, style coaching)
    plus a help tab; all event handlers are wired at the bottom. Emoji in
    labels restore the intended characters that were mojibake'd in the
    original source.
    """
    with gr.Blocks(title="Audio Singing Helper") as app:

        gr.HTML("""
        <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 20px;">
            <h1>🎤 Audio Singing Helper</h1>
            <p>Professional audio processing for singers and musicians</p>
        </div>
        """)

        with gr.Tabs():

            # Audio Separation Tab
            with gr.Tab("🎵 Audio Separation"):
                gr.Markdown("### Separate vocals from instrumental tracks")

                with gr.Row():
                    with gr.Column():
                        sep_audio_input = gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"])
                        sep_mode = gr.Dropdown(
                            choices=["2-stem (Vocals + Instrumental)", "4-stem (Vocals + Drums + Bass + Other)"],
                            value="2-stem (Vocals + Instrumental)",
                            label="Separation Mode"
                        )
                        sep_button = gr.Button("🎯 Separate Audio", variant="primary")

                    with gr.Column():
                        sep_status = gr.Textbox(label="Status", lines=2, interactive=False)
                        sep_analysis = gr.Textbox(label="Audio Analysis", lines=12, interactive=False)

                with gr.Row():
                    sep_vocals = gr.Audio(label="🎤 Vocals", show_download_button=True)
                    sep_instrumental = gr.Audio(label="🎼 Instrumental/Drums", show_download_button=True)

                with gr.Row():
                    sep_bass = gr.Audio(label="🎸 Bass", show_download_button=True)
                    sep_other = gr.Audio(label="🎹 Other", show_download_button=True)

            # Vocal Effects Tab
            with gr.Tab("🎛️ Vocal Effects"):
                gr.Markdown("### Apply professional vocal effects")

                with gr.Row():
                    with gr.Column():
                        fx_audio_input = gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"])
                        fx_pitch = gr.Slider(-12, 12, 0, step=0.5, label="Pitch Shift (semitones)")
                        fx_reverb = gr.Slider(0, 0.5, 0, step=0.05, label="Reverb Amount")
                        fx_button = gr.Button("🎵 Apply Effects", variant="primary")

                    with gr.Column():
                        fx_status = gr.Textbox(label="Status", lines=2, interactive=False)
                        fx_analysis = gr.Textbox(label="Audio Analysis", lines=10, interactive=False)

                fx_output = gr.Audio(label="🎧 Processed Audio", show_download_button=True)

            # Live Recording Tab
            with gr.Tab("🎙️ Live Recording"):
                gr.Markdown("### Record and process your voice in real-time")

                with gr.Row():
                    with gr.Column():
                        live_audio = gr.Audio(type="filepath", sources=["microphone"], label="Record Your Voice")
                        live_pitch = gr.Slider(-12, 12, 0, step=0.5, label="Pitch Correction")
                        live_reverb = gr.Slider(0, 0.5, 0, step=0.05, label="Reverb")
                        live_button = gr.Button("🎤 Process Recording", variant="primary")

                    with gr.Column():
                        live_status = gr.Textbox(label="Status", lines=2, interactive=False)
                        live_analysis = gr.Textbox(label="Recording Analysis", lines=10, interactive=False)

                live_output = gr.Audio(label="🎧 Processed Recording", show_download_button=True)

            # Style Coaching Tab
            with gr.Tab("🎓 Style Coaching"):
                gr.Markdown("### Get personalized vocal coaching feedback")

                with gr.Row():
                    with gr.Column():
                        coach_refs = gr.File(
                            label="Reference Tracks (2-5 files)",
                            file_count="multiple",
                            file_types=["audio"]
                        )
                        coach_user = gr.Audio(
                            type="filepath",
                            label="Your Performance",
                            sources=["upload", "microphone"]
                        )
                        coach_button = gr.Button("🎯 Get Coaching", variant="primary")

                    with gr.Column():
                        coach_status = gr.Textbox(label="Status", lines=3, interactive=False)
                        coach_refs_status = gr.Textbox(label="Reference Processing", lines=8, interactive=False)

                coach_feedback = gr.Textbox(label="🎯 Coaching Feedback", lines=15, interactive=False)

            # Help Tab
            with gr.Tab("ℹ️ Help"):
                gr.Markdown("""
                # 🎤 Audio Singing Helper - User Guide

                ## Features

                ### 🎵 Audio Separation
                - Upload any song to separate vocals from instruments
                - Choose 2-stem (vocals + instrumental) or 4-stem (vocals + drums + bass + other)
                - Get detailed audio analysis of your tracks

                ### 🎛️ Vocal Effects
                - Apply pitch shifting (-12 to +12 semitones)
                - Add reverb for spatial depth
                - Process any audio file with professional effects

                ### 🎙️ Live Recording
                - Record directly from your microphone
                - Apply real-time pitch correction and reverb
                - Perfect for vocal practice and experimentation

                ### 🎓 Style Coaching
                - Upload 2-5 reference tracks from artists you want to emulate
                - Record or upload your performance
                - Get AI-powered feedback on pitch, timing, and vocal characteristics
                - Receive a score and specific improvement suggestions

                ## Tips for Best Results

                - **Use high-quality audio files** - better input = better results
                - **Keep files under 5 minutes** for faster processing
                - **For style coaching**: Choose references from similar genres
                - **Record in quiet environments** for best analysis

                ## Supported Formats
                - Input: MP3, WAV, FLAC, M4A, OGG
                - Output: High-quality WAV files

                ## Technical Requirements
                - Some features require additional dependencies
                - Processing time varies based on file length and complexity

                ---
                Built for singers and musicians worldwide 🌍
                """)

        # Wire each button to its handler; output order must match the
        # tuples returned by the process_* functions.
        sep_button.click(
            process_audio_separation,
            inputs=[sep_audio_input, sep_mode],
            outputs=[sep_status, sep_vocals, sep_instrumental, sep_bass, sep_other, sep_analysis]
        )

        fx_button.click(
            process_vocal_effects,
            inputs=[fx_audio_input, fx_pitch, fx_reverb],
            outputs=[fx_status, fx_output, fx_analysis]
        )

        # Live recording reuses the effects pipeline on microphone input.
        live_button.click(
            process_vocal_effects,
            inputs=[live_audio, live_pitch, live_reverb],
            outputs=[live_status, live_output, live_analysis]
        )

        coach_button.click(
            process_style_coaching,
            inputs=[coach_refs, coach_user],
            outputs=[coach_status, coach_refs_status, coach_feedback]
        )

    return app
|
608 |
+
|
609 |
+
if __name__ == "__main__":
    # Build the Gradio UI and start serving it.
    create_app().launch()
|