Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -149,14 +149,18 @@ def spectrogram_to_audio(magnitude_spectrogram):
|
|
149 |
audio = torch.istft(complex_spectrogram, n_fft=n_fft, hop_length=hop_length)
|
150 |
|
151 |
# Normalize audio to the range [-1, 1] (standard audio range)
|
152 |
-
|
|
|
153 |
|
154 |
# Clip the audio to ensure it fits in the range [-1, 1]
|
155 |
audio = torch.clamp(audio, min=-1, max=1)
|
156 |
|
157 |
-
# Convert to 16-bit PCM format by scaling and casting
|
158 |
audio = (audio * 32767).short()
|
159 |
|
|
|
|
|
|
|
160 |
return audio
|
161 |
|
162 |
|
@@ -169,9 +173,12 @@ def generate_audio_from_image(image):
|
|
169 |
|
170 |
# Convert the generated spectrogram to audio
|
171 |
generated_audio = spectrogram_to_audio(generated_spectrogram.squeeze(0).cpu())
|
|
|
|
|
|
|
172 |
|
173 |
-
#
|
174 |
-
return (sample_rate, generated_audio
|
175 |
|
176 |
|
177 |
# Gradio Interface
|
|
|
149 |
audio = torch.istft(complex_spectrogram, n_fft=n_fft, hop_length=hop_length)
|
150 |
|
151 |
# Normalize audio to the range [-1, 1] (standard audio range)
|
152 |
+
if torch.max(torch.abs(audio)) != 0:
|
153 |
+
audio = audio / torch.max(torch.abs(audio))
|
154 |
|
155 |
# Clip the audio to ensure it fits in the range [-1, 1]
|
156 |
audio = torch.clamp(audio, min=-1, max=1)
|
157 |
|
158 |
+
# Convert to 16-bit PCM format by scaling and casting to int16
|
159 |
audio = (audio * 32767).short()
|
160 |
|
161 |
+
# Ensure the audio is in the correct format (int16)
|
162 |
+
audio = audio.cpu().numpy().astype(np.int16)
|
163 |
+
|
164 |
return audio
|
165 |
|
166 |
|
|
|
173 |
|
174 |
# Convert the generated spectrogram to audio
|
175 |
generated_audio = spectrogram_to_audio(generated_spectrogram.squeeze(0).cpu())
|
176 |
+
if torch.isnan(generated_audio).any():
|
177 |
+
raise ValueError("Generated audio contains NaN values.")
|
178 |
+
|
179 |
|
180 |
+
# Return the audio as (sample_rate, audio) tuple for Gradio to process
|
181 |
+
return (sample_rate, generated_audio)
|
182 |
|
183 |
|
184 |
# Gradio Interface
|