Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -153,14 +153,22 @@ def spectrogram_to_audio(magnitude_spectrogram):
|
|
153 |
# Inverse STFT to convert the spectrogram back to audio
|
154 |
audio = torch.istft(complex_spectrogram, n_fft=n_fft, hop_length=hop_length)
|
155 |
|
156 |
-
#
|
|
|
|
|
|
|
157 |
if torch.max(torch.abs(audio)) != 0:
|
158 |
audio = audio / torch.max(torch.abs(audio))
|
|
|
|
|
159 |
audio = torch.clamp(audio, min=-1, max=1)
|
160 |
|
161 |
-
# Convert to 16-bit PCM format
|
162 |
audio = (audio * 32767).short()
|
163 |
|
|
|
|
|
|
|
164 |
return audio
|
165 |
|
166 |
|
@@ -181,6 +189,7 @@ def generate_audio_from_image(image):
|
|
181 |
return (sample_rate, generated_audio_numpy)
|
182 |
|
183 |
|
|
|
184 |
# Gradio Interface
|
185 |
def main():
|
186 |
global generator # Declare the generator object globally
|
|
|
153 |
# Inverse STFT to convert the spectrogram back to audio
|
154 |
audio = torch.istft(complex_spectrogram, n_fft=n_fft, hop_length=hop_length)
|
155 |
|
156 |
+
# Check for NaNs or Infs in the audio and replace them with zeros
|
157 |
+
audio = torch.nan_to_num(audio, nan=0.0, posinf=0.0, neginf=0.0)
|
158 |
+
|
159 |
+
# Normalize audio to the range [-1, 1]
|
160 |
if torch.max(torch.abs(audio)) != 0:
|
161 |
audio = audio / torch.max(torch.abs(audio))
|
162 |
+
|
163 |
+
# Clip the audio to ensure it fits in the range [-1, 1]
|
164 |
audio = torch.clamp(audio, min=-1, max=1)
|
165 |
|
166 |
+
# Convert to 16-bit PCM format by scaling and casting to int16
|
167 |
audio = (audio * 32767).short()
|
168 |
|
169 |
+
# Ensure the values are clipped to the int16 range
|
170 |
+
audio = torch.clamp(audio, min=-32768, max=32767)
|
171 |
+
|
172 |
return audio
|
173 |
|
174 |
|
|
|
189 |
return (sample_rate, generated_audio_numpy)
|
190 |
|
191 |
|
192 |
+
|
193 |
# Gradio Interface
|
194 |
def main():
|
195 |
global generator # Declare the generator object globally
|