Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -137,7 +137,6 @@ def magnitude_to_complex_spectrogram(magnitude_spectrogram):
|
|
137 |
complex_spectrogram = torch.stack([magnitude_spectrogram, zero_phase], dim=-1)
|
138 |
return complex_spectrogram
|
139 |
|
140 |
-
|
141 |
def spectrogram_to_audio(magnitude_spectrogram):
|
142 |
# Perform inverse log scaling
|
143 |
magnitude_spectrogram = torch.expm1(magnitude_spectrogram)
|
@@ -155,12 +154,16 @@ def spectrogram_to_audio(magnitude_spectrogram):
|
|
155 |
# Clip the audio to ensure it fits in the range [-1, 1]
|
156 |
audio = torch.clamp(audio, min=-1, max=1)
|
157 |
|
|
|
|
|
|
|
|
|
158 |
# Convert to 16-bit PCM format by scaling and casting to int16
|
159 |
audio = (audio * 32767).short()
|
160 |
|
161 |
-
#
|
162 |
audio = audio.cpu().numpy().astype(np.int16)
|
163 |
-
|
164 |
return audio
|
165 |
|
166 |
|
@@ -173,9 +176,6 @@ def generate_audio_from_image(image):
|
|
173 |
|
174 |
# Convert the generated spectrogram to audio
|
175 |
generated_audio = spectrogram_to_audio(generated_spectrogram.squeeze(0).cpu())
|
176 |
-
if torch.isnan(generated_audio).any():
|
177 |
-
raise ValueError("Generated audio contains NaN values.")
|
178 |
-
|
179 |
|
180 |
# Return the audio as (sample_rate, audio) tuple for Gradio to process
|
181 |
return (sample_rate, generated_audio)
|
|
|
137 |
complex_spectrogram = torch.stack([magnitude_spectrogram, zero_phase], dim=-1)
|
138 |
return complex_spectrogram
|
139 |
|
|
|
140 |
def spectrogram_to_audio(magnitude_spectrogram):
|
141 |
# Perform inverse log scaling
|
142 |
magnitude_spectrogram = torch.expm1(magnitude_spectrogram)
|
|
|
154 |
# Clip the audio to ensure it fits in the range [-1, 1]
|
155 |
audio = torch.clamp(audio, min=-1, max=1)
|
156 |
|
157 |
+
# Check for NaNs in the audio tensor
|
158 |
+
if torch.isnan(audio).any():
|
159 |
+
raise ValueError("Generated audio contains NaN values.")
|
160 |
+
|
161 |
# Convert to 16-bit PCM format by scaling and casting to int16
|
162 |
audio = (audio * 32767).short()
|
163 |
|
164 |
+
# Convert audio tensor to numpy array for Gradio
|
165 |
audio = audio.cpu().numpy().astype(np.int16)
|
166 |
+
|
167 |
return audio
|
168 |
|
169 |
|
|
|
176 |
|
177 |
# Convert the generated spectrogram to audio
|
178 |
generated_audio = spectrogram_to_audio(generated_spectrogram.squeeze(0).cpu())
|
|
|
|
|
|
|
179 |
|
180 |
# Return the audio as (sample_rate, audio) tuple for Gradio to process
|
181 |
return (sample_rate, generated_audio)
|