musdfakoc committed on
Commit
5af138b
·
verified ·
1 Parent(s): 0141d56

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -2
app.py CHANGED
@@ -153,14 +153,22 @@ def spectrogram_to_audio(magnitude_spectrogram):
153
  # Inverse STFT to convert the spectrogram back to audio
154
  audio = torch.istft(complex_spectrogram, n_fft=n_fft, hop_length=hop_length)
155
 
156
- # Normalize and clip the audio
 
 
 
157
  if torch.max(torch.abs(audio)) != 0:
158
  audio = audio / torch.max(torch.abs(audio))
 
 
159
  audio = torch.clamp(audio, min=-1, max=1)
160
 
161
- # Convert to 16-bit PCM format
162
  audio = (audio * 32767).short()
163
 
 
 
 
164
  return audio
165
 
166
 
@@ -181,6 +189,7 @@ def generate_audio_from_image(image):
181
  return (sample_rate, generated_audio_numpy)
182
 
183
 
 
184
  # Gradio Interface
185
  def main():
186
  global generator # Declare the generator object globally
 
153
  # Inverse STFT to convert the spectrogram back to audio
154
  audio = torch.istft(complex_spectrogram, n_fft=n_fft, hop_length=hop_length)
155
 
156
+ # Check for NaNs or Infs in the audio and replace them with zeros
157
+ audio = torch.nan_to_num(audio, nan=0.0, posinf=0.0, neginf=0.0)
158
+
159
+ # Normalize audio to the range [-1, 1]
160
  if torch.max(torch.abs(audio)) != 0:
161
  audio = audio / torch.max(torch.abs(audio))
162
+
163
+ # Clip the audio to ensure it fits in the range [-1, 1]
164
  audio = torch.clamp(audio, min=-1, max=1)
165
 
166
+ # Convert to 16-bit PCM format by scaling and casting to int16
167
  audio = (audio * 32767).short()
168
 
169
+ # Ensure the values are clipped to the int16 range
170
+ audio = torch.clamp(audio, min=-32768, max=32767)
171
+
172
  return audio
173
 
174
 
 
189
  return (sample_rate, generated_audio_numpy)
190
 
191
 
192
+
193
  # Gradio Interface
194
  def main():
195
  global generator # Declare the generator object globally