musdfakoc committed on
Commit
755bafb
·
verified ·
1 Parent(s): 35945f8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -8
app.py CHANGED
@@ -153,7 +153,7 @@ def spectrogram_to_audio(magnitude_spectrogram):
153
  # Inverse STFT to convert the spectrogram back to audio
154
  audio = torch.istft(complex_spectrogram, n_fft=n_fft, hop_length=hop_length)
155
 
156
- # Handle any NaNs or Infs
157
  audio = torch.nan_to_num(audio, nan=0.0, posinf=0.0, neginf=0.0)
158
 
159
  # Normalize audio to the range [-1, 1] (standard audio range)
@@ -163,13 +163,17 @@ def spectrogram_to_audio(magnitude_spectrogram):
163
  # Clip the audio to the range [-1, 1] to avoid out-of-bounds values
164
  audio = torch.clamp(audio, min=-1, max=1)
165
 
166
- # Convert the audio to 16-bit PCM format by scaling and casting to int16
167
  audio = (audio * 32767).short()
168
 
169
- # Ensure audio is in the valid range for int16
170
  audio = torch.clamp(audio, min=-32768, max=32767)
171
 
172
- return audio
 
 
 
 
173
 
174
 
175
  def generate_audio_from_image(image):
@@ -180,10 +184,7 @@ def generate_audio_from_image(image):
180
  generated_spectrogram = generator(test_img)
181
 
182
  # Convert the generated spectrogram to audio
183
- generated_audio = spectrogram_to_audio(generated_spectrogram.squeeze(0).cpu())
184
-
185
- # Convert the audio tensor to a NumPy array before passing it to Gradio
186
- generated_audio_numpy = generated_audio.numpy().astype(np.int16) # Ensure it's int16
187
 
188
  # Return the sample rate and the NumPy array containing the audio
189
  return (sample_rate, generated_audio_numpy)
 
153
  # Inverse STFT to convert the spectrogram back to audio
154
  audio = torch.istft(complex_spectrogram, n_fft=n_fft, hop_length=hop_length)
155
 
156
+ # Handle NaNs or Infs in the audio and replace them with zeros
157
  audio = torch.nan_to_num(audio, nan=0.0, posinf=0.0, neginf=0.0)
158
 
159
  # Normalize audio to the range [-1, 1] (standard audio range)
 
163
  # Clip the audio to the range [-1, 1] to avoid out-of-bounds values
164
  audio = torch.clamp(audio, min=-1, max=1)
165
 
166
+ # Convert to 16-bit PCM format by scaling and casting to int16
167
  audio = (audio * 32767).short()
168
 
169
+ # Ensure the audio is clipped to the valid range for int16
170
  audio = torch.clamp(audio, min=-32768, max=32767)
171
 
172
+ # Convert the audio to a NumPy array in int16 format
173
+ audio_numpy = audio.cpu().numpy().astype(np.int16)
174
+
175
+ return audio_numpy
176
+
177
 
178
 
179
  def generate_audio_from_image(image):
 
184
  generated_spectrogram = generator(test_img)
185
 
186
  # Convert the generated spectrogram to audio
187
+ generated_audio_numpy = spectrogram_to_audio(generated_spectrogram.squeeze(0).cpu())
 
 
 
188
 
189
  # Return the sample rate and the NumPy array containing the audio
190
  return (sample_rate, generated_audio_numpy)