musdfakoc committed on
Commit
cf99eff
·
verified ·
1 Parent(s): 2efb66f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -4
app.py CHANGED
@@ -149,14 +149,18 @@ def spectrogram_to_audio(magnitude_spectrogram):
149
  audio = torch.istft(complex_spectrogram, n_fft=n_fft, hop_length=hop_length)
150
 
151
  # Normalize audio to the range [-1, 1] (standard audio range)
152
- audio = audio / torch.max(torch.abs(audio))
 
153
 
154
  # Clip the audio to ensure it fits in the range [-1, 1]
155
  audio = torch.clamp(audio, min=-1, max=1)
156
 
157
- # Convert to 16-bit PCM format by scaling and casting
158
  audio = (audio * 32767).short()
159
 
 
 
 
160
  return audio
161
 
162
 
@@ -169,9 +173,12 @@ def generate_audio_from_image(image):
169
 
170
  # Convert the generated spectrogram to audio
171
  generated_audio = spectrogram_to_audio(generated_spectrogram.squeeze(0).cpu())
 
 
 
172
 
173
- # Convert audio tensor to numpy and return it for Gradio to handle
174
- return (sample_rate, generated_audio.numpy())
175
 
176
 
177
  # Gradio Interface
 
149
  audio = torch.istft(complex_spectrogram, n_fft=n_fft, hop_length=hop_length)
150
 
151
  # Normalize audio to the range [-1, 1] (standard audio range)
152
+ if torch.max(torch.abs(audio)) != 0:
153
+ audio = audio / torch.max(torch.abs(audio))
154
 
155
  # Clip the audio to ensure it fits in the range [-1, 1]
156
  audio = torch.clamp(audio, min=-1, max=1)
157
 
158
+ # Convert to 16-bit PCM format by scaling and casting to int16
159
  audio = (audio * 32767).short()
160
 
161
+ # Ensure the audio is in the correct format (int16)
162
+ audio = audio.cpu().numpy().astype(np.int16)
163
+
164
  return audio
165
 
166
 
 
173
 
174
  # Convert the generated spectrogram to audio
175
  generated_audio = spectrogram_to_audio(generated_spectrogram.squeeze(0).cpu())
176
+ if torch.isnan(generated_audio).any():
177
+ raise ValueError("Generated audio contains NaN values.")
178
+
179
 
180
+ # Return the audio as (sample_rate, audio) tuple for Gradio to process
181
+ return (sample_rate, generated_audio)
182
 
183
 
184
  # Gradio Interface