musdfakoc committed · Commit af04a5c · verified · 1 Parent(s): cf99eff

Update app.py

Files changed (1): app.py (+6 -6)
app.py CHANGED

@@ -137,7 +137,6 @@ def magnitude_to_complex_spectrogram(magnitude_spectrogram):
     complex_spectrogram = torch.stack([magnitude_spectrogram, zero_phase], dim=-1)
     return complex_spectrogram
 
-
 def spectrogram_to_audio(magnitude_spectrogram):
     # Perform inverse log scaling
     magnitude_spectrogram = torch.expm1(magnitude_spectrogram)
@@ -155,12 +154,16 @@ def spectrogram_to_audio(magnitude_spectrogram):
     # Clip the audio to ensure it fits in the range [-1, 1]
     audio = torch.clamp(audio, min=-1, max=1)
 
+    # Check for NaNs in the audio tensor
+    if torch.isnan(audio).any():
+        raise ValueError("Generated audio contains NaN values.")
+
     # Convert to 16-bit PCM format by scaling and casting to int16
     audio = (audio * 32767).short()
 
-    # Ensure the audio is in the correct format (int16)
+    # Convert audio tensor to numpy array for Gradio
     audio = audio.cpu().numpy().astype(np.int16)
-
+
     return audio
 
 
@@ -173,9 +176,6 @@ def generate_audio_from_image(image):
 
     # Convert the generated spectrogram to audio
     generated_audio = spectrogram_to_audio(generated_spectrogram.squeeze(0).cpu())
-    if torch.isnan(generated_audio).any():
-        raise ValueError("Generated audio contains NaN values.")
-
 
     # Return the audio as (sample_rate, audio) tuple for Gradio to process
     return (sample_rate, generated_audio)
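
For reference, below is a minimal, self-contained sketch of the waveform-to-PCM path as it stands after this change. The helper name to_pcm16, the 22050 Hz sample rate, and the test tone are illustrative assumptions; the clamp, NaN check, int16 scaling, and NumPy conversion mirror the updated lines above. The magnitude-to-waveform reconstruction that precedes this step in spectrogram_to_audio is not shown.

import numpy as np
import torch

def to_pcm16(audio: torch.Tensor) -> np.ndarray:
    # Hypothetical helper mirroring the tail of spectrogram_to_audio after this commit
    # Clip the waveform so it fits in [-1, 1] before quantization
    audio = torch.clamp(audio, min=-1, max=1)

    # Check for NaNs in the audio tensor (the check this commit moves into spectrogram_to_audio)
    if torch.isnan(audio).any():
        raise ValueError("Generated audio contains NaN values.")

    # Convert to 16-bit PCM by scaling and casting to int16
    audio = (audio * 32767).short()

    # Convert the tensor to a NumPy int16 array, the format Gradio's Audio output accepts
    return audio.cpu().numpy().astype(np.int16)

# Hypothetical usage: sample_rate is assumed here; app.py defines its own value elsewhere.
sample_rate = 22050
waveform = torch.sin(torch.linspace(0, 2 * torch.pi * 440, sample_rate))  # 1 s, 440 Hz test tone
pcm = to_pcm16(waveform)
# A Gradio audio output would then receive the tuple (sample_rate, pcm).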