musdfakoc committed on
Commit
7a2a54e
·
verified ·
1 Parent(s): af19cb5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -5
app.py CHANGED
@@ -139,15 +139,20 @@ def magnitude_to_complex_spectrogram(magnitude_spectrogram):
139
  return complex_spectrogram
140
 
141
  def spectrogram_to_audio(magnitude_spectrogram):
142
- # Convert magnitude spectrogram to complex spectrogram
143
  complex_spectrogram = magnitude_to_complex_spectrogram(magnitude_spectrogram)
144
 
 
 
 
145
  # Inverse STFT to convert the spectrogram back to audio
146
- audio = torch.istft(complex_spectrogram, n_fft=n_fft, hop_length=hop_length)
147
-
148
  return audio
149
 
150
 
 
 
151
  def generate_audio_from_image(image):
152
  if image is None:
153
  raise ValueError("The uploaded image is 'None'. Please check the Gradio input.")
@@ -163,8 +168,19 @@ def generate_audio_from_image(image):
163
  # Convert the generated spectrogram to audio
164
  generated_audio = spectrogram_to_audio(generated_spectrogram.squeeze(0).cpu())
165
 
166
- # Convert audio tensor to numpy and return it for Gradio to handle
167
- return generated_audio.numpy(), sample_rate
 
 
 
 
 
 
 
 
 
 
 
168
 
169
 
170
  # Gradio Interface
 
139
  return complex_spectrogram
140
 
141
  def spectrogram_to_audio(magnitude_spectrogram):
142
+ # Convert magnitude-only spectrogram to complex format
143
  complex_spectrogram = magnitude_to_complex_spectrogram(magnitude_spectrogram)
144
 
145
+ # Provide a rectangular window to suppress the warning
146
+ window = torch.ones(n_fft, device=complex_spectrogram.device)
147
+
148
  # Inverse STFT to convert the spectrogram back to audio
149
+ audio = torch.istft(complex_spectrogram, n_fft=n_fft, hop_length=hop_length, window=window)
150
+
151
  return audio
152
 
153
 
154
+ import numpy as np
155
+
156
  def generate_audio_from_image(image):
157
  if image is None:
158
  raise ValueError("The uploaded image is 'None'. Please check the Gradio input.")
 
168
  # Convert the generated spectrogram to audio
169
  generated_audio = spectrogram_to_audio(generated_spectrogram.squeeze(0).cpu())
170
 
171
+ # Ensure the audio is a NumPy array and properly formatted
172
+ generated_audio = generated_audio.numpy()
173
+
174
+ # Normalize the audio to fit between -1 and 1 for proper playback
175
+ max_value = np.abs(generated_audio).max()
176
+ if max_value > 0:
177
+ generated_audio = generated_audio / max_value
178
+
179
+ # Convert to the required format (e.g., float32)
180
+ generated_audio = generated_audio.astype(np.float32)
181
+
182
+ return generated_audio, sample_rate
183
+
184
 
185
 
186
  # Gradio Interface