musdfakoc committed on
Commit
ceb7e4c
·
verified ·
1 Parent(s): 8cea3ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -11
app.py CHANGED
@@ -121,8 +121,6 @@ def test_model(generator, test_img_path, output_audio_path, device):
121
  # Convert the generated spectrogram to audio
122
  generated_audio = spectrogram_to_audio(generated_spectrogram.squeeze(0).cpu()) # Remove batch dimension
123
 
124
- print(f"Generated audio saved to {output_audio_path}")
125
-
126
  # Load the pre-trained GAN model
127
  def load_gan_model(generator, model_path, device):
128
  generator.load_state_dict(torch.load(model_path, map_location=device))
@@ -179,18 +177,13 @@ def generate_audio_from_image(image):
179
  # Convert to the required format (e.g., float32)
180
  generated_audio = generated_audio.astype(np.float32)
181
 
182
- # Ensure the output is stereo with 2 channels, even if audio is mono
183
- if len(generated_audio.shape) == 1:
184
- generated_audio = np.stack([generated_audio, generated_audio], axis=0) # Duplicate for stereo
185
 
186
  # Debug: Print the shape and type of the generated audio
187
- print(f"Generated audio shape: {generated_audio.shape}, type: {generated_audio.dtype}")
188
-
189
- # Ensure sample_rate is an integer
190
- print(f"Sample rate: {sample_rate}, type: {type(sample_rate)}") # Debugging
191
 
192
- # Return the audio and sample rate in the correct format
193
- return generated_audio, int(sample_rate) # Ensure sample_rate is an integer
194
 
195
 
196
 
 
121
  # Convert the generated spectrogram to audio
122
  generated_audio = spectrogram_to_audio(generated_spectrogram.squeeze(0).cpu()) # Remove batch dimension
123
 
 
 
124
  # Load the pre-trained GAN model
125
  def load_gan_model(generator, model_path, device):
126
  generator.load_state_dict(torch.load(model_path, map_location=device))
 
177
  # Convert to the required format (e.g., float32)
178
  generated_audio = generated_audio.astype(np.float32)
179
 
180
+ # Transpose to make it (samples, channels)
181
+ generated_audio = generated_audio.T
 
182
 
183
  # Debug: Print the shape and type of the generated audio
184
+ print(f"Generated audio shape after transpose: {generated_audio.shape}, type: {generated_audio.dtype}")
 
 
 
185
 
186
+ return generated_audio, sample_rate
 
187
 
188
 
189