Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -121,8 +121,6 @@ def test_model(generator, test_img_path, output_audio_path, device):
|
|
121 |
# Convert the generated spectrogram to audio
|
122 |
generated_audio = spectrogram_to_audio(generated_spectrogram.squeeze(0).cpu()) # Remove batch dimension
|
123 |
|
124 |
-
print(f"Generated audio saved to {output_audio_path}")
|
125 |
-
|
126 |
# Load the pre-trained GAN model
|
127 |
def load_gan_model(generator, model_path, device):
|
128 |
generator.load_state_dict(torch.load(model_path, map_location=device))
|
@@ -179,18 +177,13 @@ def generate_audio_from_image(image):
|
|
179 |
# Convert to the required format (e.g., float32)
|
180 |
generated_audio = generated_audio.astype(np.float32)
|
181 |
|
182 |
-
#
|
183 |
-
|
184 |
-
generated_audio = np.stack([generated_audio, generated_audio], axis=0) # Duplicate for stereo
|
185 |
|
186 |
# Debug: Print the shape and type of the generated audio
|
187 |
-
print(f"Generated audio shape: {generated_audio.shape}, type: {generated_audio.dtype}")
|
188 |
-
|
189 |
-
# Ensure sample_rate is an integer
|
190 |
-
print(f"Sample rate: {sample_rate}, type: {type(sample_rate)}") # Debugging
|
191 |
|
192 |
-
|
193 |
-
return generated_audio, int(sample_rate) # Ensure sample_rate is an integer
|
194 |
|
195 |
|
196 |
|
|
|
121 |
# Convert the generated spectrogram to audio
|
122 |
generated_audio = spectrogram_to_audio(generated_spectrogram.squeeze(0).cpu()) # Remove batch dimension
|
123 |
|
|
|
|
|
124 |
# Load the pre-trained GAN model
|
125 |
def load_gan_model(generator, model_path, device):
|
126 |
generator.load_state_dict(torch.load(model_path, map_location=device))
|
|
|
177 |
# Convert to the required format (e.g., float32)
|
178 |
generated_audio = generated_audio.astype(np.float32)
|
179 |
|
180 |
+
# Transpose to make it (samples, channels)
|
181 |
+
generated_audio = generated_audio.T
|
|
|
182 |
|
183 |
# Debug: Print the shape and type of the generated audio
|
184 |
+
print(f"Generated audio shape after transpose: {generated_audio.shape}, type: {generated_audio.dtype}")
|
|
|
|
|
|
|
185 |
|
186 |
+
return generated_audio, sample_rate
|
|
|
187 |
|
188 |
|
189 |
|