musdfakoc committed on
Commit
eaee834
·
verified ·
1 Parent(s): c812d04

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -6
app.py CHANGED
@@ -145,19 +145,25 @@ def load_gan_model(generator, model_path, device):
145
  generator.eval() # Set the model to evaluation mode
146
  return generator
147
 
148
- # Convert magnitude-only spectrogram to complex format by assuming zero phase
149
  def magnitude_to_complex_spectrogram(magnitude_spectrogram):
 
150
  zero_phase = torch.zeros_like(magnitude_spectrogram)
151
- complex_spectrogram = torch.stack([magnitude_spectrogram, zero_phase], dim=-1)
 
 
 
152
  return complex_spectrogram
153
 
154
- # Convert spectrogram back to audio using inverse STFT
155
  def spectrogram_to_audio(magnitude_spectrogram):
156
- magnitude_spectrogram = torch.expm1(magnitude_spectrogram)
157
  complex_spectrogram = magnitude_to_complex_spectrogram(magnitude_spectrogram)
 
 
158
  audio = torch.istft(complex_spectrogram, n_fft=n_fft, hop_length=hop_length)
 
159
  return audio
160
 
 
161
  # Function to generate audio from an uploaded image
162
  def generate_audio_from_image(image):
163
  test_img = image_transform(image).unsqueeze(0).to(device) # Preprocess image
@@ -179,12 +185,16 @@ def main():
179
  generator = Generator(output_time_frames).to(device)
180
 
181
  # Load the pre-trained model
182
- model_path = './gan_model.pth' # Change this path
183
  generator = load_gan_model(generator, model_path, device)
184
 
185
  # Gradio interface: allow users to upload an image and generate audio
186
- iface = gr.Interface(fn=generate_audio_from_image, inputs=gr.Image(type="pil"), outputs=gr.Audio(type="numpy", label="Generated Audio"))
 
 
 
187
  iface.launch()
188
 
 
189
  if __name__ == "__main__":
190
  main()
 
145
  generator.eval() # Set the model to evaluation mode
146
  return generator
147
 
 
148
def magnitude_to_complex_spectrogram(magnitude_spectrogram):
    """Promote a magnitude-only spectrogram to a complex tensor, assuming zero phase.

    With zero phase, each complex bin is simply ``magnitude + 0j``, so the
    imaginary part is an all-zeros tensor shaped like the input.
    """
    imag_part = torch.zeros_like(magnitude_spectrogram)
    return torch.complex(magnitude_spectrogram, imag_part)
156
 
 
157
def spectrogram_to_audio(magnitude_spectrogram):
    """Reconstruct a time-domain waveform from a magnitude spectrogram.

    The magnitude is first lifted to a complex spectrogram under a zero-phase
    assumption, then inverted with ``torch.istft`` using the module-level
    ``n_fft`` and ``hop_length`` STFT parameters.

    NOTE(review): an earlier revision applied ``torch.expm1`` here before
    inversion (undoing a log1p scaling) — confirm the generator now emits
    linear-scale magnitudes.
    """
    # Zero-phase complex spectrogram: magnitude + 0j per bin.
    complex_spec = magnitude_to_complex_spectrogram(magnitude_spectrogram)
    # Inverse STFT back to a 1-D audio signal.
    return torch.istft(complex_spec, n_fft=n_fft, hop_length=hop_length)
165
 
166
+
167
  # Function to generate audio from an uploaded image
168
  def generate_audio_from_image(image):
169
  test_img = image_transform(image).unsqueeze(0).to(device) # Preprocess image
 
185
  generator = Generator(output_time_frames).to(device)
186
 
187
  # Load the pre-trained model
188
+ model_path = './gan_model.pth' # Ensure the model is in the correct relative path
189
  generator = load_gan_model(generator, model_path, device)
190
 
191
  # Gradio interface: allow users to upload an image and generate audio
192
+ iface = gr.Interface(fn=generate_audio_from_image,
193
+ inputs=gr.Image(type="pil"),
194
+ outputs=gr.Audio(type="numpy", label="Generated Audio"))
195
+
196
  iface.launch()
197
 
198
+
199
  if __name__ == "__main__":
200
  main()