musdfakoc committed on
Commit
fbb8eef
·
verified ·
1 Parent(s): a2aee3a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -2
app.py CHANGED
@@ -137,14 +137,25 @@ def magnitude_to_complex_spectrogram(magnitude_spectrogram):
137
  complex_spectrogram = torch.stack([magnitude_spectrogram, zero_phase], dim=-1)
138
  return complex_spectrogram
139
 
140
- # Convert spectrogram back to audio using inverse STFT
141
  def spectrogram_to_audio(magnitude_spectrogram):
 
142
  magnitude_spectrogram = torch.expm1(magnitude_spectrogram)
 
 
143
  complex_spectrogram = magnitude_to_complex_spectrogram(magnitude_spectrogram)
 
 
144
  audio = torch.istft(complex_spectrogram, n_fft=n_fft, hop_length=hop_length)
 
 
 
 
 
 
 
145
  return audio
146
 
147
- # Function to generate audio from an uploaded image
148
  def generate_audio_from_image(image):
149
  test_img = image_transform(image).unsqueeze(0).to(device) # Preprocess image
150
 
@@ -158,6 +169,7 @@ def generate_audio_from_image(image):
158
  # Convert audio tensor to numpy and return it for Gradio to handle
159
  return (sample_rate, generated_audio.numpy())
160
 
 
161
  # Gradio Interface
162
  def main():
163
  global generator # Declare the generator object globally
 
137
  complex_spectrogram = torch.stack([magnitude_spectrogram, zero_phase], dim=-1)
138
  return complex_spectrogram
139
 
 
140
  def spectrogram_to_audio(magnitude_spectrogram):
141
+ # Perform inverse log scaling
142
  magnitude_spectrogram = torch.expm1(magnitude_spectrogram)
143
+
144
+ # Convert magnitude-only spectrogram to complex format
145
  complex_spectrogram = magnitude_to_complex_spectrogram(magnitude_spectrogram)
146
+
147
+ # Inverse STFT to convert the spectrogram back to audio
148
  audio = torch.istft(complex_spectrogram, n_fft=n_fft, hop_length=hop_length)
149
+
150
+ # Normalize audio to the range [-1, 1] (standard audio range)
151
+ audio = audio / torch.max(torch.abs(audio))
152
+
153
+ # Convert audio to 16-bit integer format, as expected for WAV files
154
+ audio = (audio * 32767).short()
155
+
156
  return audio
157
 
158
+
159
  def generate_audio_from_image(image):
160
  test_img = image_transform(image).unsqueeze(0).to(device) # Preprocess image
161
 
 
169
  # Convert audio tensor to numpy and return it for Gradio to handle
170
  return (sample_rate, generated_audio.numpy())
171
 
172
+
173
  # Gradio Interface
174
  def main():
175
  global generator # Declare the generator object globally