Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -137,14 +137,25 @@ def magnitude_to_complex_spectrogram(magnitude_spectrogram):
|
|
137 |
complex_spectrogram = torch.stack([magnitude_spectrogram, zero_phase], dim=-1)
|
138 |
return complex_spectrogram
|
139 |
|
140 |
-
# Convert spectrogram back to audio using inverse STFT
|
141 |
def spectrogram_to_audio(magnitude_spectrogram):
    """Turn a magnitude spectrogram back into an audio signal via inverse STFT.

    The input is passed through ``torch.expm1``, converted with
    ``magnitude_to_complex_spectrogram``, and handed to ``torch.istft`` using
    the ``n_fft`` and ``hop_length`` values defined outside this function.

    Args:
        magnitude_spectrogram: Tensor of spectrogram magnitudes.
            NOTE(review): presumably log1p-compressed upstream, since expm1 is
            applied first -- confirm against the code that produces it.

    Returns:
        The tensor returned by ``torch.istft``.
    """
    # expm1(x) == exp(x) - 1, i.e. the inverse of log1p.
    linear_magnitude = torch.expm1(magnitude_spectrogram)
    return torch.istft(
        magnitude_to_complex_spectrogram(linear_magnitude),
        n_fft=n_fft,
        hop_length=hop_length,
    )
|
146 |
|
147 |
-
|
148 |
def generate_audio_from_image(image):
|
149 |
test_img = image_transform(image).unsqueeze(0).to(device) # Preprocess image
|
150 |
|
@@ -158,6 +169,7 @@ def generate_audio_from_image(image):
|
|
158 |
# Convert audio tensor to numpy and return it for Gradio to handle
|
159 |
return (sample_rate, generated_audio.numpy())
|
160 |
|
|
|
161 |
# Gradio Interface
|
162 |
def main():
|
163 |
global generator # Declare the generator object globally
|
|
|
137 |
complex_spectrogram = torch.stack([magnitude_spectrogram, zero_phase], dim=-1)
|
138 |
return complex_spectrogram
|
139 |
|
|
|
140 |
def spectrogram_to_audio(magnitude_spectrogram):
    """Convert a log-scaled magnitude spectrogram into 16-bit integer audio.

    Steps: undo the log scaling with ``torch.expm1``, build the inverse-STFT
    input with ``magnitude_to_complex_spectrogram``, run ``torch.istft`` using
    the ``n_fft`` / ``hop_length`` values defined outside this function,
    peak-normalize to [-1, 1], then scale to the int16 range.

    Args:
        magnitude_spectrogram: Tensor of log-scaled spectrogram magnitudes.
            NOTE(review): the expected shape is not visible here -- confirm
            against the caller.

    Returns:
        A ``torch.int16`` tensor of audio samples. A silent (all-zero)
        reconstruction is returned as all zeros.
    """
    # Perform inverse log scaling (expm1 is the inverse of log1p).
    magnitude_spectrogram = torch.expm1(magnitude_spectrogram)

    # Convert magnitude-only spectrogram to complex format.
    complex_spectrogram = magnitude_to_complex_spectrogram(magnitude_spectrogram)

    # Inverse STFT to convert the spectrogram back to audio.
    # NOTE(review): recent PyTorch releases appear to require a complex-dtype
    # input for istft -- confirm the helper's output matches the installed version.
    audio = torch.istft(complex_spectrogram, n_fft=n_fft, hop_length=hop_length)

    # Normalize audio to the range [-1, 1] (standard audio range).
    # Skip the division when the signal is entirely silent: dividing by a zero
    # peak would produce NaNs, and casting NaN to int16 is undefined.
    peak = torch.max(torch.abs(audio))
    if peak > 0:
        audio = audio / peak

    # Convert audio to 16-bit integer format, as expected for WAV files.
    audio = (audio * 32767).short()

    return audio
|
157 |
|
158 |
+
|
159 |
def generate_audio_from_image(image):
|
160 |
test_img = image_transform(image).unsqueeze(0).to(device) # Preprocess image
|
161 |
|
|
|
169 |
# Convert audio tensor to numpy and return it for Gradio to handle
|
170 |
return (sample_rate, generated_audio.numpy())
|
171 |
|
172 |
+
|
173 |
# Gradio Interface
|
174 |
def main():
|
175 |
global generator # Declare the generator object globally
|