Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -145,19 +145,25 @@ def load_gan_model(generator, model_path, device):
|
|
145 |
generator.eval() # Set the model to evaluation mode
|
146 |
return generator
|
147 |
|
148 |
-
# Convert magnitude-only spectrogram to complex format by assuming zero phase
|
149 |
def magnitude_to_complex_spectrogram(magnitude_spectrogram):
|
|
|
150 |
zero_phase = torch.zeros_like(magnitude_spectrogram)
|
151 |
-
|
|
|
|
|
|
|
152 |
return complex_spectrogram
|
153 |
|
154 |
-
# Convert spectrogram back to audio using inverse STFT
|
155 |
def spectrogram_to_audio(magnitude_spectrogram):
|
156 |
-
|
157 |
complex_spectrogram = magnitude_to_complex_spectrogram(magnitude_spectrogram)
|
|
|
|
|
158 |
audio = torch.istft(complex_spectrogram, n_fft=n_fft, hop_length=hop_length)
|
|
|
159 |
return audio
|
160 |
|
|
|
161 |
# Function to generate audio from an uploaded image
|
162 |
def generate_audio_from_image(image):
|
163 |
test_img = image_transform(image).unsqueeze(0).to(device) # Preprocess image
|
@@ -179,12 +185,16 @@ def main():
|
|
179 |
generator = Generator(output_time_frames).to(device)
|
180 |
|
181 |
# Load the pre-trained model
|
182 |
-
model_path = './gan_model.pth' #
|
183 |
generator = load_gan_model(generator, model_path, device)
|
184 |
|
185 |
# Gradio interface: allow users to upload an image and generate audio
|
186 |
-
iface = gr.Interface(fn=generate_audio_from_image,
|
|
|
|
|
|
|
187 |
iface.launch()
|
188 |
|
|
|
189 |
if __name__ == "__main__":
|
190 |
main()
|
|
|
145 |
generator.eval() # Set the model to evaluation mode
|
146 |
return generator
|
147 |
|
|
|
148 |
def magnitude_to_complex_spectrogram(magnitude_spectrogram):
    """Lift a real-valued magnitude spectrogram into the complex domain.

    The phase is assumed to be zero everywhere, so the imaginary part is
    an all-zeros tensor with the same shape (and floating dtype) as the
    input; the magnitude becomes the real part unchanged.
    """
    # Zero phase ==> imaginary component is identically zero.
    imag_part = torch.zeros_like(magnitude_spectrogram)
    return torch.complex(magnitude_spectrogram, imag_part)
|
156 |
|
|
|
157 |
def spectrogram_to_audio(magnitude_spectrogram):
    """Reconstruct a time-domain waveform from a magnitude spectrogram.

    First forms a zero-phase complex spectrogram, then inverts it with
    ``torch.istft``.

    NOTE(review): ``n_fft`` and ``hop_length`` are module-level settings
    defined elsewhere in app.py — they must match the STFT parameters the
    spectrogram was produced with.
    """
    # Magnitude-only input: assume zero phase for the inverse transform.
    complex_spec = magnitude_to_complex_spectrogram(magnitude_spectrogram)
    return torch.istft(complex_spec, n_fft=n_fft, hop_length=hop_length)
|
165 |
|
166 |
+
|
167 |
# Function to generate audio from an uploaded image
|
168 |
def generate_audio_from_image(image):
|
169 |
test_img = image_transform(image).unsqueeze(0).to(device) # Preprocess image
|
|
|
185 |
generator = Generator(output_time_frames).to(device)
|
186 |
|
187 |
# Load the pre-trained model
|
188 |
+
model_path = './gan_model.pth' # Ensure the model is in the correct relative path
|
189 |
generator = load_gan_model(generator, model_path, device)
|
190 |
|
191 |
# Gradio interface: allow users to upload an image and generate audio
|
192 |
+
iface = gr.Interface(fn=generate_audio_from_image,
|
193 |
+
inputs=gr.Image(type="pil"),
|
194 |
+
outputs=gr.Audio(type="numpy", label="Generated Audio"))
|
195 |
+
|
196 |
iface.launch()
|
197 |
|
198 |
+
|
199 |
if __name__ == "__main__":
|
200 |
main()
|