Spaces:

musdfakoc
/

local_intelligence

Sleeping

App Files Files Community

musdfakoc committed on Sep 30, 2024

Commit

86448a8

verified ·

1 Parent(s): 4970be7

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -12

app.py CHANGED Viewed

@@ -144,13 +144,14 @@ def magnitude_to_complex_spectrogram(magnitude_spectrogram):
 def spectrogram_to_audio(magnitude_spectrogram):
-    # Perform inverse log scaling to undo any log scaling applied to the spectrogram
     magnitude_spectrogram = torch.expm1(magnitude_spectrogram)
-    # Convert magnitude-only spectrogram to complex format
-    complex_spectrogram = magnitude_to_complex_spectrogram(magnitude_spectrogram)
-    # Use inverse STFT to convert the spectrogram back to time-domain audio
     audio = torch.istft(complex_spectrogram, n_fft=n_fft, hop_length=hop_length)
     # Handle NaNs or Infs in the audio and replace them with zeros
@@ -160,16 +161,16 @@ def spectrogram_to_audio(magnitude_spectrogram):
     if torch.max(torch.abs(audio)) != 0:
         audio = audio / torch.max(torch.abs(audio))
-    # Clip the audio to the range [-1, 1] to avoid out-of-bounds values
     audio = torch.clamp(audio, min=-1, max=1)
-    # Scale the audio to 16-bit PCM format and convert to int16
     audio = (audio * 32767).short()
-    # Ensure the audio is clipped to the valid range for int16 [-32768, 32767]
     audio = torch.clamp(audio, min=-32768, max=32767)
-    # Convert to a NumPy array and ensure it's in the correct format
     audio_numpy = audio.cpu().numpy().astype(np.int16)
     return audio_numpy
@@ -178,21 +179,20 @@ def spectrogram_to_audio(magnitude_spectrogram):
 def generate_audio_from_image(image):
-    test_img = image_transform(image).unsqueeze(0).to(device)  # Preprocess the input image
-    # Generate a sound spectrogram from the image using the pre-trained GAN model
     with torch.no_grad():
         generated_spectrogram = generator(test_img)
     # Convert the generated spectrogram to time-domain audio
     generated_audio_numpy = spectrogram_to_audio(generated_spectrogram.squeeze(0).cpu())
-    # Return the sample rate and the NumPy array containing the audio data
     return (sample_rate, generated_audio_numpy)
 # Gradio Interface
 def main():
     global generator  # Declare the generator object globally

 def spectrogram_to_audio(magnitude_spectrogram):
+    # Perform inverse log scaling to undo any log scaling
     magnitude_spectrogram = torch.expm1(magnitude_spectrogram)
+    # Convert magnitude-only spectrogram to complex format (real part and zero imaginary)
+    zero_phase = torch.zeros_like(magnitude_spectrogram)
+    complex_spectrogram = torch.stack([magnitude_spectrogram, zero_phase], dim=-1)
+    # Inverse STFT to convert the spectrogram back to time-domain audio
     audio = torch.istft(complex_spectrogram, n_fft=n_fft, hop_length=hop_length)
     # Handle NaNs or Infs in the audio and replace them with zeros
     if torch.max(torch.abs(audio)) != 0:
         audio = audio / torch.max(torch.abs(audio))
+    # Clip the audio to ensure no values fall outside the range [-1, 1]
     audio = torch.clamp(audio, min=-1, max=1)
+    # Convert to 16-bit PCM format by scaling and casting to int16
     audio = (audio * 32767).short()
+    # Ensure the audio is in the valid range for int16 [-32768, 32767]
     audio = torch.clamp(audio, min=-32768, max=32767)
+    # Convert the audio to a NumPy array of int16
     audio_numpy = audio.cpu().numpy().astype(np.int16)
     return audio_numpy
 def generate_audio_from_image(image):
+    test_img = image_transform(image).unsqueeze(0).to(device)  # Preprocess the image
+    # Generate a sound spectrogram from the image using the loaded generator
     with torch.no_grad():
         generated_spectrogram = generator(test_img)
     # Convert the generated spectrogram to time-domain audio
     generated_audio_numpy = spectrogram_to_audio(generated_spectrogram.squeeze(0).cpu())
+    # Return the sample rate and the audio in the correct format for Gradio
     return (sample_rate, generated_audio_numpy)
 # Gradio Interface
 def main():
     global generator  # Declare the generator object globally