Spaces:

musdfakoc
/

local_intelligence

Sleeping

musdfakoc committed on Sep 30, 2024

Commit

5af138b

verified ·

1 Parent(s): 0141d56

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -153,14 +153,22 @@ def spectrogram_to_audio(magnitude_spectrogram):
     # Inverse STFT to convert the spectrogram back to audio
     audio = torch.istft(complex_spectrogram, n_fft=n_fft, hop_length=hop_length)
-    # Normalize and clip the audio
     if torch.max(torch.abs(audio)) != 0:
         audio = audio / torch.max(torch.abs(audio))
     audio = torch.clamp(audio, min=-1, max=1)
-    # Convert to 16-bit PCM format
     audio = (audio * 32767).short()
     return audio
@@ -181,6 +189,7 @@ def generate_audio_from_image(image):
     return (sample_rate, generated_audio_numpy)
 # Gradio Interface
 def main():
     global generator  # Declare the generator object globally

     # Inverse STFT to convert the spectrogram back to audio
     audio = torch.istft(complex_spectrogram, n_fft=n_fft, hop_length=hop_length)
+    # Check for NaNs or Infs in the audio and replace them with zeros
+    audio = torch.nan_to_num(audio, nan=0.0, posinf=0.0, neginf=0.0)
+    # Normalize audio to the range [-1, 1]
     if torch.max(torch.abs(audio)) != 0:
         audio = audio / torch.max(torch.abs(audio))
+    # Clip the audio to ensure it fits in the range [-1, 1]
     audio = torch.clamp(audio, min=-1, max=1)
+    # Convert to 16-bit PCM format by scaling and casting to int16
     audio = (audio * 32767).short()
+    # Ensure the values are clipped to the int16 range
+    audio = torch.clamp(audio, min=-32768, max=32767)
     return audio
     return (sample_rate, generated_audio_numpy)
 # Gradio Interface
 def main():
     global generator  # Declare the generator object globally