Spaces:

musdfakoc
/

local_intelligence

Sleeping

musdfakoc committed on Sep 30, 2024

Commit

cf99eff

verified ·

1 Parent(s): 2efb66f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -149,14 +149,18 @@ def spectrogram_to_audio(magnitude_spectrogram):
     audio = torch.istft(complex_spectrogram, n_fft=n_fft, hop_length=hop_length)
     # Normalize audio to the range [-1, 1] (standard audio range)
-    audio = audio / torch.max(torch.abs(audio))
     # Clip the audio to ensure it fits in the range [-1, 1]
     audio = torch.clamp(audio, min=-1, max=1)
-    # Convert to 16-bit PCM format by scaling and casting
     audio = (audio * 32767).short()
     return audio
@@ -169,9 +173,12 @@ def generate_audio_from_image(image):
     # Convert the generated spectrogram to audio
     generated_audio = spectrogram_to_audio(generated_spectrogram.squeeze(0).cpu())
-    # Convert audio tensor to numpy and return it for Gradio to handle
-    return (sample_rate, generated_audio.numpy())
 # Gradio Interface

     audio = torch.istft(complex_spectrogram, n_fft=n_fft, hop_length=hop_length)
     # Normalize audio to the range [-1, 1] (standard audio range)
+    if torch.max(torch.abs(audio)) != 0:
+        audio = audio / torch.max(torch.abs(audio))
     # Clip the audio to ensure it fits in the range [-1, 1]
     audio = torch.clamp(audio, min=-1, max=1)
+    # Convert to 16-bit PCM format by scaling and casting to int16
     audio = (audio * 32767).short()
+    # Ensure the audio is in the correct format (int16)
+    audio = audio.cpu().numpy().astype(np.int16)
     return audio
     # Convert the generated spectrogram to audio
     generated_audio = spectrogram_to_audio(generated_spectrogram.squeeze(0).cpu())
+    if torch.isnan(generated_audio).any():
+        raise ValueError("Generated audio contains NaN values.")
+    # Return the audio as (sample_rate, audio) tuple for Gradio to process
+    return (sample_rate, generated_audio)
 # Gradio Interface