Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -145,40 +145,20 @@ def magnitude_to_complex_spectrogram(magnitude_spectrogram):
|
|
| 145 |
|
| 146 |
def spectrogram_to_audio(magnitude_spectrogram):
|
| 147 |
# Perform inverse log scaling
|
| 148 |
-
try:
|
| 149 |
-
magnitude_spectrogram = torch.expm1(magnitude_spectrogram)
|
| 150 |
-
if torch.isnan(magnitude_spectrogram).any():
|
| 151 |
-
raise ValueError("NaN values found in magnitude_spectrogram after expm1.")
|
| 152 |
-
except Exception as e:
|
| 153 |
-
raise ValueError(f"Error in expm1 operation: {e}")
|
| 154 |
|
| 155 |
# Convert magnitude-only spectrogram to complex format
|
| 156 |
-
try:
|
| 157 |
-
complex_spectrogram = magnitude_to_complex_spectrogram(magnitude_spectrogram)
|
| 158 |
-
if torch.isnan(complex_spectrogram).any():
|
| 159 |
-
raise ValueError("Complex spectrogram contains NaN values after conversion.")
|
| 160 |
-
except Exception as e:
|
| 161 |
-
raise ValueError(f"Error in complex spectrogram creation: {e}")
|
| 162 |
|
| 163 |
# Inverse STFT to convert the spectrogram back to audio
|
| 164 |
-
try:
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
except Exception as e:
|
| 169 |
-
raise ValueError(f"Error in istft operation: {e}")
|
| 170 |
-
|
| 171 |
-
# Normalize audio to the range [-1, 1] (standard audio range)
|
| 172 |
-
try:
|
| 173 |
-
if torch.max(torch.abs(audio)) != 0:
|
| 174 |
-
audio = audio / torch.max(torch.abs(audio))
|
| 175 |
-
except Exception as e:
|
| 176 |
-
raise ValueError(f"Error in audio normalization: {e}")
|
| 177 |
-
|
| 178 |
-
# Clip the audio to ensure it fits in the range [-1, 1]
|
| 179 |
audio = torch.clamp(audio, min=-1, max=1)
|
| 180 |
|
| 181 |
-
# Convert to 16-bit PCM format
|
| 182 |
audio = (audio * 32767).short()
|
| 183 |
|
| 184 |
return audio
|
|
|
|
| 145 |
|
| 146 |
def spectrogram_to_audio(magnitude_spectrogram):
|
| 147 |
# Perform inverse log scaling
|
| 148 |
+
magnitude_spectrogram = torch.expm1(magnitude_spectrogram)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
# Convert magnitude-only spectrogram to complex format
|
| 151 |
+
complex_spectrogram = magnitude_to_complex_spectrogram(magnitude_spectrogram)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
|
| 153 |
# Inverse STFT to convert the spectrogram back to audio
|
| 154 |
+
audio = torch.istft(complex_spectrogram, n_fft=n_fft, hop_length=hop_length)
|
| 155 |
+
|
| 156 |
+
# Normalize and clip the audio
|
| 157 |
+
if torch.max(torch.abs(audio)) != 0:
|
| 158 |
+
audio = audio / torch.max(torch.abs(audio))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
audio = torch.clamp(audio, min=-1, max=1)
|
| 160 |
|
| 161 |
+
# Convert to 16-bit PCM format
|
| 162 |
audio = (audio * 32767).short()
|
| 163 |
|
| 164 |
return audio
|