Spaces:

mrfakename
/

SNAC

Paused

mrfakename committed 8 days ago

Commit

17fb016

verified ·

1 Parent(s): 5196160

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+import torch
+import torchaudio
+from snac import SNAC
+import soundfile as sf
+filename = "/content/en_sample.wav"
+audio, sr = torchaudio.load(filename)
+# Resample to 24kHz if necessary
+if sr != 24000:
+    resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=24000)
+    audio = resampler(audio)
+# Convert to mono by averaging the channels if the audio is stereo
+if audio.size(0) > 1:
+    audio = torch.mean(audio, dim=0, keepdim=True)
+# Confirm audio is in the shape [1, 1, T] where T is the sequence length
+print("Audio size after processing:", audio.size(), audio.shape)
+# Load the SNAC model
+model = SNAC.from_pretrained("hubertsiuzdak/snac_24khz").eval()
+# Move to CUDA if available
+if torch.cuda.is_available():
+    model = model.cuda()
+    audio = audio.cuda()
+audio = torch.unsqueeze(audio, 0)
+# Encode and decode the audio with SNAC
+with torch.inference_mode():
+    audio_hat, _, codes, _, _ = model(audio)
+# Move the tensor back to CPU for saving and convert back to numpy
+audio_hat = audio_hat.cpu().detach().numpy()
+# Save the reconstructed audio file
+sf.write('reconstructed_audio.wav', audio_hat.squeeze(), 24000)  # Use .squeeze() to remove single-dimensional entries