Spaces:

mrfakename
/

SNAC

Paused

mrfakename committed 9 days ago

Commit

55781cc

verified ·

1 Parent(s): c966889

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -11,16 +11,11 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 MODEL = SNAC.from_pretrained("hubertsiuzdak/snac_24khz").eval().to(DEVICE)
 def reconstruct(audio_in):
-    """
-    audio_in is (sample_rate:int, data:np.ndarray) from gr.Audio(type="numpy")
-    returns (24000, np.ndarray)
-    """
     if audio_in is None:
         return None
-    sr, data = audio_in  # data: (T,) or (T, C)
-    # convert to mono if stereo
     if data.ndim == 2 and data.shape[1] > 1:
         data = data.mean(axis=1)
@@ -32,10 +27,11 @@ def reconstruct(audio_in):
     x = x.unsqueeze(0).to(DEVICE)  # [1, 1, T]
     with torch.inference_mode():
-        y_hat, _, _, _, _ = MODEL(x)  # [1, 1, T]
-    y = y_hat.squeeze(0).squeeze(0).detach().cpu()
-    y = torch.clamp(y, -1.0, 1.0)  # safety clamp
     return (24000, y.numpy())

 MODEL = SNAC.from_pretrained("hubertsiuzdak/snac_24khz").eval().to(DEVICE)
 def reconstruct(audio_in):
     if audio_in is None:
         return None
+    sr, data = audio_in
     if data.ndim == 2 and data.shape[1] > 1:
         data = data.mean(axis=1)
     x = x.unsqueeze(0).to(DEVICE)  # [1, 1, T]
     with torch.inference_mode():
+        out = MODEL(x)
+        audio_hat = out[0] if isinstance(out, (list, tuple)) else out
+    y = audio_hat.squeeze(0).squeeze(0).detach().cpu()
+    y = torch.clamp(y, -1.0, 1.0)
     return (24000, y.numpy())