Spaces:

OpenSound
/

SoloSpeech

Running on Zero

App Files Community

OpenSound committed 7 days ago

Commit

dab49a2

verified ·

1 Parent(s): d5607c8

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -36

app.py CHANGED Viewed

@@ -20,6 +20,43 @@ from solospeech.corrector.geco.util.other import pad_spec
 from huggingface_hub import snapshot_download
 import time
 parser = argparse.ArgumentParser()
 # pre-trained model path
 parser.add_argument('--eta', type=int, default=0)
@@ -89,42 +126,6 @@ timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps,
 _ = noise_scheduler.add_noise(latents, noise, timesteps)
-class Encoder(Pretrained):
-    MODULES_NEEDED = [
-        "compute_features",
-        "mean_var_norm",
-        "embedding_model"
-    ]
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-    def encode_batch(self, wavs, wav_lens=None, normalize=False):
-        # Manage single waveforms in input
-        if len(wavs.shape) == 1:
-            wavs = wavs.unsqueeze(0)
-        # Assign full length if wav_lens is not assigned
-        if wav_lens is None:
-            wav_lens = torch.ones(wavs.shape[0], device=self.device)
-        # Storing waveform in the specified device
-        wavs, wav_lens = wavs.to(self.device), wav_lens.to(self.device)
-        wavs = wavs.float()
-        # Computing features and embeddings
-        feats = self.mods.compute_features(wavs)
-        feats = self.mods.mean_var_norm(feats, wav_lens)
-        embeddings = self.mods.embedding_model(feats, wav_lens)
-        if normalize:
-            embeddings = self.hparams.mean_var_norm_emb(
-                embeddings,
-                torch.ones(embeddings.shape[0], device=self.device)
-            )
-        return embeddings
 @spaces.GPU
 def sample_diffusion(tse_model, tsr_model, autoencoder, std, scheduler, device,

 from huggingface_hub import snapshot_download
 import time
+class Encoder(Pretrained):
+    MODULES_NEEDED = [
+        "compute_features",
+        "mean_var_norm",
+        "embedding_model"
+    ]
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+    def encode_batch(self, wavs, wav_lens=None, normalize=False):
+        # Manage single waveforms in input
+        if len(wavs.shape) == 1:
+            wavs = wavs.unsqueeze(0)
+        # Assign full length if wav_lens is not assigned
+        if wav_lens is None:
+            wav_lens = torch.ones(wavs.shape[0], device=self.device)
+        # Storing waveform in the specified device
+        wavs, wav_lens = wavs.to(self.device), wav_lens.to(self.device)
+        wavs = wavs.float()
+        # Computing features and embeddings
+        feats = self.mods.compute_features(wavs)
+        feats = self.mods.mean_var_norm(feats, wav_lens)
+        embeddings = self.mods.embedding_model(feats, wav_lens)
+        if normalize:
+            embeddings = self.hparams.mean_var_norm_emb(
+                embeddings,
+                torch.ones(embeddings.shape[0], device=self.device)
+            )
+        return embeddings
 parser = argparse.ArgumentParser()
 # pre-trained model path
 parser.add_argument('--eta', type=int, default=0)
 _ = noise_scheduler.add_noise(latents, noise, timesteps)
 @spaces.GPU
 def sample_diffusion(tse_model, tsr_model, autoencoder, std, scheduler, device,