OpenSound committed (verified)
Commit dab49a2 · Parent: d5607c8

Update app.py

Files changed (1)
  1. app.py +37 -36
app.py CHANGED
@@ -20,6 +20,43 @@ from solospeech.corrector.geco.util.other import pad_spec
 from huggingface_hub import snapshot_download
 import time
 
+
+class Encoder(Pretrained):
+
+    MODULES_NEEDED = [
+        "compute_features",
+        "mean_var_norm",
+        "embedding_model"
+    ]
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def encode_batch(self, wavs, wav_lens=None, normalize=False):
+        # Manage single waveforms in input
+        if len(wavs.shape) == 1:
+            wavs = wavs.unsqueeze(0)
+
+        # Assign full length if wav_lens is not assigned
+        if wav_lens is None:
+            wav_lens = torch.ones(wavs.shape[0], device=self.device)
+
+        # Storing waveform in the specified device
+        wavs, wav_lens = wavs.to(self.device), wav_lens.to(self.device)
+        wavs = wavs.float()
+
+        # Computing features and embeddings
+        feats = self.mods.compute_features(wavs)
+        feats = self.mods.mean_var_norm(feats, wav_lens)
+        embeddings = self.mods.embedding_model(feats, wav_lens)
+        if normalize:
+            embeddings = self.hparams.mean_var_norm_emb(
+                embeddings,
+                torch.ones(embeddings.shape[0], device=self.device)
+            )
+        return embeddings
+
+
 parser = argparse.ArgumentParser()
 # pre-trained model path
 parser.add_argument('--eta', type=int, default=0)
@@ -89,42 +126,6 @@ timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps,
 _ = noise_scheduler.add_noise(latents, noise, timesteps)
 
 
-class Encoder(Pretrained):
-
-    MODULES_NEEDED = [
-        "compute_features",
-        "mean_var_norm",
-        "embedding_model"
-    ]
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-    def encode_batch(self, wavs, wav_lens=None, normalize=False):
-        # Manage single waveforms in input
-        if len(wavs.shape) == 1:
-            wavs = wavs.unsqueeze(0)
-
-        # Assign full length if wav_lens is not assigned
-        if wav_lens is None:
-            wav_lens = torch.ones(wavs.shape[0], device=self.device)
-
-        # Storing waveform in the specified device
-        wavs, wav_lens = wavs.to(self.device), wav_lens.to(self.device)
-        wavs = wavs.float()
-
-        # Computing features and embeddings
-        feats = self.mods.compute_features(wavs)
-        feats = self.mods.mean_var_norm(feats, wav_lens)
-        embeddings = self.mods.embedding_model(feats, wav_lens)
-        if normalize:
-            embeddings = self.hparams.mean_var_norm_emb(
-                embeddings,
-                torch.ones(embeddings.shape[0], device=self.device)
-            )
-        return embeddings
-
-
 
 @spaces.GPU
 def sample_diffusion(tse_model, tsr_model, autoencoder, std, scheduler, device,
 
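For orientation, here is a minimal usage sketch of the relocated Encoder class. It is not part of this commit and rests on two assumptions: that Pretrained is SpeechBrain's pretrained-interface base class (so Encoder inherits from_hparams), and that a checkpoint such as speechbrain/spkrec-ecapa-voxceleb supplies the compute_features, mean_var_norm, and embedding_model modules listed in MODULES_NEEDED.

import torch

# Hypothetical usage, not in app.py: load an ECAPA-TDNN speaker checkpoint
# through the Encoder interface shown in the diff above.
spk_encoder = Encoder.from_hparams(
    source="speechbrain/spkrec-ecapa-voxceleb",         # assumed checkpoint
    savedir="pretrained_models/spkrec-ecapa-voxceleb",  # local cache directory
    run_opts={"device": "cuda" if torch.cuda.is_available() else "cpu"},
)

wav = torch.randn(16000)             # placeholder: 1 s of mono 16 kHz audio
emb = spk_encoder.encode_batch(wav)  # 1-D input is unsqueezed to a batch of one
print(emb.shape)                     # ECAPA-TDNN embeddings are 192-dimensional

Because encode_batch accepts a bare 1-D waveform and adds the batch dimension itself, callers can pass single utterances directly without batching them first.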