Spaces:
Paused
Paused
Update inference/m4singer/base_svs_infer.py
Browse files
inference/m4singer/base_svs_infer.py
CHANGED
@@ -14,6 +14,7 @@ import librosa
|
|
14 |
import glob
|
15 |
import re
|
16 |
|
|
|
17 |
|
18 |
class BaseSVSInfer:
|
19 |
def __init__(self, hparams, device=None):
|
@@ -39,9 +40,11 @@ class BaseSVSInfer:
|
|
39 |
self.vocoder.eval()
|
40 |
self.vocoder.to(self.device)
|
41 |
|
|
|
42 |
def build_model(self):
|
43 |
raise NotImplementedError
|
44 |
|
|
|
45 |
def forward_model(self, inp):
|
46 |
raise NotImplementedError
|
47 |
|
@@ -70,7 +73,8 @@ class BaseSVSInfer:
|
|
70 |
y = self.vocoder(c).view(-1)
|
71 |
# [T]
|
72 |
return y[None]
|
73 |
-
|
|
|
74 |
def preprocess_word_level_input(self, inp):
|
75 |
# Pypinyin can't solve polyphonic words
|
76 |
text_raw = inp['text']
|
@@ -138,6 +142,7 @@ class BaseSVSInfer:
|
|
138 |
return None
|
139 |
return ph_seq, note_lst, midi_dur_lst, is_slur
|
140 |
|
|
|
141 |
def preprocess_phoneme_level_input(self, inp):
|
142 |
ph_seq = inp['ph_seq']
|
143 |
note_lst = inp['note_seq'].split()
|
@@ -152,6 +157,7 @@ class BaseSVSInfer:
|
|
152 |
return None
|
153 |
return ph_seq, note_lst, midi_dur_lst, is_slur
|
154 |
|
|
|
155 |
def preprocess_input(self, inp, input_type='word'):
|
156 |
"""
|
157 |
|
|
|
14 |
import glob
|
15 |
import re
|
16 |
|
17 |
+
import spaces
|
18 |
|
19 |
class BaseSVSInfer:
|
20 |
def __init__(self, hparams, device=None):
|
|
|
40 |
self.vocoder.eval()
|
41 |
self.vocoder.to(self.device)
|
42 |
|
43 |
+
@spaces.GPU
|
44 |
def build_model(self):
|
45 |
raise NotImplementedError
|
46 |
|
47 |
+
@spaces.GPU
|
48 |
def forward_model(self, inp):
|
49 |
raise NotImplementedError
|
50 |
|
|
|
73 |
y = self.vocoder(c).view(-1)
|
74 |
# [T]
|
75 |
return y[None]
|
76 |
+
|
77 |
+
@spaces.GPU
|
78 |
def preprocess_word_level_input(self, inp):
|
79 |
# Pypinyin can't solve polyphonic words
|
80 |
text_raw = inp['text']
|
|
|
142 |
return None
|
143 |
return ph_seq, note_lst, midi_dur_lst, is_slur
|
144 |
|
145 |
+
@spaces.GPU
|
146 |
def preprocess_phoneme_level_input(self, inp):
|
147 |
ph_seq = inp['ph_seq']
|
148 |
note_lst = inp['note_seq'].split()
|
|
|
157 |
return None
|
158 |
return ph_seq, note_lst, midi_dur_lst, is_slur
|
159 |
|
160 |
+
@spaces.GPU
|
161 |
def preprocess_input(self, inp, input_type='word'):
|
162 |
"""
|
163 |
|