Update GPT_SoVITS/TTS_infer_pack/TTS.py
Browse files
GPT_SoVITS/TTS_infer_pack/TTS.py
CHANGED
@@ -655,8 +655,8 @@ class TTS:
|
|
655 |
)
|
656 |
with torch.no_grad():
|
657 |
wav16k, sr = librosa.load(ref_wav_path, sr=16000)
|
658 |
-
if (wav16k.shape[0] >
|
659 |
-
raise OSError(i18n("参考音频在3~
|
660 |
wav16k = torch.from_numpy(wav16k)
|
661 |
zero_wav_torch = torch.from_numpy(zero_wav)
|
662 |
wav16k = wav16k.to(self.configs.device)
|
@@ -1230,7 +1230,13 @@ class TTS:
|
|
1230 |
else:
|
1231 |
audio = audio.cpu().numpy()
|
1232 |
|
1233 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
1234 |
|
1235 |
# try:
|
1236 |
# if speed_factor != 1.0:
|
@@ -1296,4 +1302,4 @@ class TTS:
|
|
1296 |
wav_gen = self.bigvgan_model(cmf_res)
|
1297 |
audio=wav_gen[0][0]#.cpu().detach().numpy()
|
1298 |
|
1299 |
-
return audio
|
|
|
655 |
)
|
656 |
with torch.no_grad():
|
657 |
wav16k, sr = librosa.load(ref_wav_path, sr=16000)
|
658 |
+
if (wav16k.shape[0] > 240000 or wav16k.shape[0] < 48000):
|
659 |
+
raise OSError(i18n("参考音频在3~15秒范围外,请更换!"))
|
660 |
wav16k = torch.from_numpy(wav16k)
|
661 |
zero_wav_torch = torch.from_numpy(zero_wav)
|
662 |
wav16k = wav16k.to(self.configs.device)
|
|
|
1230 |
else:
|
1231 |
audio = audio.cpu().numpy()
|
1232 |
|
1233 |
+
if hasattr(audio, 'cpu'):
|
1234 |
+
# If audio is a tensor, convert it to a NumPy array
|
1235 |
+
audio = (audio * 32768).cpu().numpy()
|
1236 |
+
else:
|
1237 |
+
audio = audio * 32768
|
1238 |
+
|
1239 |
+
audio = audio.astype(np.int16)
|
1240 |
|
1241 |
# try:
|
1242 |
# if speed_factor != 1.0:
|
|
|
1302 |
wav_gen = self.bigvgan_model(cmf_res)
|
1303 |
audio=wav_gen[0][0]#.cpu().detach().numpy()
|
1304 |
|
1305 |
+
return audio
|