kevinwang676 committed on
Commit
b0e461f
·
verified ·
1 Parent(s): 946de3e

Update GPT_SoVITS/TTS_infer_pack/TTS.py

Browse files
Files changed (1) hide show
  1. GPT_SoVITS/TTS_infer_pack/TTS.py +10 -4
GPT_SoVITS/TTS_infer_pack/TTS.py CHANGED
@@ -655,8 +655,8 @@ class TTS:
655
  )
656
  with torch.no_grad():
657
  wav16k, sr = librosa.load(ref_wav_path, sr=16000)
658
- if (wav16k.shape[0] > 160000 or wav16k.shape[0] < 48000):
659
- raise OSError(i18n("参考音频在3~10秒范围外,请更换!"))
660
  wav16k = torch.from_numpy(wav16k)
661
  zero_wav_torch = torch.from_numpy(zero_wav)
662
  wav16k = wav16k.to(self.configs.device)
@@ -1230,7 +1230,13 @@ class TTS:
1230
  else:
1231
  audio = audio.cpu().numpy()
1232
 
1233
- audio = (audio * 32768).astype(np.int16)
 
 
 
 
 
 
1234
 
1235
  # try:
1236
  # if speed_factor != 1.0:
@@ -1296,4 +1302,4 @@ class TTS:
1296
  wav_gen = self.bigvgan_model(cmf_res)
1297
  audio=wav_gen[0][0]#.cpu().detach().numpy()
1298
 
1299
- return audio
 
655
  )
656
  with torch.no_grad():
657
  wav16k, sr = librosa.load(ref_wav_path, sr=16000)
658
+ if (wav16k.shape[0] > 240000 or wav16k.shape[0] < 48000):
659
+ raise OSError(i18n("参考音频在3~15秒范围外,请更换!"))
660
  wav16k = torch.from_numpy(wav16k)
661
  zero_wav_torch = torch.from_numpy(zero_wav)
662
  wav16k = wav16k.to(self.configs.device)
 
1230
  else:
1231
  audio = audio.cpu().numpy()
1232
 
1233
+ if hasattr(audio, 'cpu'):
1234
+ # If audio is a tensor, convert it to a NumPy array
1235
+ audio = (audio * 32768).cpu().numpy()
1236
+ else:
1237
+ audio = audio * 32768
1238
+
1239
+ audio = audio.astype(np.int16)
1240
 
1241
  # try:
1242
  # if speed_factor != 1.0:
 
1302
  wav_gen = self.bigvgan_model(cmf_res)
1303
  audio=wav_gen[0][0]#.cpu().detach().numpy()
1304
 
1305
+ return audio