SohomToom committed on
Commit
5e9b992
·
verified ·
1 Parent(s): 905b4c7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -12
app.py CHANGED
@@ -12,6 +12,7 @@ os.environ["NUMBA_DISABLE_CACHE"] = "1"
12
  # import english_patch
13
  #from melo.api import TTS
14
  from MeloTTS.melo.api import TTS
 
15
  from openvoice.api import ToneColorConverter
16
  #from meloTTS import english
17
 
@@ -33,46 +34,51 @@ output_dir = "/tmp/outputs"
33
  os.makedirs(output_dir, exist_ok=True)
34
 
35
  # Initialize tone converter
36
- ckpt_converter = "checkpoints/converter/config.json"
37
- tone_color_converter = ToneColorConverter(ckpt_converter)
38
 
39
  # Device setting
40
  device = "cuda" if torch.cuda.is_available() else "cpu"
41
 
 
 
 
42
  def clone_and_speak(text, speaker_wav):
43
  if not speaker_wav:
44
  return "Please upload a reference .wav file."
45
 
46
  base_name = f"output_{int(time.time())}_{uuid.uuid4().hex[:6]}"
47
  tmp_melo_path = f"{output_dir}/{base_name}_tmp.wav"
48
- final_output_path = f"{output_dir}/{base_name}_converted.wav"
 
 
 
49
 
50
  # Use English speaker model
51
  model = TTS(language="EN", device=device)
52
  speaker_ids = model.hps.data.spk2id
53
  default_speaker_id = next(iter(speaker_ids.values()))
54
 
 
 
55
  for speaker_key in speaker_ids.keys():
56
  speaker_id = speaker_ids[speaker_key]
57
  speaker_key = speaker_key.lower().replace('_', '-')
58
-
59
- # Generate base TTS voice
60
- speed = 1.0
61
- #source_se = torch.load(f'checkpoints/base_speakers/EN/{speaker_key}.pth', map_location=device)
62
 
63
-
64
  # Use speaker_wav as reference to extract style embedding
65
- from openvoice import se_extractor
66
- torch.hub.load('snakers4/silero-vad', 'silero_vad', force_reload=False)
67
- ref_se, _ = se_extractor.get_se(speaker_wav, tone_color_converter, vad=True)
68
  if torch.backends.mps.is_available() and device == 'cpu':
69
  torch.backends.mps.is_available = lambda: False
70
  model.tts_to_file(text, speaker_id, tmp_melo_path,speed=speed)
 
71
 
72
  # Run the tone conversion
73
  tone_color_converter.convert(
74
  audio_src_path=tmp_melo_path,
75
- src_se=ref_se,
76
  tgt_se=ref_se,
77
  output_path=final_output_path,
78
  message="@HuggingFace",
 
12
  # import english_patch
13
  #from melo.api import TTS
14
  from MeloTTS.melo.api import TTS
15
+ from openvoice import se_extractor
16
  from openvoice.api import ToneColorConverter
17
  #from meloTTS import english
18
 
 
34
  os.makedirs(output_dir, exist_ok=True)
35
 
36
  # Initialize tone converter
37
+ ckpt_converter = "checkpoints/converter"
38
+
39
 
40
  # Device setting
41
  device = "cuda" if torch.cuda.is_available() else "cpu"
42
 
43
+ tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)
44
+ tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
45
+
46
  def clone_and_speak(text, speaker_wav):
47
  if not speaker_wav:
48
  return "Please upload a reference .wav file."
49
 
50
  base_name = f"output_{int(time.time())}_{uuid.uuid4().hex[:6]}"
51
  tmp_melo_path = f"{output_dir}/{base_name}_tmp.wav"
52
+
53
+
54
+
55
+ ref_se, _ = se_extractor.get_se(speaker_wav, tone_color_converter, vad=True)
56
 
57
  # Use English speaker model
58
  model = TTS(language="EN", device=device)
59
  speaker_ids = model.hps.data.spk2id
60
  default_speaker_id = next(iter(speaker_ids.values()))
61
 
62
+
63
+
64
  for speaker_key in speaker_ids.keys():
65
  speaker_id = speaker_ids[speaker_key]
66
  speaker_key = speaker_key.lower().replace('_', '-')
67
+ source_se = torch.load(f'checkpoint/base_speakers/ses/{speaker_key}.pth', map_location=device)
 
 
 
68
 
69
+ speed = 1.0
70
  # Use speaker_wav as reference to extract style embedding
71
+ #torch.hub.load('snakers4/silero-vad', 'silero_vad', force_reload=False)
72
+
 
73
  if torch.backends.mps.is_available() and device == 'cpu':
74
  torch.backends.mps.is_available = lambda: False
75
  model.tts_to_file(text, speaker_id, tmp_melo_path,speed=speed)
76
+ final_output_path = f"{output_dir}/{base_name}_converted.wav"
77
 
78
  # Run the tone conversion
79
  tone_color_converter.convert(
80
  audio_src_path=tmp_melo_path,
81
+ src_se=source_se,
82
  tgt_se=ref_se,
83
  output_path=final_output_path,
84
  message="@HuggingFace",