ollieollie committed on
Commit
248e723
·
verified ·
1 Parent(s): 580ad9f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -34
app.py CHANGED
@@ -1,50 +1,31 @@
1
  import random
2
  import numpy as np
3
  import torch
4
- from chatterbox.src.chatterbox.tts import ChatterboxTTS
5
  import gradio as gr
6
 
7
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
8
 
9
 
10
- def set_seed(seed: int):
11
- torch.manual_seed(seed)
12
- torch.cuda.manual_seed(seed)
13
- torch.cuda.manual_seed_all(seed)
14
- random.seed(seed)
15
- np.random.seed(seed)
16
 
 
17
 
18
- model = ChatterboxTTS.from_pretrained(DEVICE)
19
-
20
- def generate(text, audio_prompt_path, exaggeration, pace, temperature, seed_num, cfg_weight):
21
- if seed_num != 0:
22
- set_seed(int(seed_num))
23
-
24
- wav = model.generate(
25
- text,
26
- audio_prompt_path=audio_prompt_path,
27
- exaggeration=exaggeration,
28
- pace=pace,
29
- temperature=temperature,
30
- cfg_weight=cfg_weight,
31
- )
32
  return model.sr, wav.squeeze(0).numpy()
33
 
34
 
35
  with gr.Blocks() as demo:
36
  with gr.Row():
37
  with gr.Column():
38
- text = gr.Textbox(value="What does the fox say?", label="Text to synthesize")
39
- ref_wav = gr.Audio(sources="upload", type="filepath", label="Reference Audio File", value=None)
40
  exaggeration = gr.Slider(0.25, 2, step=.05, label="Exaggeration (Neutral = 0.5, extreme values can be unstable)", value=.5)
41
- cfg_weight = gr.Slider(0.0, 5, step=.05, label="CFG/Pace", value=1.0)
42
-
43
-
44
- with gr.Accordion("More options", open=False):
45
- seed_num = gr.Number(value=0, label="Random seed (0 for random)")
46
- temp = gr.Slider(0.05, 5, step=.05, label="temperature", value=.8)
47
- pace = gr.Slider(0.8, 1.2, step=.01, label="pace", value=1)
48
 
49
  run_btn = gr.Button("Generate", variant="primary")
50
 
@@ -57,10 +38,6 @@ with gr.Blocks() as demo:
57
  text,
58
  ref_wav,
59
  exaggeration,
60
- pace,
61
- temp,
62
- seed_num,
63
- cfg_weight,
64
  ],
65
  outputs=audio_output,
66
  )
 
1
  import random
2
  import numpy as np
3
  import torch
4
+ from chatterbox.src.orator.tts import OratorTTS
5
  import gradio as gr
6
 
7
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
8
 
9
 
 
 
 
 
 
 
10
 
11
+ model = OratorTTS.from_pretrained(DEVICE)
12
 
13
+ def generate(text, audio_prompt_path, exaggeration, pace, temperature, seed_num):
14
+ with torch.inference_mode():
15
+ wav = model.generate(
16
+ text,
17
+ audio_prompt_path=audio_prompt_path,
18
+ emotion_adv=exaggeration,
19
+ )
 
 
 
 
 
 
 
20
  return model.sr, wav.squeeze(0).numpy()
21
 
22
 
23
  with gr.Blocks() as demo:
24
  with gr.Row():
25
  with gr.Column():
26
+ text = gr.Textbox(value="I know what you're thinking. \"Did he fire six shots, or only five?\" Well, to tell you the truth, in all this excitement, I kind of lost track myself.", label="Text to synthesize")
27
+ ref_wav = gr.Audio(sources="upload", type="filepath", label="Reference Audio File")
28
  exaggeration = gr.Slider(0.25, 2, step=.05, label="Exaggeration (Neutral = 0.5, extreme values can be unstable)", value=.5)
 
 
 
 
 
 
 
29
 
30
  run_btn = gr.Button("Generate", variant="primary")
31
 
 
38
  text,
39
  ref_wav,
40
  exaggeration,
 
 
 
 
41
  ],
42
  outputs=audio_output,
43
  )