ollieollie committed on
Commit
580ad9f
·
verified ·
1 Parent(s): 3829bb0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -11
app.py CHANGED
@@ -1,31 +1,50 @@
1
  import random
2
  import numpy as np
3
  import torch
4
- from chatterbox.src.orator.tts import OratorTTS
5
  import gradio as gr
6
 
7
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
8
 
9
 
 
 
 
 
 
 
10
 
11
- model = OratorTTS.from_pretrained(DEVICE)
12
 
13
- def generate(text, audio_prompt_path, exaggeration, pace, temperature, seed_num):
14
- with torch.inference_mode():
15
- wav = model.generate(
16
- text,
17
- audio_prompt_path=audio_prompt_path,
18
- emotion_adv=exaggeration,
19
- )
 
 
 
 
 
 
 
20
  return model.sr, wav.squeeze(0).numpy()
21
 
22
 
23
  with gr.Blocks() as demo:
24
  with gr.Row():
25
  with gr.Column():
26
- text = gr.Textbox(value="I know what you're thinking. \"Did he fire six shots, or only five?\" Well, to tell you the truth, in all this excitement, I kind of lost track myself.", label="Text to synthesize")
27
- ref_wav = gr.Audio(sources="upload", type="filepath", label="Reference Audio File")
28
  exaggeration = gr.Slider(0.25, 2, step=.05, label="Exaggeration (Neutral = 0.5, extreme values can be unstable)", value=.5)
 
 
 
 
 
 
 
29
 
30
  run_btn = gr.Button("Generate", variant="primary")
31
 
@@ -38,6 +57,10 @@ with gr.Blocks() as demo:
38
  text,
39
  ref_wav,
40
  exaggeration,
 
 
 
 
41
  ],
42
  outputs=audio_output,
43
  )
 
1
  import random
2
  import numpy as np
3
  import torch
4
+ from chatterbox.src.chatterbox.tts import ChatterboxTTS
5
  import gradio as gr
6
 
7
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
8
 
9
 
10
+ def set_seed(seed: int):
11
+ torch.manual_seed(seed)
12
+ torch.cuda.manual_seed(seed)
13
+ torch.cuda.manual_seed_all(seed)
14
+ random.seed(seed)
15
+ np.random.seed(seed)
16
 
 
17
 
18
+ model = ChatterboxTTS.from_pretrained(DEVICE)
19
+
20
+ def generate(text, audio_prompt_path, exaggeration, pace, temperature, seed_num, cfg_weight):
21
+ if seed_num != 0:
22
+ set_seed(int(seed_num))
23
+
24
+ wav = model.generate(
25
+ text,
26
+ audio_prompt_path=audio_prompt_path,
27
+ exaggeration=exaggeration,
28
+ pace=pace,
29
+ temperature=temperature,
30
+ cfg_weight=cfg_weight,
31
+ )
32
  return model.sr, wav.squeeze(0).numpy()
33
 
34
 
35
  with gr.Blocks() as demo:
36
  with gr.Row():
37
  with gr.Column():
38
+ text = gr.Textbox(value="What does the fox say?", label="Text to synthesize")
39
+ ref_wav = gr.Audio(sources="upload", type="filepath", label="Reference Audio File", value=None)
40
  exaggeration = gr.Slider(0.25, 2, step=.05, label="Exaggeration (Neutral = 0.5, extreme values can be unstable)", value=.5)
41
+ cfg_weight = gr.Slider(0.0, 5, step=.05, label="CFG/Pace", value=1.0)
42
+
43
+
44
+ with gr.Accordion("More options", open=False):
45
+ seed_num = gr.Number(value=0, label="Random seed (0 for random)")
46
+ temp = gr.Slider(0.05, 5, step=.05, label="temperature", value=.8)
47
+ pace = gr.Slider(0.8, 1.2, step=.01, label="pace", value=1)
48
 
49
  run_btn = gr.Button("Generate", variant="primary")
50
 
 
57
  text,
58
  ref_wav,
59
  exaggeration,
60
+ pace,
61
+ temp,
62
+ seed_num,
63
+ cfg_weight,
64
  ],
65
  outputs=audio_output,
66
  )