ginipick committed on
Commit 075fd65 · verified · 1 Parent(s): f2cbba3

Create app.py

Files changed (1): app.py (+557, -0)
app.py ADDED
@@ -0,0 +1,557 @@
import os
import shlex
import subprocess

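# Install GPU kernel dependencies at startup: flash-attn (with the CUDA source
# build skipped via FLASH_ATTENTION_SKIP_CUDA_BUILD), plus prebuilt mamba-ssm
# and causal-conv1d wheels built for CUDA 12 / torch 2.4 / Python 3.10.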
subprocess.run(
    shlex.split("pip install flash-attn --no-build-isolation"),
    env=os.environ | {"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    check=True,
)
subprocess.run(
    shlex.split("pip install https://github.com/state-spaces/mamba/releases/download/v2.2.4/mamba_ssm-2.2.4+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl"),
    check=True,
)
subprocess.run(
    shlex.split("pip install https://github.com/Dao-AILab/causal-conv1d/releases/download/v1.5.0.post8/causal_conv1d-1.5.0.post8+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl"),
    check=True,
)

import spaces
import torch
import torchaudio
import gradio as gr
from os import getenv

from zonos.model import Zonos
from zonos.conditioning import make_cond_dict, supported_language_codes

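# Load both Zonos variants once at startup and freeze them for inference.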
device = "cuda"
MODEL_NAMES = ["Zyphra/Zonos-v0.1-transformer", "Zyphra/Zonos-v0.1-hybrid"]
MODELS = {name: Zonos.from_pretrained(name, device=device) for name in MODEL_NAMES}
for model in MODELS.values():
    model.requires_grad_(False).eval()


def update_ui(model_choice):
    """
    Dynamically show/hide UI elements based on the model's conditioners.
    We do NOT display 'language_id' or 'ctc_loss' even if they exist in the model.
    """
    model = MODELS[model_choice]
    cond_names = [c.name for c in model.prefix_conditioner.conditioners]
    print("Conditioners in this model:", cond_names)

    text_update = gr.update(visible=("espeak" in cond_names))
    language_update = gr.update(visible=("espeak" in cond_names))
    speaker_audio_update = gr.update(visible=("speaker" in cond_names))
    prefix_audio_update = gr.update(visible=True)
    emotion1_update = gr.update(visible=("emotion" in cond_names))
    emotion2_update = gr.update(visible=("emotion" in cond_names))
    emotion3_update = gr.update(visible=("emotion" in cond_names))
    emotion4_update = gr.update(visible=("emotion" in cond_names))
    emotion5_update = gr.update(visible=("emotion" in cond_names))
    emotion6_update = gr.update(visible=("emotion" in cond_names))
    emotion7_update = gr.update(visible=("emotion" in cond_names))
    emotion8_update = gr.update(visible=("emotion" in cond_names))
    vq_single_slider_update = gr.update(visible=("vqscore_8" in cond_names))
    fmax_slider_update = gr.update(visible=("fmax" in cond_names))
    pitch_std_slider_update = gr.update(visible=("pitch_std" in cond_names))
    speaking_rate_slider_update = gr.update(visible=("speaking_rate" in cond_names))
    dnsmos_slider_update = gr.update(visible=("dnsmos_ovrl" in cond_names))
    speaker_noised_checkbox_update = gr.update(visible=("speaker_noised" in cond_names))
    unconditional_keys_update = gr.update(
        choices=[name for name in cond_names if name not in ("espeak", "language_id")]
    )

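    # The order of this tuple must match the `outputs` lists wired up in
    # build_interface() for model_choice.change and demo.load.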
    return (
        text_update,
        language_update,
        speaker_audio_update,
        prefix_audio_update,
        emotion1_update,
        emotion2_update,
        emotion3_update,
        emotion4_update,
        emotion5_update,
        emotion6_update,
        emotion7_update,
        emotion8_update,
        vq_single_slider_update,
        fmax_slider_update,
        pitch_std_slider_update,
        speaking_rate_slider_update,
        dnsmos_slider_update,
        speaker_noised_checkbox_update,
        unconditional_keys_update,
    )


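# On ZeroGPU Spaces, @spaces.GPU requests a GPU for each call; duration=120
# sets the expected per-call time budget to 120 seconds.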
@spaces.GPU(duration=120)
def generate_audio(
    model_choice,
    text,
    language,
    speaker_audio,
    prefix_audio,
    e1,
    e2,
    e3,
    e4,
    e5,
    e6,
    e7,
    e8,
    vq_single,
    fmax,
    pitch_std,
    speaking_rate,
    dnsmos_ovrl,
    speaker_noised,
    cfg_scale,
    min_p,
    seed,
    randomize_seed,
    unconditional_keys,
    progress=gr.Progress(),
):
    """
    Generates audio based on the provided UI parameters.
    We do NOT use language_id or ctc_loss even if the model has them.
    """
    selected_model = MODELS[model_choice]

    speaker_noised_bool = bool(speaker_noised)
    fmax = float(fmax)
    pitch_std = float(pitch_std)
    speaking_rate = float(speaking_rate)
    dnsmos_ovrl = float(dnsmos_ovrl)
    cfg_scale = float(cfg_scale)
    min_p = float(min_p)
    seed = int(seed)
    max_new_tokens = 86 * 30  # ~86 codec frames per second, capped at 30 s

    if randomize_seed:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()
    torch.manual_seed(seed)

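    # Build a speaker embedding from the reference clip for voice cloning,
    # unless "speaker" has been marked unconditional.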
    speaker_embedding = None
    if speaker_audio is not None and "speaker" not in unconditional_keys:
        wav, sr = torchaudio.load(speaker_audio)
        speaker_embedding = selected_model.make_speaker_embedding(wav, sr)
        speaker_embedding = speaker_embedding.to(device, dtype=torch.bfloat16)

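    # Encode the optional prefix audio (downmixed to mono and resampled to the
    # autoencoder's rate) into codec tokens so generation continues from it.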
    audio_prefix_codes = None
    if prefix_audio is not None:
        wav_prefix, sr_prefix = torchaudio.load(prefix_audio)
        wav_prefix = wav_prefix.mean(0, keepdim=True)
        wav_prefix = torchaudio.functional.resample(wav_prefix, sr_prefix, selected_model.autoencoder.sampling_rate)
        wav_prefix = wav_prefix.to(device, dtype=torch.float32)
        with torch.autocast(device, dtype=torch.float32):
            audio_prefix_codes = selected_model.autoencoder.encode(wav_prefix.unsqueeze(0))

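    # Pack the eight emotion sliders into one tensor, and broadcast the single
    # VQ score slider across its eight dimensions.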
    emotion_tensor = torch.tensor(list(map(float, [e1, e2, e3, e4, e5, e6, e7, e8])), device=device)

    vq_val = float(vq_single)
    vq_tensor = torch.tensor([vq_val] * 8, device=device).unsqueeze(0)

    cond_dict = make_cond_dict(
        text=text,
        language=language,
        speaker=speaker_embedding,
        emotion=emotion_tensor,
        vqscore_8=vq_tensor,
        fmax=fmax,
        pitch_std=pitch_std,
        speaking_rate=speaking_rate,
        dnsmos_ovrl=dnsmos_ovrl,
        speaker_noised=speaker_noised_bool,
        device=device,
        unconditional_keys=unconditional_keys,
    )
    conditioning = selected_model.prepare_conditioning(cond_dict)

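    # Progress estimate: roughly 400 characters ≈ 30 s of speech at ~86 codec
    # frames per second (the same rate used for max_new_tokens above).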
    estimated_generation_duration = 30 * len(text) / 400
    estimated_total_steps = int(estimated_generation_duration * 86)

    def update_progress(_frame: torch.Tensor, step: int, _total_steps: int) -> bool:
        progress((step, estimated_total_steps))
        return True

    codes = selected_model.generate(
        prefix_conditioning=conditioning,
        audio_prefix_codes=audio_prefix_codes,
        max_new_tokens=max_new_tokens,
        cfg_scale=cfg_scale,
        batch_size=1,
        sampling_params=dict(min_p=min_p),
        callback=update_progress,
    )

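    # Decode the generated codec tokens back into a waveform, keeping only the
    # first channel if the decoder returns more than one.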
    wav_out = selected_model.autoencoder.decode(codes).cpu().detach()
    sr_out = selected_model.autoencoder.sampling_rate
    if wav_out.dim() == 2 and wav_out.size(0) > 1:
        wav_out = wav_out[0:1, :]
    return (sr_out, wav_out.squeeze().numpy()), seed


# Custom CSS for pastel gradient background and enhanced UI
custom_css = """
.gradio-container {
    background: linear-gradient(135deg, #f3e7ff, #e6f0ff, #ffe6f2, #e6fff9);
    background-size: 400% 400%;
    animation: gradient 15s ease infinite;
}

@keyframes gradient {
    0% {
        background-position: 0% 50%;
    }
    50% {
        background-position: 100% 50%;
    }
    100% {
        background-position: 0% 50%;
    }
}

.container {
    max-width: 1200px;
    margin: 0 auto;
    padding: 20px;
}

.panel {
    background-color: rgba(255, 255, 255, 0.7);
    border-radius: 16px;
    padding: 20px;
    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.08);
    margin-bottom: 16px;
    backdrop-filter: blur(5px);
    transition: all 0.3s ease;
}

.panel:hover {
    box-shadow: 0 6px 16px rgba(0, 0, 0, 0.12);
    transform: translateY(-2px);
}

.title {
    font-size: 1.2em;
    font-weight: 600;
    margin-bottom: 12px;
    color: #6a3ea1;
    border-bottom: 2px solid #f0e6ff;
    padding-bottom: 8px;
}

.slider-container {
    background-color: rgba(255, 255, 255, 0.5);
    border-radius: 10px;
    padding: 10px;
    margin: 5px 0;
}

/* Make sliders more appealing */
input[type=range] {
    height: 5px;
    appearance: none;
    width: 100%;
    border-radius: 3px;
    background: linear-gradient(90deg, #9c83e0, #83b1e0);
}

.generate-button {
    background: linear-gradient(90deg, #a673ff, #7c4dff);
    color: white;
    border: none;
    border-radius: 8px;
    padding: 12px 24px;
    font-size: 16px;
    font-weight: 500;
    cursor: pointer;
    transition: all 0.3s ease;
    box-shadow: 0 4px 10px rgba(124, 77, 255, 0.2);
    display: block;
    width: 100%;
    margin: 20px 0;
}

.generate-button:hover {
    background: linear-gradient(90deg, #9c5eff, #6a3aff);
    box-shadow: 0 6px 15px rgba(124, 77, 255, 0.3);
    transform: translateY(-2px);
}

/* Tabs styling */
.tabs {
    display: flex;
    border-bottom: 1px solid #e0e0e0;
    margin-bottom: 20px;
}

.tab {
    padding: 10px 20px;
    cursor: pointer;
    transition: all 0.3s ease;
    background-color: transparent;
    border: none;
    color: #666;
}

.tab.active {
    color: #7c4dff;
    border-bottom: 3px solid #7c4dff;
    font-weight: 600;
}

/* Emotion sliders container */
.emotion-grid {
    display: grid;
    grid-template-columns: repeat(4, 1fr);
    gap: 12px;
}

/* Header styling */
.app-header {
    text-align: center;
    margin-bottom: 25px;
}

.app-header h1 {
    font-size: 2.5em;
    color: #6a3ea1;
    margin-bottom: 8px;
    font-weight: 700;
}

.app-header p {
    font-size: 1.1em;
    color: #666;
    margin-bottom: 20px;
}

/* Audio player styling */
.audio-output {
    margin-top: 20px;
}

/* Make output area more prominent */
.output-container {
    background-color: rgba(255, 255, 255, 0.85);
    border-radius: 16px;
    padding: 24px;
    box-shadow: 0 8px 18px rgba(0, 0, 0, 0.1);
    margin-top: 20px;
}
"""


def build_interface():
    # Build interface with enhanced visual elements and layout
    with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
        # Header section
        with gr.Column(elem_classes="app-header"):
            gr.Markdown("# ✨ Zonos Text-to-Speech Generator ✨")
            gr.Markdown("Create natural-sounding speech with customizable voice characteristics")

        # Main content container
        with gr.Column(elem_classes="container"):
            # First panel - Text & Model Selection
            with gr.Column(elem_classes="panel"):
                gr.Markdown('<div class="title">💬 Text & Model Configuration</div>')
                with gr.Row():
                    with gr.Column(scale=2):
                        model_choice = gr.Dropdown(
                            choices=MODEL_NAMES,
                            value="Zyphra/Zonos-v0.1-transformer",
                            label="Zonos Model Type",
                            info="Select the model variant to use.",
                        )
                        text = gr.Textbox(
                            label="Text to Synthesize",
                            value="Zonos uses eSpeak for text to phoneme conversion!",
                            lines=4,
                            max_length=500,
                        )
                        language = gr.Dropdown(
                            choices=supported_language_codes,
                            value="en-us",
                            label="Language Code",
                            info="Select a language code.",
                        )
                    with gr.Column(scale=1):
                        prefix_audio = gr.Audio(
                            value="assets/silence_100ms.wav",
                            label="Optional Prefix Audio (continue from this audio)",
                            type="filepath",
                        )

            # Second panel - Voice Characteristics
            with gr.Column(elem_classes="panel"):
                gr.Markdown('<div class="title">🎤 Voice Characteristics</div>')
                with gr.Row():
                    with gr.Column(scale=1):
                        speaker_audio = gr.Audio(
                            label="Optional Speaker Audio (for voice cloning)",
                            type="filepath",
                        )
                        speaker_noised_checkbox = gr.Checkbox(label="Denoise Speaker?", value=False)

                    with gr.Column(scale=2):
                        with gr.Row():
                            with gr.Column():
                                dnsmos_slider = gr.Slider(1.0, 5.0, value=4.0, step=0.1, label="Voice Quality", elem_classes="slider-container")
                                fmax_slider = gr.Slider(0, 24000, value=24000, step=1, label="Frequency Max (Hz)", elem_classes="slider-container")
                                vq_single_slider = gr.Slider(0.5, 0.8, 0.78, 0.01, label="Voice Clarity", elem_classes="slider-container")
                            with gr.Column():
                                pitch_std_slider = gr.Slider(0.0, 300.0, value=45.0, step=1, label="Pitch Variation", elem_classes="slider-container")
                                speaking_rate_slider = gr.Slider(5.0, 30.0, value=15.0, step=0.5, label="Speaking Rate", elem_classes="slider-container")

            # Third panel - Generation Parameters
            with gr.Column(elem_classes="panel"):
                gr.Markdown('<div class="title">⚙️ Generation Parameters</div>')
                with gr.Row():
                    with gr.Column():
                        cfg_scale_slider = gr.Slider(1.0, 5.0, 2.0, 0.1, label="Guidance Scale", elem_classes="slider-container")
                        min_p_slider = gr.Slider(0.0, 1.0, 0.15, 0.01, label="Min P (Randomness)", elem_classes="slider-container")
                    with gr.Column():
                        seed_number = gr.Number(label="Seed", value=420, precision=0)
                        randomize_seed_toggle = gr.Checkbox(label="Randomize Seed (before generation)", value=True)

            # Emotion panel (collapsible accordion with a slider grid)
            with gr.Accordion("🎭 Emotion Settings", open=False, elem_classes="panel"):
                gr.Markdown(
                    "Adjust these sliders to control the emotional tone of the generated speech.\n"
                    "For a neutral voice, keep 'Neutral' high and other emotions low."
                )
                with gr.Row(elem_classes="emotion-grid"):
                    emotion1 = gr.Slider(0.0, 1.0, 1.0, 0.05, label="Happiness", elem_classes="slider-container")
                    emotion2 = gr.Slider(0.0, 1.0, 0.05, 0.05, label="Sadness", elem_classes="slider-container")
                    emotion3 = gr.Slider(0.0, 1.0, 0.05, 0.05, label="Disgust", elem_classes="slider-container")
                    emotion4 = gr.Slider(0.0, 1.0, 0.05, 0.05, label="Fear", elem_classes="slider-container")
                with gr.Row(elem_classes="emotion-grid"):
                    emotion5 = gr.Slider(0.0, 1.0, 0.05, 0.05, label="Surprise", elem_classes="slider-container")
                    emotion6 = gr.Slider(0.0, 1.0, 0.05, 0.05, label="Anger", elem_classes="slider-container")
                    emotion7 = gr.Slider(0.0, 1.0, 0.1, 0.05, label="Other", elem_classes="slider-container")
                    emotion8 = gr.Slider(0.0, 1.0, 0.2, 0.05, label="Neutral", elem_classes="slider-container")

            # Advanced Settings Panel
            with gr.Accordion("⚡ Advanced Settings", open=False, elem_classes="panel"):
                gr.Markdown(
                    "### Unconditional Toggles\n"
                    "Checking a box makes the model ignore the corresponding conditioning value, treating that feature as unconditional.\n"
                    'In practice, the feature is left unconstrained and "filled in automatically".'
                )
                unconditional_keys = gr.CheckboxGroup(
                    [
                        "speaker",
                        "emotion",
                        "vqscore_8",
                        "fmax",
                        "pitch_std",
                        "speaking_rate",
                        "dnsmos_ovrl",
                        "speaker_noised",
                    ],
                    value=["emotion"],
                    label="Unconditional Keys",
                )

            # Generate Button and Output Area
            with gr.Column(elem_classes="panel output-container"):
                gr.Markdown('<div class="title">🔊 Generate & Output</div>')
                generate_button = gr.Button("Generate Audio", elem_classes="generate-button")
                output_audio = gr.Audio(label="Generated Audio", type="numpy", autoplay=True, elem_classes="audio-output")

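        # Keep visible controls in sync with the conditioners the selected
        # model actually exposes (see update_ui above).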
        model_choice.change(
            fn=update_ui,
            inputs=[model_choice],
            outputs=[
                text,
                language,
                speaker_audio,
                prefix_audio,
                emotion1,
                emotion2,
                emotion3,
                emotion4,
                emotion5,
                emotion6,
                emotion7,
                emotion8,
                vq_single_slider,
                fmax_slider,
                pitch_std_slider,
                speaking_rate_slider,
                dnsmos_slider,
                speaker_noised_checkbox,
                unconditional_keys,
            ],
        )

        # On page load, trigger the same UI refresh
        demo.load(
            fn=update_ui,
            inputs=[model_choice],
            outputs=[
                text,
                language,
                speaker_audio,
                prefix_audio,
                emotion1,
                emotion2,
                emotion3,
                emotion4,
                emotion5,
                emotion6,
                emotion7,
                emotion8,
                vq_single_slider,
                fmax_slider,
                pitch_std_slider,
                speaking_rate_slider,
                dnsmos_slider,
                speaker_noised_checkbox,
                unconditional_keys,
            ],
        )

        # Generate audio on button click
        generate_button.click(
            fn=generate_audio,
            inputs=[
                model_choice,
                text,
                language,
                speaker_audio,
                prefix_audio,
                emotion1,
                emotion2,
                emotion3,
                emotion4,
                emotion5,
                emotion6,
                emotion7,
                emotion8,
                vq_single_slider,
                fmax_slider,
                pitch_std_slider,
                speaking_rate_slider,
                dnsmos_slider,
                speaker_noised_checkbox,
                cfg_scale_slider,
                min_p_slider,
                seed_number,
                randomize_seed_toggle,
                unconditional_keys,
            ],
            outputs=[output_audio, seed_number],
        )

    return demo


if __name__ == "__main__":
    demo = build_interface()
    # Set GRADIO_SHARE=true (or "1"/"t") to expose a public share link.
    share = getenv("GRADIO_SHARE", "False").lower() in ("true", "1", "t")
    demo.launch(server_name="0.0.0.0", server_port=7860, share=share)