Commit 9187b50 (verified) · Parent: 67bb066
ginipick committed: Create components-backup.py

Files changed (1): ui/components-backup.py (new file, 958 lines added)
"""
ACE-Step: A Step Towards Music Generation Foundation Model

https://github.com/ace-step/ACE-Step

Apache 2.0 License
"""

import gradio as gr
import librosa
import os


TAG_DEFAULT = "funk, pop, soul, rock, melodic, guitar, drums, bass, keyboard, percussion, 105 BPM, energetic, upbeat, groovy, vibrant, dynamic"
LYRIC_DEFAULT = """[verse]
Neon lights they flicker bright
City hums in dead of night
Rhythms pulse through concrete veins
Lost in echoes of refrains

[verse]
Bassline groovin' in my chest
Heartbeats match the city's zest
Electric whispers fill the air
Synthesized dreams everywhere

[chorus]
Turn it up and let it flow
Feel the fire let it grow
In this rhythm we belong
Hear the night sing out our song

[verse]
Guitar strings they start to weep
Wake the soul from silent sleep
Every note a story told
In this night we’re bold and gold

[bridge]
Voices blend in harmony
Lost in pure cacophony
Timeless echoes timeless cries
Soulful shouts beneath the skies

[verse]
Keyboard dances on the keys
Melodies on evening breeze
Catch the tune and hold it tight
In this moment we take flight
"""

# Genre presets, defined at the top of the file right after the imports
GENRE_PRESETS = {
    "Modern Pop": "pop, synth, drums, guitar, 120 bpm, upbeat, catchy, vibrant, female vocals, polished vocals",
    "Rock": "rock, electric guitar, drums, bass, 130 bpm, energetic, rebellious, gritty, male vocals, raw vocals",
    "Hip Hop": "hip hop, 808 bass, hi-hats, synth, 90 bpm, bold, urban, intense, male vocals, rhythmic vocals",
    "Country": "country, acoustic guitar, steel guitar, fiddle, 100 bpm, heartfelt, rustic, warm, male vocals, twangy vocals",
    "EDM": "edm, synth, bass, kick drum, 128 bpm, euphoric, pulsating, energetic, instrumental",
    "Reggae": "reggae, guitar, bass, drums, 80 bpm, chill, soulful, positive, male vocals, smooth vocals",
    "Classical": "classical, orchestral, strings, piano, 60 bpm, elegant, emotive, timeless, instrumental",
    "Jazz": "jazz, saxophone, piano, double bass, 110 bpm, smooth, improvisational, soulful, male vocals, crooning vocals",
    "Metal": "metal, electric guitar, double kick drum, bass, 160 bpm, aggressive, intense, heavy, male vocals, screamed vocals",
    "R&B": "r&b, synth, bass, drums, 85 bpm, sultry, groovy, romantic, female vocals, silky vocals"
}

# Map a preset selection to its tag string; "Custom" clears the tags
def update_tags_from_preset(preset_name):
    if preset_name == "Custom":
        return ""
    return GENRE_PRESETS.get(preset_name, "")
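
# Quick sanity check of the preset lookup above (illustrative only; the
# Gradio event wiring happens later, in create_text2music_ui):
#
#     update_tags_from_preset("Custom")    # -> ""
#     update_tags_from_preset("EDM")       # -> "edm, synth, bass, kick drum, ..."
#     update_tags_from_preset("Unknown")   # -> "" (safe fallback via .get)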


def create_output_ui(task_name="Text2Music"):
    # For many consumer-grade GPUs, only one sample can be generated per batch
    output_audio1 = gr.Audio(type="filepath", label=f"{task_name} Generated Audio 1")
    # output_audio2 = gr.Audio(type="filepath", label="Generated Audio 2")
    with gr.Accordion(f"{task_name} Parameters", open=False):
        input_params_json = gr.JSON(label=f"{task_name} Parameters")
    # outputs = [output_audio1, output_audio2]
    outputs = [output_audio1]
    return outputs, input_params_json
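
# A sketch (not part of the original file) of how create_output_ui could be
# generalized for GPUs that can hold more than one sample per batch;
# num_outputs is a hypothetical parameter:
#
#     def create_output_ui_multi(task_name="Text2Music", num_outputs=2):
#         outputs = [
#             gr.Audio(type="filepath", label=f"{task_name} Generated Audio {i + 1}")
#             for i in range(num_outputs)
#         ]
#         with gr.Accordion(f"{task_name} Parameters", open=False):
#             input_params_json = gr.JSON(label=f"{task_name} Parameters")
#         return outputs, input_params_json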


def dump_func(*args):
    print(args)
    return []


def create_text2music_ui(
    gr,
    text2music_process_func,
    sample_data_func=None,
    load_data_func=None,
):

    with gr.Row():
        with gr.Column():
            with gr.Row(equal_height=True):
                # Tags and lyrics examples are from the AI music generation community
                audio_duration = gr.Slider(
                    -1,
                    240.0,
                    step=0.00001,
                    value=-1,
                    label="Audio Duration",
                    interactive=True,
                    info="-1 means a random duration (30 ~ 240 s).",
                    scale=9,
                )
                sample_bnt = gr.Button("Sample", variant="secondary", scale=1)

            # audio2audio
            with gr.Row(equal_height=True):
                audio2audio_enable = gr.Checkbox(label="Enable Audio2Audio", value=False, info="Check to enable Audio-to-Audio generation using a reference audio.", elem_id="audio2audio_checkbox")
                lora_name_or_path = gr.Dropdown(
                    label="LoRA Name or Path",
                    choices=["ACE-Step/ACE-Step-v1-chinese-rap-LoRA", "none"],
                    value="none",
                    allow_custom_value=True,
                )

            ref_audio_input = gr.Audio(type="filepath", label="Reference Audio (for Audio2Audio)", visible=False, elem_id="ref_audio_input", show_download_button=True)
            ref_audio_strength = gr.Slider(
                label="Reference audio strength",
                minimum=0.0,
                maximum=1.0,
                step=0.01,
                value=0.5,
                elem_id="ref_audio_strength",
                visible=False,
                interactive=True,
            )

            def toggle_ref_audio_visibility(is_checked):
                return (
                    gr.update(visible=is_checked, elem_id="ref_audio_input"),
                    gr.update(visible=is_checked, elem_id="ref_audio_strength"),
                )

            audio2audio_enable.change(
                fn=toggle_ref_audio_visibility,
                inputs=[audio2audio_enable],
                outputs=[ref_audio_input, ref_audio_strength],
            )
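            # Note: toggle_ref_audio_visibility returns one gr.update(...) per
            # output component, which Gradio applies in order, so checking the
            # box reveals both the reference-audio input and the strength
            # slider without rebuilding them.
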
            with gr.Column(scale=2):
                with gr.Group():
                    gr.Markdown("""<center>Supports tags, descriptions, and scenes. Use commas to separate different tags.<br>Tags and lyrics examples are from the AI music generation community.</center>""")
                    with gr.Row():
                        genre_preset = gr.Dropdown(
                            choices=["Custom"] + list(GENRE_PRESETS.keys()),
                            value="Custom",
                            label="Preset",
                            scale=1,
                        )
                        prompt = gr.Textbox(
                            lines=1,
                            label="Tags",
                            max_lines=4,
                            value=TAG_DEFAULT,
                            scale=9,
                        )

                # Apply the selected preset's tags to the Tags textbox
                genre_preset.change(
                    fn=update_tags_from_preset,
                    inputs=[genre_preset],
                    outputs=[prompt]
                )
                with gr.Group():
                    gr.Markdown("""<center>Supports lyric structure tags like [verse], [chorus], and [bridge] to separate different parts of the lyrics.<br>Use [instrumental] or [inst] to generate instrumental music. Genre tags are not supported inside the lyrics.</center>""")
                    lyrics = gr.Textbox(
                        lines=9,
                        label="Lyrics",
                        max_lines=13,
                        value=LYRIC_DEFAULT,
                    )

            with gr.Accordion("Basic Settings", open=False):
                infer_step = gr.Slider(
                    minimum=1,
                    maximum=200,
                    step=1,
                    value=200,
                    label="Infer Steps",
                    interactive=True,
                )
                guidance_scale = gr.Slider(
                    minimum=0.0,
                    maximum=30.0,
                    step=0.1,
                    value=15.0,
                    label="Guidance Scale",
                    interactive=True,
                    info="When guidance_scale_lyric > 1 and guidance_scale_text > 1, this combined guidance scale is not applied.",
                )
                guidance_scale_text = gr.Slider(
                    minimum=0.0,
                    maximum=10.0,
                    step=0.1,
                    value=0.0,
                    label="Guidance Scale Text",
                    interactive=True,
                    info="Guidance scale for the text condition. It only applies to cfg. Try guidance_scale_text=5.0 and guidance_scale_lyric=1.5 to start.",
                )
                guidance_scale_lyric = gr.Slider(
                    minimum=0.0,
                    maximum=10.0,
                    step=0.1,
                    value=0.0,
                    label="Guidance Scale Lyric",
                    interactive=True,
                )

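                # Per the two info texts above: leaving guidance_scale_text and
                # guidance_scale_lyric at 0.0 means the single combined
                # guidance_scale (15.0 by default) is used; raising both above
                # 1 (e.g. text=5.0, lyric=1.5) switches to the separate
                # text/lyric scales and the combined scale is ignored.
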
                manual_seeds = gr.Textbox(
                    label="manual seeds (default None)",
                    placeholder="1,2,3,4",
                    value=None,
                    info="Seed for the generation",
                )

            with gr.Accordion("Advanced Settings", open=False):
                scheduler_type = gr.Radio(
                    ["euler", "heun"],
                    value="euler",
                    label="Scheduler Type",
                    elem_id="scheduler_type",
                    info="Scheduler type for the generation. euler is recommended; heun takes more time.",
                )
                cfg_type = gr.Radio(
                    ["cfg", "apg", "cfg_star"],
                    value="apg",
                    label="CFG Type",
                    elem_id="cfg_type",
                    info="CFG type for the generation. apg is recommended; cfg and cfg_star are almost the same.",
                )
                use_erg_tag = gr.Checkbox(
                    label="use ERG for tag",
                    value=True,
                    info="Use Entropy Rectifying Guidance for tags. It multiplies a temperature into the attention to weaken the tag condition and improve diversity.",
                )
                use_erg_lyric = gr.Checkbox(
                    label="use ERG for lyric",
                    value=False,
                    info="The same, but applied to the lyric encoder's attention.",
                )
                use_erg_diffusion = gr.Checkbox(
                    label="use ERG for diffusion",
                    value=True,
                    info="The same, but applied to the diffusion model's attention.",
                )

                omega_scale = gr.Slider(
                    minimum=-100.0,
                    maximum=100.0,
                    step=0.1,
                    value=10.0,
                    label="Granularity Scale",
                    interactive=True,
                    info="Granularity scale for the generation. Higher values can reduce artifacts.",
                )

                guidance_interval = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.01,
                    value=0.5,
                    label="Guidance Interval",
                    interactive=True,
                    info="Guidance interval for the generation. 0.5 means guidance is applied only in the middle steps (0.25 * infer_steps to 0.75 * infer_steps).",
                )
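                # Worked example of the interval above: with infer_step=200 and
                # guidance_interval=0.5, guidance is applied only on steps
                # 50 through 150 (0.25 * 200 to 0.75 * 200).
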
                guidance_interval_decay = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.01,
                    value=0.0,
                    label="Guidance Interval Decay",
                    interactive=True,
                    info="Guidance interval decay for the generation. The guidance scale decays from guidance_scale to min_guidance_scale within the interval. 0.0 means no decay.",
                )
                min_guidance_scale = gr.Slider(
                    minimum=0.0,
                    maximum=200.0,
                    step=0.1,
                    value=3.0,
                    label="Min Guidance Scale",
                    interactive=True,
                    info="Minimum guidance scale at the end of the guidance interval decay.",
                )
                oss_steps = gr.Textbox(
                    label="OSS Steps",
                    placeholder="16, 29, 52, 96, 129, 158, 172, 183, 189, 200",
                    value=None,
                    info="Optimal steps for the generation, but not well tested.",
                )

            text2music_bnt = gr.Button("Generate", variant="primary")

        with gr.Column():
            outputs, input_params_json = create_output_ui()
            with gr.Tab("retake"):
                retake_variance = gr.Slider(
                    minimum=0.0, maximum=1.0, step=0.01, value=0.2, label="variance"
                )
                retake_seeds = gr.Textbox(
                    label="retake seeds (default None)", placeholder="", value=None
                )
                retake_bnt = gr.Button("Retake", variant="primary")
                retake_outputs, retake_input_params_json = create_output_ui("Retake")

                def retake_process_func(json_data, retake_variance, retake_seeds):
                    return text2music_process_func(
                        json_data["audio_duration"],
                        json_data["prompt"],
                        json_data["lyrics"],
                        json_data["infer_step"],
                        json_data["guidance_scale"],
                        json_data["scheduler_type"],
                        json_data["cfg_type"],
                        json_data["omega_scale"],
                        ", ".join(map(str, json_data["actual_seeds"])),
                        json_data["guidance_interval"],
                        json_data["guidance_interval_decay"],
                        json_data["min_guidance_scale"],
                        json_data["use_erg_tag"],
                        json_data["use_erg_lyric"],
                        json_data["use_erg_diffusion"],
                        ", ".join(map(str, json_data["oss_steps"])),
                        json_data.get("guidance_scale_text", 0.0),
                        json_data.get("guidance_scale_lyric", 0.0),
                        retake_seeds=retake_seeds,
                        retake_variance=retake_variance,
                        task="retake",
                        lora_name_or_path=json_data.get("lora_name_or_path", "none"),
                    )
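
                # The parameters JSON (input_params_json, filled by the last
                # text2music run) is assumed to carry at least: audio_duration,
                # prompt, lyrics, infer_step, guidance_scale, scheduler_type,
                # cfg_type, omega_scale, actual_seeds (list), guidance_interval,
                # guidance_interval_decay, min_guidance_scale, use_erg_tag,
                # use_erg_lyric, use_erg_diffusion, oss_steps (list), and
                # optionally guidance_scale_text, guidance_scale_lyric,
                # audio_path, and lora_name_or_path.
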
                retake_bnt.click(
                    fn=retake_process_func,
                    inputs=[
                        input_params_json,
                        retake_variance,
                        retake_seeds,
                    ],
                    outputs=retake_outputs + [retake_input_params_json],
                )
            with gr.Tab("repainting"):
                retake_variance = gr.Slider(
                    minimum=0.0, maximum=1.0, step=0.01, value=0.2, label="variance"
                )
                retake_seeds = gr.Textbox(
                    label="repaint seeds (default None)", placeholder="", value=None
                )
                repaint_start = gr.Slider(
                    minimum=0.0,
                    maximum=240.0,
                    step=0.01,
                    value=0.0,
                    label="Repaint Start Time",
                    interactive=True,
                )
                repaint_end = gr.Slider(
                    minimum=0.0,
                    maximum=240.0,
                    step=0.01,
                    value=30.0,
                    label="Repaint End Time",
                    interactive=True,
                )
                repaint_source = gr.Radio(
                    ["text2music", "last_repaint", "upload"],
                    value="text2music",
                    label="Repaint Source",
                    elem_id="repaint_source",
                )

                repaint_source_audio_upload = gr.Audio(
                    label="Upload Audio",
                    type="filepath",
                    visible=False,
                    elem_id="repaint_source_audio_upload",
                    show_download_button=True,
                )
                repaint_source.change(
                    fn=lambda x: gr.update(
                        visible=x == "upload", elem_id="repaint_source_audio_upload"
                    ),
                    inputs=[repaint_source],
                    outputs=[repaint_source_audio_upload],
                )

                repaint_bnt = gr.Button("Repaint", variant="primary")
                repaint_outputs, repaint_input_params_json = create_output_ui("Repaint")

                def repaint_process_func(
                    text2music_json_data,
                    repaint_json_data,
                    retake_variance,
                    retake_seeds,
                    repaint_start,
                    repaint_end,
                    repaint_source,
                    repaint_source_audio_upload,
                    prompt,
                    lyrics,
                    infer_step,
                    guidance_scale,
                    scheduler_type,
                    cfg_type,
                    omega_scale,
                    manual_seeds,
                    guidance_interval,
                    guidance_interval_decay,
                    min_guidance_scale,
                    use_erg_tag,
                    use_erg_lyric,
                    use_erg_diffusion,
                    oss_steps,
                    guidance_scale_text,
                    guidance_scale_lyric,
                ):
                    if repaint_source == "upload":
                        src_audio_path = repaint_source_audio_upload
                        # Note: librosa >= 0.10 renamed this keyword to `path`
                        audio_duration = librosa.get_duration(filename=src_audio_path)
                        json_data = {"audio_duration": audio_duration}
                    elif repaint_source == "text2music":
                        json_data = text2music_json_data
                        src_audio_path = json_data["audio_path"]
                    elif repaint_source == "last_repaint":
                        json_data = repaint_json_data
                        src_audio_path = json_data["audio_path"]

                    return text2music_process_func(
                        json_data["audio_duration"],
                        prompt,
                        lyrics,
                        infer_step,
                        guidance_scale,
                        scheduler_type,
                        cfg_type,
                        omega_scale,
                        manual_seeds,
                        guidance_interval,
                        guidance_interval_decay,
                        min_guidance_scale,
                        use_erg_tag,
                        use_erg_lyric,
                        use_erg_diffusion,
                        oss_steps,
                        guidance_scale_text,
                        guidance_scale_lyric,
                        retake_seeds=retake_seeds,
                        retake_variance=retake_variance,
                        task="repaint",
                        repaint_start=repaint_start,
                        repaint_end=repaint_end,
                        src_audio_path=src_audio_path,
                        lora_name_or_path=json_data.get("lora_name_or_path", "none"),
                    )

                repaint_bnt.click(
                    fn=repaint_process_func,
                    inputs=[
                        input_params_json,
                        repaint_input_params_json,
                        retake_variance,
                        retake_seeds,
                        repaint_start,
                        repaint_end,
                        repaint_source,
                        repaint_source_audio_upload,
                        prompt,
                        lyrics,
                        infer_step,
                        guidance_scale,
                        scheduler_type,
                        cfg_type,
                        omega_scale,
                        manual_seeds,
                        guidance_interval,
                        guidance_interval_decay,
                        min_guidance_scale,
                        use_erg_tag,
                        use_erg_lyric,
                        use_erg_diffusion,
                        oss_steps,
                        guidance_scale_text,
                        guidance_scale_lyric,
                    ],
                    outputs=repaint_outputs + [repaint_input_params_json],
                )
            with gr.Tab("edit"):
                edit_prompt = gr.Textbox(lines=2, label="Edit Tags", max_lines=4)
                edit_lyrics = gr.Textbox(lines=9, label="Edit Lyrics", max_lines=13)
                retake_seeds = gr.Textbox(
                    label="edit seeds (default None)", placeholder="", value=None
                )

                edit_type = gr.Radio(
                    ["only_lyrics", "remix"],
                    value="only_lyrics",
                    label="Edit Type",
                    elem_id="edit_type",
                    info="`only_lyrics` keeps the whole song the same except for the lyric changes. Keep the changes small, e.g. editing one lyric line.\n`remix` can change the song's melody and genre.",
                )
                edit_n_min = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.01,
                    value=0.6,
                    label="edit_n_min",
                    interactive=True,
                )
                edit_n_max = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.01,
                    value=1.0,
                    label="edit_n_max",
                    interactive=True,
                )

                # Reset the n_min/n_max range to the recommended values for each edit type
                def edit_type_change_func(edit_type):
                    if edit_type == "only_lyrics":
                        n_min = 0.6
                        n_max = 1.0
                    elif edit_type == "remix":
                        n_min = 0.2
                        n_max = 0.4
                    return n_min, n_max

                edit_type.change(
                    edit_type_change_func,
                    inputs=[edit_type],
                    outputs=[edit_n_min, edit_n_max],
                )

                edit_source = gr.Radio(
                    ["text2music", "last_edit", "upload"],
                    value="text2music",
                    label="Edit Source",
                    elem_id="edit_source",
                )
                edit_source_audio_upload = gr.Audio(
                    label="Upload Audio",
                    type="filepath",
                    visible=False,
                    elem_id="edit_source_audio_upload",
                    show_download_button=True,
                )
                edit_source.change(
                    fn=lambda x: gr.update(
                        visible=x == "upload", elem_id="edit_source_audio_upload"
                    ),
                    inputs=[edit_source],
                    outputs=[edit_source_audio_upload],
                )

                edit_bnt = gr.Button("Edit", variant="primary")
                edit_outputs, edit_input_params_json = create_output_ui("Edit")

                def edit_process_func(
                    text2music_json_data,
                    edit_input_params_json,
                    edit_source,
                    edit_source_audio_upload,
                    prompt,
                    lyrics,
                    edit_prompt,
                    edit_lyrics,
                    edit_n_min,
                    edit_n_max,
                    infer_step,
                    guidance_scale,
                    scheduler_type,
                    cfg_type,
                    omega_scale,
                    manual_seeds,
                    guidance_interval,
                    guidance_interval_decay,
                    min_guidance_scale,
                    use_erg_tag,
                    use_erg_lyric,
                    use_erg_diffusion,
                    oss_steps,
                    guidance_scale_text,
                    guidance_scale_lyric,
                    retake_seeds,
                ):
                    if edit_source == "upload":
                        src_audio_path = edit_source_audio_upload
                        audio_duration = librosa.get_duration(filename=src_audio_path)
                        json_data = {"audio_duration": audio_duration}
                    elif edit_source == "text2music":
                        json_data = text2music_json_data
                        src_audio_path = json_data["audio_path"]
                    elif edit_source == "last_edit":
                        json_data = edit_input_params_json
                        src_audio_path = json_data["audio_path"]

                    # Fall back to the original tags/lyrics when the edit fields are empty
                    if not edit_prompt:
                        edit_prompt = prompt
                    if not edit_lyrics:
                        edit_lyrics = lyrics

                    return text2music_process_func(
                        json_data["audio_duration"],
                        prompt,
                        lyrics,
                        infer_step,
                        guidance_scale,
                        scheduler_type,
                        cfg_type,
                        omega_scale,
                        manual_seeds,
                        guidance_interval,
                        guidance_interval_decay,
                        min_guidance_scale,
                        use_erg_tag,
                        use_erg_lyric,
                        use_erg_diffusion,
                        oss_steps,
                        guidance_scale_text,
                        guidance_scale_lyric,
                        task="edit",
                        src_audio_path=src_audio_path,
                        edit_target_prompt=edit_prompt,
                        edit_target_lyrics=edit_lyrics,
                        edit_n_min=edit_n_min,
                        edit_n_max=edit_n_max,
                        retake_seeds=retake_seeds,
                        lora_name_or_path=json_data.get("lora_name_or_path", "none"),
                    )

                edit_bnt.click(
                    fn=edit_process_func,
                    inputs=[
                        input_params_json,
                        edit_input_params_json,
                        edit_source,
                        edit_source_audio_upload,
                        prompt,
                        lyrics,
                        edit_prompt,
                        edit_lyrics,
                        edit_n_min,
                        edit_n_max,
                        infer_step,
                        guidance_scale,
                        scheduler_type,
                        cfg_type,
                        omega_scale,
                        manual_seeds,
                        guidance_interval,
                        guidance_interval_decay,
                        min_guidance_scale,
                        use_erg_tag,
                        use_erg_lyric,
                        use_erg_diffusion,
                        oss_steps,
                        guidance_scale_text,
                        guidance_scale_lyric,
                        retake_seeds,
                    ],
                    outputs=edit_outputs + [edit_input_params_json],
                )
            with gr.Tab("extend"):
                extend_seeds = gr.Textbox(
                    label="extend seeds (default None)", placeholder="", value=None
                )
                left_extend_length = gr.Slider(
                    minimum=0.0,
                    maximum=240.0,
                    step=0.01,
                    value=0.0,
                    label="Left Extend Length",
                    interactive=True,
                )
                right_extend_length = gr.Slider(
                    minimum=0.0,
                    maximum=240.0,
                    step=0.01,
                    value=30.0,
                    label="Right Extend Length",
                    interactive=True,
                )
                extend_source = gr.Radio(
                    ["text2music", "last_extend", "upload"],
                    value="text2music",
                    label="Extend Source",
                    elem_id="extend_source",
                )

                extend_source_audio_upload = gr.Audio(
                    label="Upload Audio",
                    type="filepath",
                    visible=False,
                    elem_id="extend_source_audio_upload",
                    show_download_button=True,
                )
                extend_source.change(
                    fn=lambda x: gr.update(
                        visible=x == "upload", elem_id="extend_source_audio_upload"
                    ),
                    inputs=[extend_source],
                    outputs=[extend_source_audio_upload],
                )

                extend_bnt = gr.Button("Extend", variant="primary")
                extend_outputs, extend_input_params_json = create_output_ui("Extend")

                def extend_process_func(
                    text2music_json_data,
                    extend_input_params_json,
                    extend_seeds,
                    left_extend_length,
                    right_extend_length,
                    extend_source,
                    extend_source_audio_upload,
                    prompt,
                    lyrics,
                    infer_step,
                    guidance_scale,
                    scheduler_type,
                    cfg_type,
                    omega_scale,
                    manual_seeds,
                    guidance_interval,
                    guidance_interval_decay,
                    min_guidance_scale,
                    use_erg_tag,
                    use_erg_lyric,
                    use_erg_diffusion,
                    oss_steps,
                    guidance_scale_text,
                    guidance_scale_lyric,
                ):
                    if extend_source == "upload":
                        src_audio_path = extend_source_audio_upload
                        # Get the duration of the uploaded audio
                        audio_duration = librosa.get_duration(filename=src_audio_path)
                        json_data = {"audio_duration": audio_duration}
                    elif extend_source == "text2music":
                        json_data = text2music_json_data
                        src_audio_path = json_data["audio_path"]
                    elif extend_source == "last_extend":
                        json_data = extend_input_params_json
                        src_audio_path = json_data["audio_path"]

                    # Extending is implemented as a repaint outside the original bounds
                    repaint_start = -left_extend_length
                    repaint_end = json_data["audio_duration"] + right_extend_length
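                    # Worked example: for a 120 s source with
                    # left_extend_length=10 and right_extend_length=30, this
                    # gives repaint_start=-10 and repaint_end=150; the negative
                    # start is assumed to mean "prepend 10 s before 0".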
                    return text2music_process_func(
                        json_data["audio_duration"],
                        prompt,
                        lyrics,
                        infer_step,
                        guidance_scale,
                        scheduler_type,
                        cfg_type,
                        omega_scale,
                        manual_seeds,
                        guidance_interval,
                        guidance_interval_decay,
                        min_guidance_scale,
                        use_erg_tag,
                        use_erg_lyric,
                        use_erg_diffusion,
                        oss_steps,
                        guidance_scale_text,
                        guidance_scale_lyric,
                        retake_seeds=extend_seeds,
                        retake_variance=1.0,
                        task="extend",
                        repaint_start=repaint_start,
                        repaint_end=repaint_end,
                        src_audio_path=src_audio_path,
                        lora_name_or_path=json_data.get("lora_name_or_path", "none"),
                    )

                extend_bnt.click(
                    fn=extend_process_func,
                    inputs=[
                        input_params_json,
                        extend_input_params_json,
                        extend_seeds,
                        left_extend_length,
                        right_extend_length,
                        extend_source,
                        extend_source_audio_upload,
                        prompt,
                        lyrics,
                        infer_step,
                        guidance_scale,
                        scheduler_type,
                        cfg_type,
                        omega_scale,
                        manual_seeds,
                        guidance_interval,
                        guidance_interval_decay,
                        min_guidance_scale,
                        use_erg_tag,
                        use_erg_lyric,
                        use_erg_diffusion,
                        oss_steps,
                        guidance_scale_text,
                        guidance_scale_lyric,
                    ],
                    outputs=extend_outputs + [extend_input_params_json],
                )

        # Unpack a parameters JSON back into the UI component values
        def json2output(json_data):
            return (
                json_data["audio_duration"],
                json_data["prompt"],
                json_data["lyrics"],
                json_data["infer_step"],
                json_data["guidance_scale"],
                json_data["scheduler_type"],
                json_data["cfg_type"],
                json_data["omega_scale"],
                ", ".join(map(str, json_data["actual_seeds"])),
                json_data["guidance_interval"],
                json_data["guidance_interval_decay"],
                json_data["min_guidance_scale"],
                json_data["use_erg_tag"],
                json_data["use_erg_lyric"],
                json_data["use_erg_diffusion"],
                ", ".join(map(str, json_data["oss_steps"])),
                json_data.get("guidance_scale_text", 0.0),
                json_data.get("guidance_scale_lyric", 0.0),
                json_data.get("audio2audio_enable", False),
                json_data.get("ref_audio_strength", 0.5),
                json_data.get("ref_audio_input", None),
            )
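
        # json2output is the read direction of the same parameters schema used
        # by the retake/repaint handlers above; here it feeds the Sample
        # button, which loads a sampled configuration back into every input
        # component.
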
        def sample_data(lora_name_or_path_):
            json_data = sample_data_func(lora_name_or_path_)
            return json2output(json_data)

        sample_bnt.click(
            sample_data,
            inputs=[lora_name_or_path],
            outputs=[
                audio_duration,
                prompt,
                lyrics,
                infer_step,
                guidance_scale,
                scheduler_type,
                cfg_type,
                omega_scale,
                manual_seeds,
                guidance_interval,
                guidance_interval_decay,
                min_guidance_scale,
                use_erg_tag,
                use_erg_lyric,
                use_erg_diffusion,
                oss_steps,
                guidance_scale_text,
                guidance_scale_lyric,
                audio2audio_enable,
                ref_audio_strength,
                ref_audio_input,
            ],
        )

        text2music_bnt.click(
            fn=text2music_process_func,
            inputs=[
                audio_duration,
                prompt,
                lyrics,
                infer_step,
                guidance_scale,
                scheduler_type,
                cfg_type,
                omega_scale,
                manual_seeds,
                guidance_interval,
                guidance_interval_decay,
                min_guidance_scale,
                use_erg_tag,
                use_erg_lyric,
                use_erg_diffusion,
                oss_steps,
                guidance_scale_text,
                guidance_scale_lyric,
                audio2audio_enable,
                ref_audio_strength,
                ref_audio_input,
                lora_name_or_path,
            ],
            outputs=outputs + [input_params_json],
        )
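
        # Both click wirings above pass components positionally, so the order
        # of each inputs list must match the corresponding function signature
        # exactly; for text2music_process_func, lora_name_or_path is the final
        # positional argument.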


def create_main_demo_ui(
    text2music_process_func=dump_func,
    sample_data_func=dump_func,
    load_data_func=dump_func,
):
    with gr.Blocks(
        title="ACE-Step Model 1.0 DEMO",
    ) as demo:
        gr.Markdown(
            """
            <h1 style="text-align: center;">ACE-Step: A Step Towards Music Generation Foundation Model</h1>
            <p>
                <a href="https://ace-step.github.io/" target='_blank'>Project</a> |
                <a href="https://huggingface.co/ACE-Step/ACE-Step-v1-3.5B">Checkpoints</a> |
                <a href="https://discord.gg/rjAZz2xBdG" target='_blank'>Discord</a>
            </p>
            """
        )
        with gr.Tab("text2music"):
            create_text2music_ui(
                gr=gr,
                text2music_process_func=text2music_process_func,
                sample_data_func=sample_data_func,
                load_data_func=load_data_func,
            )
    return demo


if __name__ == "__main__":
    demo = create_main_demo_ui()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )
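
# Usage sketch (hypothetical; dump_func above is only a stand-in): in the real
# app the processing callback would come from the ACE-Step pipeline, roughly:
#
#     from acestep.pipeline_ace_step import ACEStepPipeline  # assumed import path
#
#     pipeline = ACEStepPipeline(checkpoint_dir="./checkpoints")  # assumed args
#     demo = create_main_demo_ui(text2music_process_func=pipeline.__call__)
#     demo.launch(server_name="0.0.0.0", server_port=7860)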