ginipick committed on
Commit 8f7c6b0 · verified · 1 Parent(s): a4f5bc8

Delete ui/components-backup.py

Files changed (1)
  1. ui/components-backup.py +0 -958
ui/components-backup.py DELETED
@@ -1,958 +0,0 @@
- """
- ACE-Step: A Step Towards Music Generation Foundation Model
-
- https://github.com/ace-step/ACE-Step
-
- Apache 2.0 License
- """
-
- import gradio as gr
- import librosa
- import os
-
-
- TAG_DEFAULT = "funk, pop, soul, rock, melodic, guitar, drums, bass, keyboard, percussion, 105 BPM, energetic, upbeat, groovy, vibrant, dynamic"
- LYRIC_DEFAULT = """[verse]
- Neon lights they flicker bright
- City hums in dead of night
- Rhythms pulse through concrete veins
- Lost in echoes of refrains
-
- [verse]
- Bassline groovin' in my chest
- Heartbeats match the city's zest
- Electric whispers fill the air
- Synthesized dreams everywhere
-
- [chorus]
- Turn it up and let it flow
- Feel the fire let it grow
- In this rhythm we belong
- Hear the night sing out our song
-
- [verse]
- Guitar strings they start to weep
- Wake the soul from silent sleep
- Every note a story told
- In this night we’re bold and gold
-
- [bridge]
- Voices blend in harmony
- Lost in pure cacophony
- Timeless echoes timeless cries
- Soulful shouts beneath the skies
-
- [verse]
- Keyboard dances on the keys
- Melodies on evening breeze
- Catch the tune and hold it tight
- In this moment we take flight
- """
-
- # Genre presets used to pre-fill the Tags field
- GENRE_PRESETS = {
-     "Modern Pop": "pop, synth, drums, guitar, 120 bpm, upbeat, catchy, vibrant, female vocals, polished vocals",
-     "Rock": "rock, electric guitar, drums, bass, 130 bpm, energetic, rebellious, gritty, male vocals, raw vocals",
-     "Hip Hop": "hip hop, 808 bass, hi-hats, synth, 90 bpm, bold, urban, intense, male vocals, rhythmic vocals",
-     "Country": "country, acoustic guitar, steel guitar, fiddle, 100 bpm, heartfelt, rustic, warm, male vocals, twangy vocals",
-     "EDM": "edm, synth, bass, kick drum, 128 bpm, euphoric, pulsating, energetic, instrumental",
-     "Reggae": "reggae, guitar, bass, drums, 80 bpm, chill, soulful, positive, male vocals, smooth vocals",
-     "Classical": "classical, orchestral, strings, piano, 60 bpm, elegant, emotive, timeless, instrumental",
-     "Jazz": "jazz, saxophone, piano, double bass, 110 bpm, smooth, improvisational, soulful, male vocals, crooning vocals",
-     "Metal": "metal, electric guitar, double kick drum, bass, 160 bpm, aggressive, intense, heavy, male vocals, screamed vocals",
-     "R&B": "r&b, synth, bass, drums, 85 bpm, sultry, groovy, romantic, female vocals, silky vocals"
- }
-
- # Map a preset name to its tag string; "Custom" clears the field
- def update_tags_from_preset(preset_name):
-     if preset_name == "Custom":
-         return ""
-     return GENRE_PRESETS.get(preset_name, "")
-
-
- def create_output_ui(task_name="Text2Music"):
-     # For many consumer-grade GPU devices, only one batch can be run
-     output_audio1 = gr.Audio(type="filepath", label=f"{task_name} Generated Audio 1")
-     # output_audio2 = gr.Audio(type="filepath", label="Generated Audio 2")
-     with gr.Accordion(f"{task_name} Parameters", open=False):
-         input_params_json = gr.JSON(label=f"{task_name} Parameters")
-     # outputs = [output_audio1, output_audio2]
-     outputs = [output_audio1]
-     return outputs, input_params_json
-
-
- def dump_func(*args):
-     print(args)
-     return []
-
-
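- # Build the full text2music tab: generation inputs on the left (tags, lyrics,
- # basic and advanced settings), outputs plus retake/repaint/edit/extend tabs on the right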
- def create_text2music_ui(
-     gr,
-     text2music_process_func,
-     sample_data_func=None,
-     load_data_func=None,
- ):
-
-     with gr.Row():
-         with gr.Column():
-             with gr.Row(equal_height=True):
-                 # Tags and lyrics examples are from the AI music generation community
-                 audio_duration = gr.Slider(
-                     -1,
-                     240.0,
-                     step=0.00001,
-                     value=-1,
-                     label="Audio Duration",
-                     interactive=True,
-                     info="-1 means random duration (30 ~ 240).",
-                     scale=9,
-                 )
-                 sample_bnt = gr.Button("Sample", variant="secondary", scale=1)
-
-             # audio2audio
-             with gr.Row(equal_height=True):
-                 audio2audio_enable = gr.Checkbox(label="Enable Audio2Audio", value=False, info="Check to enable Audio-to-Audio generation using a reference audio.", elem_id="audio2audio_checkbox")
-                 lora_name_or_path = gr.Dropdown(
-                     label="Lora Name or Path",
-                     choices=["ACE-Step/ACE-Step-v1-chinese-rap-LoRA", "none"],
-                     value="none",
-                     allow_custom_value=True,
-                 )
-
-             ref_audio_input = gr.Audio(type="filepath", label="Reference Audio (for Audio2Audio)", visible=False, elem_id="ref_audio_input", show_download_button=True)
-             ref_audio_strength = gr.Slider(
-                 label="Reference audio strength",
-                 minimum=0.0,
-                 maximum=1.0,
-                 step=0.01,
-                 value=0.5,
-                 elem_id="ref_audio_strength",
-                 visible=False,
-                 interactive=True,
-             )
-
-             def toggle_ref_audio_visibility(is_checked):
-                 return (
-                     gr.update(visible=is_checked, elem_id="ref_audio_input"),
-                     gr.update(visible=is_checked, elem_id="ref_audio_strength"),
-                 )
-
-             audio2audio_enable.change(
-                 fn=toggle_ref_audio_visibility,
-                 inputs=[audio2audio_enable],
-                 outputs=[ref_audio_input, ref_audio_strength],
-             )
-
-             with gr.Column(scale=2):
-                 with gr.Group():
-                     gr.Markdown("""<center>Supports tags, descriptions, and scenes. Use commas to separate different tags.<br>Tag and lyric examples are from the AI music generation community.</center>""")
-                     with gr.Row():
-                         genre_preset = gr.Dropdown(
-                             choices=["Custom"] + list(GENRE_PRESETS.keys()),
-                             value="Custom",
-                             label="Preset",
-                             scale=1,
-                         )
-                         prompt = gr.Textbox(
-                             lines=1,
-                             label="Tags",
-                             max_lines=4,
-                             value=TAG_DEFAULT,
-                             scale=9,
-                         )
-
-                     # Update the Tags field whenever a preset is chosen
-                     genre_preset.change(
-                         fn=update_tags_from_preset,
-                         inputs=[genre_preset],
-                         outputs=[prompt]
-                     )
-                 with gr.Group():
-                     gr.Markdown("""<center>Supports lyric structure tags like [verse], [chorus], and [bridge] to separate different parts of the lyrics.<br>Use [instrumental] or [inst] to generate instrumental music. Genre structure tags are not supported in lyrics.</center>""")
-                     lyrics = gr.Textbox(
-                         lines=9,
-                         label="Lyrics",
-                         max_lines=13,
-                         value=LYRIC_DEFAULT,
-                     )
-
-             with gr.Accordion("Basic Settings", open=False):
-                 infer_step = gr.Slider(
-                     minimum=1,
-                     maximum=200,
-                     step=1,
-                     value=200,
-                     label="Infer Steps",
-                     interactive=True,
-                 )
-                 guidance_scale = gr.Slider(
-                     minimum=0.0,
-                     maximum=30.0,
-                     step=0.1,
-                     value=15.0,
-                     label="Guidance Scale",
-                     interactive=True,
-                     info="When guidance_scale_lyric > 1 and guidance_scale_text > 1, the guidance scale will not be applied.",
-                 )
-                 guidance_scale_text = gr.Slider(
-                     minimum=0.0,
-                     maximum=10.0,
-                     step=0.1,
-                     value=0.0,
-                     label="Guidance Scale Text",
-                     interactive=True,
-                     info="Guidance scale for the text condition. It only applies to cfg. Set guidance_scale_text=5.0 and guidance_scale_lyric=1.5 to start.",
-                 )
-                 guidance_scale_lyric = gr.Slider(
-                     minimum=0.0,
-                     maximum=10.0,
-                     step=0.1,
-                     value=0.0,
-                     label="Guidance Scale Lyric",
-                     interactive=True,
-                 )
-
-                 manual_seeds = gr.Textbox(
-                     label="manual seeds (default None)",
-                     placeholder="1,2,3,4",
-                     value=None,
-                     info="Seed for the generation",
-                 )
-
-             with gr.Accordion("Advanced Settings", open=False):
-                 scheduler_type = gr.Radio(
-                     ["euler", "heun"],
-                     value="euler",
-                     label="Scheduler Type",
-                     elem_id="scheduler_type",
-                     info="Scheduler type for the generation. euler is recommended. heun will take more time.",
-                 )
-                 cfg_type = gr.Radio(
-                     ["cfg", "apg", "cfg_star"],
-                     value="apg",
-                     label="CFG Type",
-                     elem_id="cfg_type",
-                     info="CFG type for the generation. apg is recommended. cfg and cfg_star are almost the same.",
-                 )
-                 use_erg_tag = gr.Checkbox(
-                     label="use ERG for tag",
-                     value=True,
-                     info="Use Entropy Rectifying Guidance for tags. It multiplies a temperature into the attention to weaken the tag condition and improve diversity.",
-                 )
-                 use_erg_lyric = gr.Checkbox(
-                     label="use ERG for lyric",
-                     value=False,
-                     info="The same, but applied to the lyric encoder's attention.",
-                 )
-                 use_erg_diffusion = gr.Checkbox(
-                     label="use ERG for diffusion",
-                     value=True,
-                     info="The same, but applied to the diffusion model's attention.",
-                 )
-
-                 omega_scale = gr.Slider(
-                     minimum=-100.0,
-                     maximum=100.0,
-                     step=0.1,
-                     value=10.0,
-                     label="Granularity Scale",
-                     interactive=True,
-                     info="Granularity scale for the generation. Higher values can reduce artifacts.",
-                 )
-
-                 guidance_interval = gr.Slider(
-                     minimum=0.0,
-                     maximum=1.0,
-                     step=0.01,
-                     value=0.5,
-                     label="Guidance Interval",
-                     interactive=True,
-                     info="Guidance interval for the generation. 0.5 means guidance is only applied in the middle steps (0.25 * infer_steps to 0.75 * infer_steps).",
-                 )
-                 guidance_interval_decay = gr.Slider(
-                     minimum=0.0,
-                     maximum=1.0,
-                     step=0.01,
-                     value=0.0,
-                     label="Guidance Interval Decay",
-                     interactive=True,
-                     info="Guidance interval decay for the generation. The guidance scale decays from guidance_scale to min_guidance_scale within the interval. 0.0 means no decay.",
-                 )
-                 min_guidance_scale = gr.Slider(
-                     minimum=0.0,
-                     maximum=200.0,
-                     step=0.1,
-                     value=3.0,
-                     label="Min Guidance Scale",
-                     interactive=True,
-                     info="Minimum guidance scale at the end of guidance interval decay",
-                 )
-                 oss_steps = gr.Textbox(
-                     label="OSS Steps",
-                     placeholder="16, 29, 52, 96, 129, 158, 172, 183, 189, 200",
-                     value=None,
-                     info="Optimal steps for the generation, but not well tested",
-                 )
-
-             text2music_bnt = gr.Button("Generate", variant="primary")
-
-         with gr.Column():
-             outputs, input_params_json = create_output_ui()
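-             # "retake": re-roll the previous generation with fresh seeds; variance
-             # controls how far the new take may drift from the stored result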
-             with gr.Tab("retake"):
-                 retake_variance = gr.Slider(
-                     minimum=0.0, maximum=1.0, step=0.01, value=0.2, label="variance"
-                 )
-                 retake_seeds = gr.Textbox(
-                     label="retake seeds (default None)", placeholder="", value=None
-                 )
-                 retake_bnt = gr.Button("Retake", variant="primary")
-                 retake_outputs, retake_input_params_json = create_output_ui("Retake")
-
-                 def retake_process_func(json_data, retake_variance, retake_seeds):
-                     return text2music_process_func(
-                         json_data["audio_duration"],
-                         json_data["prompt"],
-                         json_data["lyrics"],
-                         json_data["infer_step"],
-                         json_data["guidance_scale"],
-                         json_data["scheduler_type"],
-                         json_data["cfg_type"],
-                         json_data["omega_scale"],
-                         ", ".join(map(str, json_data["actual_seeds"])),
-                         json_data["guidance_interval"],
-                         json_data["guidance_interval_decay"],
-                         json_data["min_guidance_scale"],
-                         json_data["use_erg_tag"],
-                         json_data["use_erg_lyric"],
-                         json_data["use_erg_diffusion"],
-                         ", ".join(map(str, json_data["oss_steps"])),
-                         (
-                             json_data["guidance_scale_text"]
-                             if "guidance_scale_text" in json_data
-                             else 0.0
-                         ),
-                         (
-                             json_data["guidance_scale_lyric"]
-                             if "guidance_scale_lyric" in json_data
-                             else 0.0
-                         ),
-                         retake_seeds=retake_seeds,
-                         retake_variance=retake_variance,
-                         task="retake",
-                         lora_name_or_path="none" if "lora_name_or_path" not in json_data else json_data["lora_name_or_path"]
-                     )
-
-                 retake_bnt.click(
-                     fn=retake_process_func,
-                     inputs=[
-                         input_params_json,
-                         retake_variance,
-                         retake_seeds,
-                     ],
-                     outputs=retake_outputs + [retake_input_params_json],
-                 )
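-             # "repainting": regenerate only the [repaint_start, repaint_end] window of a
-             # source track (the last text2music result, the last repaint, or an upload)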
-             with gr.Tab("repainting"):
-                 retake_variance = gr.Slider(
-                     minimum=0.0, maximum=1.0, step=0.01, value=0.2, label="variance"
-                 )
-                 retake_seeds = gr.Textbox(
-                     label="repaint seeds (default None)", placeholder="", value=None
-                 )
-                 repaint_start = gr.Slider(
-                     minimum=0.0,
-                     maximum=240.0,
-                     step=0.01,
-                     value=0.0,
-                     label="Repaint Start Time",
-                     interactive=True,
-                 )
-                 repaint_end = gr.Slider(
-                     minimum=0.0,
-                     maximum=240.0,
-                     step=0.01,
-                     value=30.0,
-                     label="Repaint End Time",
-                     interactive=True,
-                 )
-                 repaint_source = gr.Radio(
-                     ["text2music", "last_repaint", "upload"],
-                     value="text2music",
-                     label="Repaint Source",
-                     elem_id="repaint_source",
-                 )
-
-                 repaint_source_audio_upload = gr.Audio(
-                     label="Upload Audio",
-                     type="filepath",
-                     visible=False,
-                     elem_id="repaint_source_audio_upload",
-                     show_download_button=True,
-                 )
-                 repaint_source.change(
-                     fn=lambda x: gr.update(
-                         visible=x == "upload", elem_id="repaint_source_audio_upload"
-                     ),
-                     inputs=[repaint_source],
-                     outputs=[repaint_source_audio_upload],
-                 )
-
-                 repaint_bnt = gr.Button("Repaint", variant="primary")
-                 repaint_outputs, repaint_input_params_json = create_output_ui("Repaint")
-
-                 def repaint_process_func(
-                     text2music_json_data,
-                     repaint_json_data,
-                     retake_variance,
-                     retake_seeds,
-                     repaint_start,
-                     repaint_end,
-                     repaint_source,
-                     repaint_source_audio_upload,
-                     prompt,
-                     lyrics,
-                     infer_step,
-                     guidance_scale,
-                     scheduler_type,
-                     cfg_type,
-                     omega_scale,
-                     manual_seeds,
-                     guidance_interval,
-                     guidance_interval_decay,
-                     min_guidance_scale,
-                     use_erg_tag,
-                     use_erg_lyric,
-                     use_erg_diffusion,
-                     oss_steps,
-                     guidance_scale_text,
-                     guidance_scale_lyric,
-                 ):
-                     if repaint_source == "upload":
-                         src_audio_path = repaint_source_audio_upload
-                         audio_duration = librosa.get_duration(filename=src_audio_path)
-                         json_data = {"audio_duration": audio_duration}
-                     elif repaint_source == "text2music":
-                         json_data = text2music_json_data
-                         src_audio_path = json_data["audio_path"]
-                     elif repaint_source == "last_repaint":
-                         json_data = repaint_json_data
-                         src_audio_path = json_data["audio_path"]
-
-                     return text2music_process_func(
-                         json_data["audio_duration"],
-                         prompt,
-                         lyrics,
-                         infer_step,
-                         guidance_scale,
-                         scheduler_type,
-                         cfg_type,
-                         omega_scale,
-                         manual_seeds,
-                         guidance_interval,
-                         guidance_interval_decay,
-                         min_guidance_scale,
-                         use_erg_tag,
-                         use_erg_lyric,
-                         use_erg_diffusion,
-                         oss_steps,
-                         guidance_scale_text,
-                         guidance_scale_lyric,
-                         retake_seeds=retake_seeds,
-                         retake_variance=retake_variance,
-                         task="repaint",
-                         repaint_start=repaint_start,
-                         repaint_end=repaint_end,
-                         src_audio_path=src_audio_path,
-                         lora_name_or_path="none" if "lora_name_or_path" not in json_data else json_data["lora_name_or_path"]
-                     )
-
-                 repaint_bnt.click(
-                     fn=repaint_process_func,
-                     inputs=[
-                         input_params_json,
-                         repaint_input_params_json,
-                         retake_variance,
-                         retake_seeds,
-                         repaint_start,
-                         repaint_end,
-                         repaint_source,
-                         repaint_source_audio_upload,
-                         prompt,
-                         lyrics,
-                         infer_step,
-                         guidance_scale,
-                         scheduler_type,
-                         cfg_type,
-                         omega_scale,
-                         manual_seeds,
-                         guidance_interval,
-                         guidance_interval_decay,
-                         min_guidance_scale,
-                         use_erg_tag,
-                         use_erg_lyric,
-                         use_erg_diffusion,
-                         oss_steps,
-                         guidance_scale_text,
-                         guidance_scale_lyric,
-                     ],
-                     outputs=repaint_outputs + [repaint_input_params_json],
-                 )
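-             # "edit": regenerate the source audio toward new target tags/lyrics;
-             # edit_n_min/edit_n_max bound the edit strength (per-type presets set below)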
-             with gr.Tab("edit"):
-                 edit_prompt = gr.Textbox(lines=2, label="Edit Tags", max_lines=4)
-                 edit_lyrics = gr.Textbox(lines=9, label="Edit Lyrics", max_lines=13)
-                 retake_seeds = gr.Textbox(
-                     label="edit seeds (default None)", placeholder="", value=None
-                 )
-
-                 edit_type = gr.Radio(
-                     ["only_lyrics", "remix"],
-                     value="only_lyrics",
-                     label="Edit Type",
-                     elem_id="edit_type",
-                     info="`only_lyrics` keeps the whole song the same except for the lyrics. Keep the difference small, e.g. a one-line lyric change.\n`remix` can change the song's melody and genre.",
-                 )
-                 edit_n_min = gr.Slider(
-                     minimum=0.0,
-                     maximum=1.0,
-                     step=0.01,
-                     value=0.6,
-                     label="edit_n_min",
-                     interactive=True,
-                 )
-                 edit_n_max = gr.Slider(
-                     minimum=0.0,
-                     maximum=1.0,
-                     step=0.01,
-                     value=1.0,
-                     label="edit_n_max",
-                     interactive=True,
-                 )
-
-                 def edit_type_change_func(edit_type):
-                     if edit_type == "only_lyrics":
-                         n_min = 0.6
-                         n_max = 1.0
-                     elif edit_type == "remix":
-                         n_min = 0.2
-                         n_max = 0.4
-                     return n_min, n_max
-
-                 edit_type.change(
-                     edit_type_change_func,
-                     inputs=[edit_type],
-                     outputs=[edit_n_min, edit_n_max],
-                 )
-
-                 edit_source = gr.Radio(
-                     ["text2music", "last_edit", "upload"],
-                     value="text2music",
-                     label="Edit Source",
-                     elem_id="edit_source",
-                 )
-                 edit_source_audio_upload = gr.Audio(
-                     label="Upload Audio",
-                     type="filepath",
-                     visible=False,
-                     elem_id="edit_source_audio_upload",
-                     show_download_button=True,
-                 )
-                 edit_source.change(
-                     fn=lambda x: gr.update(
-                         visible=x == "upload", elem_id="edit_source_audio_upload"
-                     ),
-                     inputs=[edit_source],
-                     outputs=[edit_source_audio_upload],
-                 )
-
-                 edit_bnt = gr.Button("Edit", variant="primary")
-                 edit_outputs, edit_input_params_json = create_output_ui("Edit")
-
-                 def edit_process_func(
-                     text2music_json_data,
-                     edit_input_params_json,
-                     edit_source,
-                     edit_source_audio_upload,
-                     prompt,
-                     lyrics,
-                     edit_prompt,
-                     edit_lyrics,
-                     edit_n_min,
-                     edit_n_max,
-                     infer_step,
-                     guidance_scale,
-                     scheduler_type,
-                     cfg_type,
-                     omega_scale,
-                     manual_seeds,
-                     guidance_interval,
-                     guidance_interval_decay,
-                     min_guidance_scale,
-                     use_erg_tag,
-                     use_erg_lyric,
-                     use_erg_diffusion,
-                     oss_steps,
-                     guidance_scale_text,
-                     guidance_scale_lyric,
-                     retake_seeds,
-                 ):
-                     if edit_source == "upload":
-                         src_audio_path = edit_source_audio_upload
-                         audio_duration = librosa.get_duration(filename=src_audio_path)
-                         json_data = {"audio_duration": audio_duration}
-                     elif edit_source == "text2music":
-                         json_data = text2music_json_data
-                         src_audio_path = json_data["audio_path"]
-                     elif edit_source == "last_edit":
-                         json_data = edit_input_params_json
-                         src_audio_path = json_data["audio_path"]
-
-                     if not edit_prompt:
-                         edit_prompt = prompt
-                     if not edit_lyrics:
-                         edit_lyrics = lyrics
-
-                     return text2music_process_func(
-                         json_data["audio_duration"],
-                         prompt,
-                         lyrics,
-                         infer_step,
-                         guidance_scale,
-                         scheduler_type,
-                         cfg_type,
-                         omega_scale,
-                         manual_seeds,
-                         guidance_interval,
-                         guidance_interval_decay,
-                         min_guidance_scale,
-                         use_erg_tag,
-                         use_erg_lyric,
-                         use_erg_diffusion,
-                         oss_steps,
-                         guidance_scale_text,
-                         guidance_scale_lyric,
-                         task="edit",
-                         src_audio_path=src_audio_path,
-                         edit_target_prompt=edit_prompt,
-                         edit_target_lyrics=edit_lyrics,
-                         edit_n_min=edit_n_min,
-                         edit_n_max=edit_n_max,
-                         retake_seeds=retake_seeds,
-                         lora_name_or_path="none" if "lora_name_or_path" not in json_data else json_data["lora_name_or_path"]
-                     )
-
-                 edit_bnt.click(
-                     fn=edit_process_func,
-                     inputs=[
-                         input_params_json,
-                         edit_input_params_json,
-                         edit_source,
-                         edit_source_audio_upload,
-                         prompt,
-                         lyrics,
-                         edit_prompt,
-                         edit_lyrics,
-                         edit_n_min,
-                         edit_n_max,
-                         infer_step,
-                         guidance_scale,
-                         scheduler_type,
-                         cfg_type,
-                         omega_scale,
-                         manual_seeds,
-                         guidance_interval,
-                         guidance_interval_decay,
-                         min_guidance_scale,
-                         use_erg_tag,
-                         use_erg_lyric,
-                         use_erg_diffusion,
-                         oss_steps,
-                         guidance_scale_text,
-                         guidance_scale_lyric,
-                         retake_seeds,
-                     ],
-                     outputs=edit_outputs + [edit_input_params_json],
-                 )
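-             # "extend": implemented as a repaint whose window sticks out past the clip;
-             # a negative start pads on the left, an end beyond the duration pads on the right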
-             with gr.Tab("extend"):
-                 extend_seeds = gr.Textbox(
-                     label="extend seeds (default None)", placeholder="", value=None
-                 )
-                 left_extend_length = gr.Slider(
-                     minimum=0.0,
-                     maximum=240.0,
-                     step=0.01,
-                     value=0.0,
-                     label="Left Extend Length",
-                     interactive=True,
-                 )
-                 right_extend_length = gr.Slider(
-                     minimum=0.0,
-                     maximum=240.0,
-                     step=0.01,
-                     value=30.0,
-                     label="Right Extend Length",
-                     interactive=True,
-                 )
-                 extend_source = gr.Radio(
-                     ["text2music", "last_extend", "upload"],
-                     value="text2music",
-                     label="Extend Source",
-                     elem_id="extend_source",
-                 )
-
-                 extend_source_audio_upload = gr.Audio(
-                     label="Upload Audio",
-                     type="filepath",
-                     visible=False,
-                     elem_id="extend_source_audio_upload",
-                     show_download_button=True,
-                 )
-                 extend_source.change(
-                     fn=lambda x: gr.update(
-                         visible=x == "upload", elem_id="extend_source_audio_upload"
-                     ),
-                     inputs=[extend_source],
-                     outputs=[extend_source_audio_upload],
-                 )
-
-                 extend_bnt = gr.Button("Extend", variant="primary")
-                 extend_outputs, extend_input_params_json = create_output_ui("Extend")
-
-                 def extend_process_func(
-                     text2music_json_data,
-                     extend_input_params_json,
-                     extend_seeds,
-                     left_extend_length,
-                     right_extend_length,
-                     extend_source,
-                     extend_source_audio_upload,
-                     prompt,
-                     lyrics,
-                     infer_step,
-                     guidance_scale,
-                     scheduler_type,
-                     cfg_type,
-                     omega_scale,
-                     manual_seeds,
-                     guidance_interval,
-                     guidance_interval_decay,
-                     min_guidance_scale,
-                     use_erg_tag,
-                     use_erg_lyric,
-                     use_erg_diffusion,
-                     oss_steps,
-                     guidance_scale_text,
-                     guidance_scale_lyric,
-                 ):
-                     if extend_source == "upload":
-                         src_audio_path = extend_source_audio_upload
-                         # get audio duration
-                         audio_duration = librosa.get_duration(filename=src_audio_path)
-                         json_data = {"audio_duration": audio_duration}
-                     elif extend_source == "text2music":
-                         json_data = text2music_json_data
-                         src_audio_path = json_data["audio_path"]
-                     elif extend_source == "last_extend":
-                         json_data = extend_input_params_json
-                         src_audio_path = json_data["audio_path"]
-
-                     repaint_start = -left_extend_length
-                     repaint_end = json_data["audio_duration"] + right_extend_length
-                     return text2music_process_func(
-                         json_data["audio_duration"],
-                         prompt,
-                         lyrics,
-                         infer_step,
-                         guidance_scale,
-                         scheduler_type,
-                         cfg_type,
-                         omega_scale,
-                         manual_seeds,
-                         guidance_interval,
-                         guidance_interval_decay,
-                         min_guidance_scale,
-                         use_erg_tag,
-                         use_erg_lyric,
-                         use_erg_diffusion,
-                         oss_steps,
-                         guidance_scale_text,
-                         guidance_scale_lyric,
-                         retake_seeds=extend_seeds,
-                         retake_variance=1.0,
-                         task="extend",
-                         repaint_start=repaint_start,
-                         repaint_end=repaint_end,
-                         src_audio_path=src_audio_path,
-                         lora_name_or_path="none" if "lora_name_or_path" not in json_data else json_data["lora_name_or_path"]
-                     )
-
-                 extend_bnt.click(
-                     fn=extend_process_func,
-                     inputs=[
-                         input_params_json,
-                         extend_input_params_json,
-                         extend_seeds,
-                         left_extend_length,
-                         right_extend_length,
-                         extend_source,
-                         extend_source_audio_upload,
-                         prompt,
-                         lyrics,
-                         infer_step,
-                         guidance_scale,
-                         scheduler_type,
-                         cfg_type,
-                         omega_scale,
-                         manual_seeds,
-                         guidance_interval,
-                         guidance_interval_decay,
-                         min_guidance_scale,
-                         use_erg_tag,
-                         use_erg_lyric,
-                         use_erg_diffusion,
-                         oss_steps,
-                         guidance_scale_text,
-                         guidance_scale_lyric,
-                     ],
-                     outputs=extend_outputs + [extend_input_params_json],
-                 )
-
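-     # Unpack a stored parameter dict back into UI widget values, defaulting the
-     # fields (text/lyric guidance, audio2audio) that older runs may not have saved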
-     def json2output(json_data):
-         return (
-             json_data["audio_duration"],
-             json_data["prompt"],
-             json_data["lyrics"],
-             json_data["infer_step"],
-             json_data["guidance_scale"],
-             json_data["scheduler_type"],
-             json_data["cfg_type"],
-             json_data["omega_scale"],
-             ", ".join(map(str, json_data["actual_seeds"])),
-             json_data["guidance_interval"],
-             json_data["guidance_interval_decay"],
-             json_data["min_guidance_scale"],
-             json_data["use_erg_tag"],
-             json_data["use_erg_lyric"],
-             json_data["use_erg_diffusion"],
-             ", ".join(map(str, json_data["oss_steps"])),
-             (
-                 json_data["guidance_scale_text"]
-                 if "guidance_scale_text" in json_data
-                 else 0.0
-             ),
-             (
-                 json_data["guidance_scale_lyric"]
-                 if "guidance_scale_lyric" in json_data
-                 else 0.0
-             ),
-             (
-                 json_data["audio2audio_enable"]
-                 if "audio2audio_enable" in json_data
-                 else False
-             ),
-             (
-                 json_data["ref_audio_strength"]
-                 if "ref_audio_strength" in json_data
-                 else 0.5
-             ),
-             (
-                 json_data["ref_audio_input"]
-                 if "ref_audio_input" in json_data
-                 else None
-             ),
-         )
-
-     def sample_data(lora_name_or_path_):
-         json_data = sample_data_func(lora_name_or_path_)
-         return json2output(json_data)
-
-     sample_bnt.click(
-         sample_data,
-         inputs=[lora_name_or_path],
-         outputs=[
-             audio_duration,
-             prompt,
-             lyrics,
-             infer_step,
-             guidance_scale,
-             scheduler_type,
-             cfg_type,
-             omega_scale,
-             manual_seeds,
-             guidance_interval,
-             guidance_interval_decay,
-             min_guidance_scale,
-             use_erg_tag,
-             use_erg_lyric,
-             use_erg_diffusion,
-             oss_steps,
-             guidance_scale_text,
-             guidance_scale_lyric,
-             audio2audio_enable,
-             ref_audio_strength,
-             ref_audio_input,
-         ],
-     )
-
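-     # Main generate button: forwards every UI value to the processing function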
-     text2music_bnt.click(
-         fn=text2music_process_func,
-         inputs=[
-             audio_duration,
-             prompt,
-             lyrics,
-             infer_step,
-             guidance_scale,
-             scheduler_type,
-             cfg_type,
-             omega_scale,
-             manual_seeds,
-             guidance_interval,
-             guidance_interval_decay,
-             min_guidance_scale,
-             use_erg_tag,
-             use_erg_lyric,
-             use_erg_diffusion,
-             oss_steps,
-             guidance_scale_text,
-             guidance_scale_lyric,
-             audio2audio_enable,
-             ref_audio_strength,
-             ref_audio_input,
-             lora_name_or_path,
-         ],
-         outputs=outputs + [input_params_json],
-     )
-
-
- def create_main_demo_ui(
-     text2music_process_func=dump_func,
-     sample_data_func=dump_func,
-     load_data_func=dump_func,
- ):
-     with gr.Blocks(
-         title="ACE-Step Model 1.0 DEMO",
-     ) as demo:
-         gr.Markdown(
-             """
-             <h1 style="text-align: center;">ACE-Step: A Step Towards Music Generation Foundation Model</h1>
-             <p>
-                 <a href="https://ace-step.github.io/" target='_blank'>Project</a> |
-                 <a href="https://huggingface.co/ACE-Step/ACE-Step-v1-3.5B">Checkpoints</a> |
-                 <a href="https://discord.gg/rjAZz2xBdG" target='_blank'>Discord</a>
-             </p>
-             """
-         )
-         with gr.Tab("text2music"):
-             create_text2music_ui(
-                 gr=gr,
-                 text2music_process_func=text2music_process_func,
-                 sample_data_func=sample_data_func,
-                 load_data_func=load_data_func,
-             )
-     return demo
-
-
- if __name__ == "__main__":
-     demo = create_main_demo_ui()
-     demo.launch(
-         server_name="0.0.0.0",
-         server_port=7860,
-     )