aifeifei798 committed
Commit 44dd089 · verified · 1 parent: a6e0c21

Update ui/components.py

Files changed (1): ui/components.py +119 -136
ui/components.py CHANGED
@@ -1,8 +1,6 @@
 """
 ACE-Step: A Step Towards Music Generation Foundation Model
-
 https://github.com/ace-step/ACE-Step
-
 Apache 2.0 License
 """
 
@@ -17,31 +15,26 @@ Neon lights they flicker bright
 City hums in dead of night
 Rhythms pulse through concrete veins
 Lost in echoes of refrains
-
 [verse]
 Bassline groovin' in my chest
 Heartbeats match the city's zest
 Electric whispers fill the air
 Synthesized dreams everywhere
-
 [chorus]
 Turn it up and let it flow
 Feel the fire let it grow
 In this rhythm we belong
 Hear the night sing out our song
-
 [verse]
 Guitar strings they start to weep
 Wake the soul from silent sleep
 Every note a story told
 In this night we’re bold and gold
-
 [bridge]
 Voices blend in harmony
 Lost in pure cacophony
 Timeless echoes timeless cries
 Soulful shouts beneath the skies
-
 [verse]
 Keyboard dances on the keys
 Melodies on evening breeze
@@ -51,31 +44,31 @@ In this moment we take flight
 
 # First, let's define the presets at the top of the file, after the imports
 GENRE_PRESETS = {
-    "Modern Pop": "pop, synth, drums, guitar, 120 bpm, upbeat, catchy, vibrant, female vocals, polished vocals",
-    "Rock": "rock, electric guitar, drums, bass, 130 bpm, energetic, rebellious, gritty, male vocals, raw vocals",
-    "Hip Hop": "hip hop, 808 bass, hi-hats, synth, 90 bpm, bold, urban, intense, male vocals, rhythmic vocals",
-    "Country": "country, acoustic guitar, steel guitar, fiddle, 100 bpm, heartfelt, rustic, warm, male vocals, twangy vocals",
-    "EDM": "edm, synth, bass, kick drum, 128 bpm, euphoric, pulsating, energetic, instrumental",
-    "Reggae": "reggae, guitar, bass, drums, 80 bpm, chill, soulful, positive, male vocals, smooth vocals",
-    "Classical": "classical, orchestral, strings, piano, 60 bpm, elegant, emotive, timeless, instrumental",
-    "Jazz": "jazz, saxophone, piano, double bass, 110 bpm, smooth, improvisational, soulful, male vocals, crooning vocals",
-    "Metal": "metal, electric guitar, double kick drum, bass, 160 bpm, aggressive, intense, heavy, male vocals, screamed vocals",
-    "R&B": "r&b, synth, bass, drums, 85 bpm, sultry, groovy, romantic, female vocals, silky vocals"
+    "现代流行 (Modern Pop)": "pop, synth, drums, guitar, 120 bpm, upbeat, catchy, vibrant, female vocals, polished vocals",
+    "摇滚 (Rock)": "rock, electric guitar, drums, bass, 130 bpm, energetic, rebellious, gritty, male vocals, raw vocals",
+    "嘻哈 (Hip Hop)": "hip hop, 808 bass, hi-hats, synth, 90 bpm, bold, urban, intense, male vocals, rhythmic vocals",
+    "乡村 (Country)": "country, acoustic guitar, steel guitar, fiddle, 100 bpm, heartfelt, rustic, warm, male vocals, twangy vocals",
+    "电子舞曲 (EDM)": "edm, synth, bass, kick drum, 128 bpm, euphoric, pulsating, energetic, instrumental",
+    "雷鬼 (Reggae)": "reggae, guitar, bass, drums, 80 bpm, chill, soulful, positive, male vocals, smooth vocals",
+    "古典 (Classical)": "classical, orchestral, strings, piano, 60 bpm, elegant, emotive, timeless, instrumental",
+    "爵士 (Jazz)": "jazz, saxophone, piano, double bass, 110 bpm, smooth, improvisational, soulful, male vocals, crooning vocals",
+    "金属 (Metal)": "metal, electric guitar, double kick drum, bass, 160 bpm, aggressive, intense, heavy, male vocals, screamed vocals",
+    "R&B (R&B)": "r&b, synth, bass, drums, 85 bpm, sultry, groovy, romantic, female vocals, silky vocals"
 }
 
 # Add this function to handle preset selection
 def update_tags_from_preset(preset_name):
-    if preset_name == "Custom":
+    if preset_name == "自定义 (Custom)":
         return ""
     return GENRE_PRESETS.get(preset_name, "")
 
 
-def create_output_ui(task_name="Text2Music"):
+def create_output_ui(task_name="文本转音乐"):
     # For many consumer-grade GPU devices, only one batch can be run
-    output_audio1 = gr.Audio(type="filepath", label=f"{task_name} Generated Audio 1")
+    output_audio1 = gr.Audio(type="filepath", label=f"{task_name} 生成的音频 1")
     # output_audio2 = gr.Audio(type="filepath", label="Generated Audio 2")
-    with gr.Accordion(f"{task_name} Parameters", open=False):
-        input_params_json = gr.JSON(label=f"{task_name} Parameters")
+    with gr.Accordion(f"{task_name} 参数", open=False):
+        input_params_json = gr.JSON(label=f"{task_name} 参数")
     # outputs = [output_audio1, output_audio2]
     outputs = [output_audio1]
     return outputs, input_params_json
@@ -102,26 +95,26 @@ def create_text2music_ui(
                     240.0,
                     step=0.00001,
                     value=-1,
-                    label="Audio Duration",
+                    label="音频时长",
                     interactive=True,
-                    info="-1 means random duration (30 ~ 240).",
+                    info="-1 表示随机时长 (30 ~ 240)。",
                     scale=9,
                 )
-                sample_bnt = gr.Button("Sample", variant="secondary", scale=1)
+                sample_bnt = gr.Button("示例", variant="secondary", scale=1)
 
             # audio2audio
             with gr.Row(equal_height=True):
-                audio2audio_enable = gr.Checkbox(label="Enable Audio2Audio", value=False, info="Check to enable Audio-to-Audio generation using a reference audio.", elem_id="audio2audio_checkbox")
+                audio2audio_enable = gr.Checkbox(label="启用音频到音频生成", value=False, info="勾选以使用参考音频进行音频到音频生成。", elem_id="audio2audio_checkbox")
                 lora_name_or_path = gr.Dropdown(
-                    label="Lora Name or Path",
+                    label="Lora 模型名称或路径",
                     choices=["ACE-Step/ACE-Step-v1-chinese-rap-LoRA", "none"],
                     value="none",
                     allow_custom_value=True,
                 )
 
-            ref_audio_input = gr.Audio(type="filepath", label="Reference Audio (for Audio2Audio)", visible=False, elem_id="ref_audio_input", show_download_button=True)
+            ref_audio_input = gr.Audio(type="filepath", label="参考音频 (用于音频到音频生成)", visible=False, elem_id="ref_audio_input", show_download_button=True)
             ref_audio_strength = gr.Slider(
-                label="Refer audio strength",
+                label="参考音频强度",
                 minimum=0.0,
                 maximum=1.0,
                 step=0.01,
@@ -145,17 +138,17 @@
 
         with gr.Column(scale=2):
             with gr.Group():
-                gr.Markdown("""<center>Support tags, descriptions, and scene. Use commas to separate different tags.<br>Tags and lyrics examples are from AI music generation community.</center>""")
+                gr.Markdown("""<center>支持标签、描述和场景。使用逗号分隔不同的标签。<br>标签和歌词示例来自AI音乐生成社区。</center>""")
                 with gr.Row():
                     genre_preset = gr.Dropdown(
-                        choices=["Custom"] + list(GENRE_PRESETS.keys()),
-                        value="Custom",
-                        label="Preset",
+                        choices=["自定义 (Custom)"] + list(GENRE_PRESETS.keys()),
+                        value="自定义 (Custom)",
+                        label="预设",
                         scale=1,
                     )
                     prompt = gr.Textbox(
                         lines=1,
-                        label="Tags",
+                        label="标签",
                         max_lines=4,
                         value=TAG_DEFAULT,
                         scale=9,
@@ -168,21 +161,21 @@
                     outputs=[prompt]
                 )
             with gr.Group():
-                gr.Markdown("""<center>Support lyric structure tags like [verse], [chorus], and [bridge] to separate different parts of the lyrics.<br>Use [instrumental] or [inst] to generate instrumental music. Not support genre structure tag in lyrics</center>""")
+                gr.Markdown("""<center>支持歌词结构标签,如 [verse]、[chorus] 和 [bridge] 来分隔歌词的不同部分。<br>使用 [instrumental] 或 [inst] 生成纯音乐。不支持歌词中的流派结构标签。</center>""")
                 lyrics = gr.Textbox(
                     lines=9,
-                    label="Lyrics",
+                    label="歌词",
                     max_lines=13,
                     value=LYRIC_DEFAULT,
                 )
 
-            with gr.Accordion("Basic Settings", open=False):
+            with gr.Accordion("基本设置", open=False):
                 infer_step = gr.Slider(
                     minimum=1,
                     maximum=200,
                     step=1,
                     value=60,
-                    label="Infer Steps",
+                    label="推理步数",
                     interactive=True,
                 )
                 guidance_scale = gr.Slider(
@@ -190,64 +183,64 @@ def create_text2music_ui(
                     maximum=30.0,
                     step=0.1,
                     value=15.0,
-                    label="Guidance Scale",
+                    label="引导尺度",
                     interactive=True,
-                    info="When guidance_scale_lyric > 1 and guidance_scale_text > 1, the guidance scale will not be applied.",
+                    info="当 guidance_scale_lyric > 1 且 guidance_scale_text > 1 时,不应用引导尺度。",
                 )
                 guidance_scale_text = gr.Slider(
                     minimum=0.0,
                     maximum=10.0,
                     step=0.1,
                     value=0.0,
-                    label="Guidance Scale Text",
+                    label="文本引导尺度",
                     interactive=True,
-                    info="Guidance scale for text condition. It can only apply to cfg. set guidance_scale_text=5.0, guidance_scale_lyric=1.5 for start",
+                    info="文本条件的引导尺度。仅适用于 cfg。建议设置 guidance_scale_text=5.0, guidance_scale_lyric=1.5 作为开始。",
                 )
                 guidance_scale_lyric = gr.Slider(
                     minimum=0.0,
                     maximum=10.0,
                     step=0.1,
                     value=0.0,
-                    label="Guidance Scale Lyric",
+                    label="歌词引导尺度",
                     interactive=True,
                 )
 
                 manual_seeds = gr.Textbox(
-                    label="manual seeds (default None)",
+                    label="手动种子 (默认为无)",
                     placeholder="1,2,3,4",
                     value=None,
-                    info="Seed for the generation",
+                    info="生成种子",
                 )
 
-            with gr.Accordion("Advanced Settings", open=False):
+            with gr.Accordion("高级设置", open=False):
                 scheduler_type = gr.Radio(
                     ["euler", "heun"],
                     value="euler",
-                    label="Scheduler Type",
+                    label="调度器类型",
                     elem_id="scheduler_type",
-                    info="Scheduler type for the generation. euler is recommended. heun will take more time.",
+                    info="生成调度器类型。推荐使用 euler。heun 将花费更多时间。",
                 )
                 cfg_type = gr.Radio(
                     ["cfg", "apg", "cfg_star"],
                     value="apg",
-                    label="CFG Type",
+                    label="CFG 类型",
                     elem_id="cfg_type",
-                    info="CFG type for the generation. apg is recommended. cfg and cfg_star are almost the same.",
+                    info="生成 CFG 类型。推荐使用 apg。cfg 和 cfg_star 几乎相同。",
                 )
                 use_erg_tag = gr.Checkbox(
-                    label="use ERG for tag",
+                    label="对标签使用 ERG",
                     value=True,
-                    info="Use Entropy Rectifying Guidance for tag. It will multiple a temperature to the attention to make a weaker tag condition and make better diversity.",
+                    info="对标签使用熵校正引导。它将注意力乘以一个温度,以减弱标签条件并提高多样性。",
                 )
                 use_erg_lyric = gr.Checkbox(
-                    label="use ERG for lyric",
+                    label="对歌词使用 ERG",
                     value=False,
-                    info="The same but apply to lyric encoder's attention.",
+                    info="同上,但应用于歌词编码器的注意力。",
                 )
                 use_erg_diffusion = gr.Checkbox(
-                    label="use ERG for diffusion",
+                    label="对扩散模型使用 ERG",
                     value=True,
-                    info="The same but apply to diffusion model's attention.",
+                    info="同上,但应用于扩散模型的注意力。",
                 )
 
                 omega_scale = gr.Slider(
@@ -255,9 +248,9 @@ def create_text2music_ui(
                     maximum=100.0,
                     step=0.1,
                     value=10.0,
-                    label="Granularity Scale",
+                    label="粒度尺度",
                     interactive=True,
-                    info="Granularity scale for the generation. Higher values can reduce artifacts",
+                    info="生成粒度尺度。值越高可以减少伪影。",
                 )
 
                 guidance_interval = gr.Slider(
@@ -265,48 +258,48 @@ def create_text2music_ui(
                     maximum=1.0,
                     step=0.01,
                     value=0.5,
-                    label="Guidance Interval",
+                    label="引导间隔",
                     interactive=True,
-                    info="Guidance interval for the generation. 0.5 means only apply guidance in the middle steps (0.25 * infer_steps to 0.75 * infer_steps)",
+                    info="生成引导间隔。0.5 表示仅在中间步骤应用引导 (0.25 * 推理步数 到 0.75 * 推理步数)",
                 )
                 guidance_interval_decay = gr.Slider(
                     minimum=0.0,
                     maximum=1.0,
                     step=0.01,
                     value=0.0,
-                    label="Guidance Interval Decay",
+                    label="引导间隔衰减",
                     interactive=True,
-                    info="Guidance interval decay for the generation. Guidance scale will decay from guidance_scale to min_guidance_scale in the interval. 0.0 means no decay.",
+                    info="生成引导间隔衰减。引导尺度将在此间隔内从 guidance_scale 衰减到 min_guidance_scale。0.0 表示不衰减。",
                 )
                 min_guidance_scale = gr.Slider(
                     minimum=0.0,
                     maximum=200.0,
                     step=0.1,
                     value=3.0,
-                    label="Min Guidance Scale",
+                    label="最小引导尺度",
                     interactive=True,
-                    info="Min guidance scale for guidance interval decay's end scale",
+                    info="引导间隔衰减结束时的最小引导尺度。",
                 )
                 oss_steps = gr.Textbox(
-                    label="OSS Steps",
+                    label="OSS 步数",
                     placeholder="16, 29, 52, 96, 129, 158, 172, 183, 189, 200",
                     value=None,
-                    info="Optimal Steps for the generation. But not test well",
+                    info="生成的最优步数。但未充分测试。",
                )
 
-            text2music_bnt = gr.Button("Generate", variant="primary")
+            text2music_bnt = gr.Button("生成", variant="primary")
 
         with gr.Column():
             outputs, input_params_json = create_output_ui()
-            with gr.Tab("retake"):
+            with gr.Tab("重试"):
                 retake_variance = gr.Slider(
-                    minimum=0.0, maximum=1.0, step=0.01, value=0.2, label="variance"
+                    minimum=0.0, maximum=1.0, step=0.01, value=0.2, label="方差"
                 )
                 retake_seeds = gr.Textbox(
-                    label="retake seeds (default None)", placeholder="", value=None
+                    label="重试种子 (默认为无)", placeholder="", value=None
                 )
-                retake_bnt = gr.Button("Retake", variant="primary")
-                retake_outputs, retake_input_params_json = create_output_ui("Retake")
+                retake_bnt = gr.Button("重试", variant="primary")
+                retake_outputs, retake_input_params_json = create_output_ui("重试")
 
                 def retake_process_func(json_data, retake_variance, retake_seeds):
                     return text2music_process_func(
@@ -351,19 +344,19 @@ def create_text2music_ui(
                     ],
                     outputs=retake_outputs + [retake_input_params_json],
                 )
-            with gr.Tab("repainting"):
+            with gr.Tab("重绘"):
                 retake_variance = gr.Slider(
-                    minimum=0.0, maximum=1.0, step=0.01, value=0.2, label="variance"
+                    minimum=0.0, maximum=1.0, step=0.01, value=0.2, label="方差"
                 )
                 retake_seeds = gr.Textbox(
-                    label="repaint seeds (default None)", placeholder="", value=None
+                    label="重绘种子 (默认为无)", placeholder="", value=None
                 )
                 repaint_start = gr.Slider(
                     minimum=0.0,
                     maximum=240.0,
                     step=0.01,
                     value=0.0,
-                    label="Repaint Start Time",
+                    label="重绘开始时间",
                     interactive=True,
                 )
                 repaint_end = gr.Slider(
@@ -371,18 +364,18 @@ def create_text2music_ui(
                     maximum=240.0,
                     step=0.01,
                     value=30.0,
-                    label="Repaint End Time",
+                    label="重绘结束时间",
                     interactive=True,
                 )
                 repaint_source = gr.Radio(
-                    ["text2music", "last_repaint", "upload"],
-                    value="text2music",
-                    label="Repaint Source",
+                    ["文本转音乐", "上次重绘", "上传"],
+                    value="文本转音乐",
+                    label="重绘来源",
                     elem_id="repaint_source",
                 )
 
                 repaint_source_audio_upload = gr.Audio(
-                    label="Upload Audio",
+                    label="上传音频",
                     type="filepath",
                     visible=False,
                     elem_id="repaint_source_audio_upload",
@@ -390,14 +383,14 @@ def create_text2music_ui(
                 )
                 repaint_source.change(
                     fn=lambda x: gr.update(
-                        visible=x == "upload", elem_id="repaint_source_audio_upload"
+                        visible=x == "上传", elem_id="repaint_source_audio_upload"
                     ),
                     inputs=[repaint_source],
                     outputs=[repaint_source_audio_upload],
                 )
 
-                repaint_bnt = gr.Button("Repaint", variant="primary")
-                repaint_outputs, repaint_input_params_json = create_output_ui("Repaint")
+                repaint_bnt = gr.Button("重绘", variant="primary")
+                repaint_outputs, repaint_input_params_json = create_output_ui("重绘")
 
                 def repaint_process_func(
                     text2music_json_data,
@@ -426,14 +419,14 @@ def create_text2music_ui(
                     guidance_scale_text,
                     guidance_scale_lyric,
                 ):
-                    if repaint_source == "upload":
+                    if repaint_source == "上传":
                         src_audio_path = repaint_source_audio_upload
                         audio_duration = librosa.get_duration(filename=src_audio_path)
                         json_data = {"audio_duration": audio_duration}
-                    elif repaint_source == "text2music":
+                    elif repaint_source == "文本转音乐":
                         json_data = text2music_json_data
                         src_audio_path = json_data["audio_path"]
-                    elif repaint_source == "last_repaint":
+                    elif repaint_source == "上次重绘":
                         json_data = repaint_json_data
                         src_audio_path = json_data["audio_path"]
 
@@ -496,26 +489,26 @@ def create_text2music_ui(
                     ],
                     outputs=repaint_outputs + [repaint_input_params_json],
                 )
-            with gr.Tab("edit"):
-                edit_prompt = gr.Textbox(lines=2, label="Edit Tags", max_lines=4)
-                edit_lyrics = gr.Textbox(lines=9, label="Edit Lyrics", max_lines=13)
+            with gr.Tab("编辑"):
+                edit_prompt = gr.Textbox(lines=2, label="编辑标签", max_lines=4)
+                edit_lyrics = gr.Textbox(lines=9, label="编辑歌词", max_lines=13)
                 retake_seeds = gr.Textbox(
-                    label="edit seeds (default None)", placeholder="", value=None
+                    label="编辑种子 (默认为无)", placeholder="", value=None
                 )
 
                 edit_type = gr.Radio(
-                    ["only_lyrics", "remix"],
-                    value="only_lyrics",
-                    label="Edit Type",
+                    ["仅歌词", "混音"],
+                    value="仅歌词",
+                    label="编辑类型",
                     elem_id="edit_type",
-                    info="`only_lyrics` will keep the whole song the same except lyrics difference. Make your diffrence smaller, e.g. one lyrc line change.\nremix can change the song melody and genre",
+                    info="`仅歌词` 将保持整首歌相同,除了歌词差异。使您的差异更小,例如更改一行歌词。\n`混音` 可以改变歌曲的旋律和流派。",
                 )
                 edit_n_min = gr.Slider(
                     minimum=0.0,
                     maximum=1.0,
                     step=0.01,
                     value=0.6,
-                    label="edit_n_min",
+                    label="编辑 n_min",
                     interactive=True,
                 )
                 edit_n_max = gr.Slider(
@@ -523,15 +516,15 @@ def create_text2music_ui(
                     maximum=1.0,
                     step=0.01,
                     value=1.0,
-                    label="edit_n_max",
+                    label="编辑 n_max",
                     interactive=True,
                 )
 
                 def edit_type_change_func(edit_type):
-                    if edit_type == "only_lyrics":
+                    if edit_type == "仅歌词":
                         n_min = 0.6
                         n_max = 1.0
-                    elif edit_type == "remix":
+                    elif edit_type == "混音":
                         n_min = 0.2
                         n_max = 0.4
                     return n_min, n_max
@@ -543,13 +536,13 @@ def create_text2music_ui(
                 )
 
                 edit_source = gr.Radio(
-                    ["text2music", "last_edit", "upload"],
-                    value="text2music",
-                    label="Edit Source",
+                    ["文本转音乐", "上次编辑", "上传"],
+                    value="文本转音乐",
+                    label="编辑来源",
                     elem_id="edit_source",
                 )
                 edit_source_audio_upload = gr.Audio(
-                    label="Upload Audio",
+                    label="上传音频",
                     type="filepath",
                     visible=False,
                     elem_id="edit_source_audio_upload",
@@ -557,14 +550,14 @@ def create_text2music_ui(
                 )
                 edit_source.change(
                     fn=lambda x: gr.update(
-                        visible=x == "upload", elem_id="edit_source_audio_upload"
+                        visible=x == "上传", elem_id="edit_source_audio_upload"
                     ),
                     inputs=[edit_source],
                     outputs=[edit_source_audio_upload],
                 )
 
-                edit_bnt = gr.Button("Edit", variant="primary")
-                edit_outputs, edit_input_params_json = create_output_ui("Edit")
+                edit_bnt = gr.Button("编辑", variant="primary")
+                edit_outputs, edit_input_params_json = create_output_ui("编辑")
 
                 def edit_process_func(
                     text2music_json_data,
@@ -594,14 +587,14 @@ def create_text2music_ui(
                     guidance_scale_lyric,
                     retake_seeds,
                 ):
-                    if edit_source == "upload":
+                    if edit_source == "上传":
                         src_audio_path = edit_source_audio_upload
                         audio_duration = librosa.get_duration(filename=src_audio_path)
                         json_data = {"audio_duration": audio_duration}
-                    elif edit_source == "text2music":
+                    elif edit_source == "文本转音乐":
                         json_data = text2music_json_data
                         src_audio_path = json_data["audio_path"]
-                    elif edit_source == "last_edit":
+                    elif edit_source == "上次编辑":
                         json_data = edit_input_params_json
                         src_audio_path = json_data["audio_path"]
 
@@ -671,16 +664,16 @@ def create_text2music_ui(
                     ],
                     outputs=edit_outputs + [edit_input_params_json],
                 )
-            with gr.Tab("extend"):
+            with gr.Tab("扩展"):
                 extend_seeds = gr.Textbox(
-                    label="extend seeds (default None)", placeholder="", value=None
+                    label="扩展种子 (默认为无)", placeholder="", value=None
                 )
                 left_extend_length = gr.Slider(
                     minimum=0.0,
                     maximum=240.0,
                     step=0.01,
                     value=0.0,
-                    label="Left Extend Length",
+                    label="左侧扩展长度",
                     interactive=True,
                 )
                 right_extend_length = gr.Slider(
@@ -688,18 +681,18 @@ def create_text2music_ui(
                     maximum=240.0,
                     step=0.01,
                     value=30.0,
-                    label="Right Extend Length",
+                    label="右侧扩展长度",
                     interactive=True,
                 )
                 extend_source = gr.Radio(
-                    ["text2music", "last_extend", "upload"],
-                    value="text2music",
-                    label="Extend Source",
+                    ["文本转音乐", "上次扩展", "上传"],
+                    value="文本转音乐",
+                    label="扩展来源",
                     elem_id="extend_source",
                )
 
                 extend_source_audio_upload = gr.Audio(
-                    label="Upload Audio",
+                    label="上传音频",
                     type="filepath",
                     visible=False,
                     elem_id="extend_source_audio_upload",
@@ -707,14 +700,14 @@ def create_text2music_ui(
                 )
                 extend_source.change(
                     fn=lambda x: gr.update(
-                        visible=x == "upload", elem_id="extend_source_audio_upload"
+                        visible=x == "上传", elem_id="extend_source_audio_upload"
                     ),
                     inputs=[extend_source],
                     outputs=[extend_source_audio_upload],
                 )
 
-                extend_bnt = gr.Button("Extend", variant="primary")
-                extend_outputs, extend_input_params_json = create_output_ui("Extend")
+                extend_bnt = gr.Button("扩展", variant="primary")
+                extend_outputs, extend_input_params_json = create_output_ui("扩展")
 
                 def extend_process_func(
                     text2music_json_data,
@@ -742,15 +735,15 @@ def create_text2music_ui(
                     guidance_scale_text,
                     guidance_scale_lyric,
                 ):
-                    if extend_source == "upload":
+                    if extend_source == "上传":
                         src_audio_path = extend_source_audio_upload
                         # get audio duration
                         audio_duration = librosa.get_duration(filename=src_audio_path)
                         json_data = {"audio_duration": audio_duration}
-                    elif extend_source == "text2music":
+                    elif extend_source == "文本转音乐":
                         json_data = text2music_json_data
                         src_audio_path = json_data["audio_path"]
-                    elif extend_source == "last_extend":
+                    elif extend_source == "上次扩展":
                         json_data = extend_input_params_json
                         src_audio_path = json_data["audio_path"]
 
@@ -928,19 +921,9 @@ def create_main_demo_ui(
     load_data_func=dump_func,
 ):
     with gr.Blocks(
-        title="ACE-Step Model 1.0 DEMO",
+        title="ACE-Step 模型 1.0 演示",
     ) as demo:
-        gr.Markdown(
-            """
-            <h1 style="text-align: center;">ACE-Step: A Step Towards Music Generation Foundation Model</h1>
-            <p>
-                <a href="https://ace-step.github.io/" target='_blank'>Project</a> |
-                <a href="https://huggingface.co/ACE-Step/ACE-Step-v1-3.5B">Checkpoints</a> |
-                <a href="https://discord.gg/rjAZz2xBdG" target='_blank'>Discord</a>
-            </p>
-            """
-        )
-        with gr.Tab("text2music"):
+        with gr.Tab("文本转音乐"):
             create_text2music_ui(
                 gr=gr,
                 text2music_process_func=text2music_process_func,
@@ -955,4 +938,4 @@ if __name__ == "__main__":
     demo.launch(
         server_name="0.0.0.0",
         server_port=7860,
-    )
+    )
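
Note on the preset mechanism: only the tail of the preset event hook (`outputs=[prompt]`, old lines 168-169) falls inside the changed hunks above. A minimal sketch of the presumed wiring, assuming Gradio's standard `.change()` event and the names defined in this file (the `fn=`/`inputs=` lines are not part of the diff and are reconstructed here for illustration):

```python
# Hypothetical reconstruction: selecting a preset fills the Tags textbox.
genre_preset.change(
    fn=update_tags_from_preset,  # returns "" for "自定义 (Custom)", else the preset's tag string
    inputs=[genre_preset],
    outputs=[prompt],            # the 标签 (Tags) textbox defined above
)
```

Because `update_tags_from_preset` falls back to `GENRE_PRESETS.get(preset_name, "")`, any dropdown value outside the preset dict (including the renamed "自定义 (Custom)" entry) safely clears the tags rather than raising a KeyError.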