ginipick commited on
Commit
9681ed4
ยท
verified ยท
1 Parent(s): 0141981

Update ui/components.py

Browse files
Files changed (1) hide show
  1. ui/components.py +841 -851
ui/components.py CHANGED
@@ -1,11 +1,3 @@
1
- """
2
- ACE-Step: A Step Towards Music Generation Foundation Model
3
-
4
- https://github.com/ace-step/ACE-Step
5
-
6
- Apache 2.0 License
7
- """
8
-
9
  import gradio as gr
10
  import librosa
11
  import os
@@ -150,7 +142,7 @@ QUALITY_PRESETS = {
150
  "High Quality": {
151
  "infer_step": 200,
152
  "guidance_scale": 18.0,
153
- "scheduler_type": "heun",
154
  "omega_scale": 15.0,
155
  "use_erg_diffusion": True,
156
  "use_erg_tag": True,
@@ -159,7 +151,7 @@ QUALITY_PRESETS = {
159
  "Ultra (Best)": {
160
  "infer_step": 299,
161
  "guidance_scale": 20.0,
162
- "scheduler_type": "heun",
163
  "omega_scale": 20.0,
164
  "use_erg_diffusion": True,
165
  "use_erg_tag": True,
@@ -398,510 +390,268 @@ def create_text2music_ui(
398
  enhanced_process_func = create_enhanced_process_func(text2music_process_func)
399
 
400
  with gr.Row():
401
- with gr.Column():
402
- # ํ’ˆ์งˆ ๋ฐ ์„ฑ๋Šฅ ์„ค์ • ์„น์…˜ ์ถ”๊ฐ€
 
403
  with gr.Group():
404
- gr.Markdown("### โšก ํ’ˆ์งˆ & ์„ฑ๋Šฅ ์„ค์ •")
405
- with gr.Row():
406
- quality_preset = gr.Dropdown(
407
- choices=list(QUALITY_PRESETS.keys()),
408
- value="Standard",
409
- label="ํ’ˆ์งˆ ํ”„๋ฆฌ์…‹",
410
- scale=2
411
- )
412
- multi_seed_mode = gr.Dropdown(
413
- choices=list(MULTI_SEED_OPTIONS.keys()),
414
- value="Single",
415
- label="๋‹ค์ค‘ ์ƒ์„ฑ ๋ชจ๋“œ",
416
- scale=2,
417
- info="์—ฌ๋Ÿฌ ๋ฒˆ ์ƒ์„ฑํ•˜์—ฌ ์ตœ๊ณ  ํ’ˆ์งˆ ์„ ํƒ"
418
- )
419
 
420
- preset_description = gr.Textbox(
421
- value=QUALITY_PRESETS["Standard"]["description"],
422
- label="์„ค๋ช…",
423
- interactive=False,
424
- max_lines=1
425
- )
426
-
427
- with gr.Row(equal_height=True):
428
- audio_duration = gr.Slider(
429
- -1,
430
- 240.0,
431
- step=0.00001,
432
- value=-1,
433
- label="Audio Duration",
434
- interactive=True,
435
- info="-1 means random duration (30 ~ 240).",
436
- scale=7,
437
- )
438
- sample_bnt = gr.Button("Sample", variant="secondary", scale=1)
439
- preview_bnt = gr.Button("๐ŸŽต Preview", variant="secondary", scale=2)
440
-
441
- # audio2audio
442
- with gr.Row(equal_height=True):
443
- audio2audio_enable = gr.Checkbox(
444
- label="Enable Audio2Audio",
445
- value=False,
446
- info="Check to enable Audio-to-Audio generation using a reference audio.",
447
- elem_id="audio2audio_checkbox"
448
- )
449
- lora_name_or_path = gr.Dropdown(
450
- label="Lora Name or Path",
451
- choices=["ACE-Step/ACE-Step-v1-chinese-rap-LoRA", "none"],
452
- value="none",
453
- allow_custom_value=True,
454
- )
455
-
456
- ref_audio_input = gr.Audio(
457
- type="filepath",
458
- label="Reference Audio (for Audio2Audio)",
459
- visible=False,
460
- elem_id="ref_audio_input",
461
- show_download_button=True
462
- )
463
- ref_audio_strength = gr.Slider(
464
- label="Refer audio strength",
465
- minimum=0.0,
466
- maximum=1.0,
467
- step=0.01,
468
- value=0.5,
469
- elem_id="ref_audio_strength",
470
- visible=False,
471
- interactive=True,
472
- )
473
-
474
- def toggle_ref_audio_visibility(is_checked):
475
- return (
476
- gr.update(visible=is_checked, elem_id="ref_audio_input"),
477
- gr.update(visible=is_checked, elem_id="ref_audio_strength"),
478
- )
479
-
480
- audio2audio_enable.change(
481
- fn=toggle_ref_audio_visibility,
482
- inputs=[audio2audio_enable],
483
- outputs=[ref_audio_input, ref_audio_strength],
484
- )
485
-
486
- with gr.Column(scale=2):
487
- with gr.Group():
488
- gr.Markdown("""### ๐ŸŽผ ์Šค๋งˆํŠธ ํ”„๋กฌํ”„ํŠธ ์‹œ์Šคํ…œ
489
- <center>์žฅ๋ฅด ์„ ํƒ ์‹œ ์ž๋™์œผ๋กœ ์ตœ์ ํ™”๋œ ํƒœ๊ทธ๊ฐ€ ์ถ”๊ฐ€๋ฉ๋‹ˆ๋‹ค. ์ฝค๋งˆ๋กœ ๊ตฌ๋ถ„ํ•˜์—ฌ ํƒœ๊ทธ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”.</center>""")
490
-
491
- with gr.Row():
492
  genre_preset = gr.Dropdown(
493
  choices=["Custom"] + list(GENRE_PRESETS.keys()),
494
  value="Custom",
495
- label="์žฅ๋ฅด ํ”„๋ฆฌ์…‹",
496
- scale=1,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
497
  )
 
498
  enable_smart_enhancement = gr.Checkbox(
499
- label="์Šค๋งˆํŠธ ํ–ฅ์ƒ",
500
  value=True,
501
- info="์ž๋™ ํƒœ๊ทธ ์ตœ์ ํ™”",
502
- scale=1
 
 
 
 
 
503
  )
504
-
505
- prompt = gr.Textbox(
506
- lines=2,
507
- label="Tags",
508
- max_lines=4,
509
- value=TAG_DEFAULT,
510
- placeholder="์ฝค๋งˆ๋กœ ๊ตฌ๋ถ„๋œ ํƒœ๊ทธ๋“ค...",
511
- )
512
 
 
513
  with gr.Group():
514
- gr.Markdown("""### ๐Ÿ“ ๊ฐ€์‚ฌ ์ž…๋ ฅ
515
- <center>๊ตฌ์กฐ ํƒœ๊ทธ [verse], [chorus], [bridge] ์‚ฌ์šฉ์„ ๊ถŒ์žฅํ•ฉ๋‹ˆ๋‹ค.<br>[instrumental] ๋˜๋Š” [inst]๋ฅผ ์‚ฌ์šฉํ•˜๋ฉด ์—ฐ์ฃผ๊ณก์„ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.</center>""")
516
 
517
- # --- ์ƒˆ๋กœ์šด UI ์š”์†Œ: ์ฃผ์ œ ์ž…๋ ฅ ํ›„ ๊ฐ€์‚ฌ ์ž๋™ ์ƒ์„ฑ ---
518
  with gr.Row():
519
- topic_for_lyrics = gr.Textbox(
520
- lines=1,
521
- label="๊ฐ€์‚ฌ ์ฃผ์ œ",
522
- placeholder="์˜ˆ) ์ฒซ์‚ฌ๋ž‘ ์ด๋ณ„, ์—ฌ๋ฆ„ ๋ฐ”๋‹ค, ๊ฐ€์„ ๋‚™์—ฝ ๋“ฑ..."
523
- )
524
- generate_lyrics_btn = gr.Button("๊ฐ€์‚ฌ ์ƒ์„ฑ", variant="secondary")
 
 
 
 
 
 
 
525
 
526
- # ์‚ฌ์šฉ์ž ์ง์ ‘ ์ž…๋ ฅ ๊ฐ€์‚ฌ ๋ฐ•์Šค
527
  lyrics = gr.Textbox(
528
- lines=9,
529
- label="Lyrics",
530
- max_lines=13,
531
  value=LYRIC_DEFAULT,
532
- placeholder="๊ฐ€์‚ฌ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”. [verse], [chorus] ๋“ฑ์˜ ๊ตฌ์กฐ ํƒœ๊ทธ ์‚ฌ์šฉ์„ ๊ถŒ์žฅํ•ฉ๋‹ˆ๋‹ค."
533
- )
534
-
535
- # OpenAI๋ฅผ ํ†ตํ•ด ๊ฐ€์‚ฌ ์ž๋™ ์ƒ์„ฑํ•˜๋Š” ํ•จ์ˆ˜
536
- def generate_lyrics_ui(topic_text):
537
- # OpenAI ํ˜ธ์ถœ
538
- generated = openai_generate_lyrics(topic_text)
539
- return generated
540
-
541
- # ๊ฐ€์‚ฌ ์ƒ์„ฑ ๋ฒ„ํŠผ ํด๋ฆญ ์‹œ, lyrics ๋ฐ•์Šค์— ๋ฐ˜์˜
542
- generate_lyrics_btn.click(
543
- fn=generate_lyrics_ui,
544
- inputs=[topic_for_lyrics],
545
- outputs=[lyrics]
546
- )
547
-
548
- with gr.Accordion("Basic Settings", open=False):
549
- infer_step = gr.Slider(
550
- minimum=1,
551
- maximum=300,
552
- step=1,
553
- value=150,
554
- label="Infer Steps",
555
- interactive=True,
556
- )
557
- guidance_scale = gr.Slider(
558
- minimum=0.0,
559
- maximum=30.0,
560
- step=0.1,
561
- value=15.0,
562
- label="Guidance Scale",
563
- interactive=True,
564
- info="When guidance_scale_lyric > 1 and guidance_scale_text > 1, the guidance scale will not be applied.",
565
- )
566
- guidance_scale_text = gr.Slider(
567
- minimum=0.0,
568
- maximum=10.0,
569
- step=0.1,
570
- value=0.0,
571
- label="Guidance Scale Text",
572
- interactive=True,
573
- info="Guidance scale for text condition. It can only apply to cfg. set guidance_scale_text=5.0, guidance_scale_lyric=1.5 for start",
574
- )
575
- guidance_scale_lyric = gr.Slider(
576
- minimum=0.0,
577
- maximum=10.0,
578
- step=0.1,
579
- value=0.0,
580
- label="Guidance Scale Lyric",
581
- interactive=True,
582
  )
583
 
584
- manual_seeds = gr.Textbox(
585
- label="manual seeds (default None)",
586
- placeholder="1,2,3,4",
587
- value=None,
588
- info="Seed for the generation",
589
- )
590
-
591
- with gr.Accordion("Advanced Settings", open=False):
592
- scheduler_type = gr.Radio(
593
- ["euler", "heun"],
594
- value="euler",
595
- label="Scheduler Type",
596
- elem_id="scheduler_type",
597
- info="Scheduler type for the generation. euler is recommended. heun will take more time.",
598
- )
599
- cfg_type = gr.Radio(
600
- ["cfg", "apg", "cfg_star"],
601
- value="apg",
602
- label="CFG Type",
603
- elem_id="cfg_type",
604
- info="CFG type for the generation. apg is recommended. cfg and cfg_star are almost the same.",
605
- )
606
- use_erg_tag = gr.Checkbox(
607
- label="use ERG for tag",
608
- value=True,
609
- info="Use Entropy Rectifying Guidance for tag. It will multiple a temperature to the attention to make a weaker tag condition and make better diversity.",
610
  )
611
- use_erg_lyric = gr.Checkbox(
612
- label="use ERG for lyric",
613
- value=False,
614
- info="The same but apply to lyric encoder's attention.",
 
615
  )
616
- use_erg_diffusion = gr.Checkbox(
617
- label="use ERG for diffusion",
618
- value=True,
619
- info="The same but apply to diffusion model's attention.",
620
  )
621
-
622
- omega_scale = gr.Slider(
623
- minimum=-100.0,
624
- maximum=100.0,
625
- step=0.1,
626
- value=10.0,
627
- label="Granularity Scale",
628
- interactive=True,
629
- info="Granularity scale for the generation. Higher values can reduce artifacts",
630
- )
631
-
632
- guidance_interval = gr.Slider(
633
  minimum=0.0,
634
  maximum=1.0,
635
  step=0.01,
636
  value=0.5,
637
- label="Guidance Interval",
638
- interactive=True,
639
- info="Guidance interval for the generation. 0.5 means only apply guidance in the middle steps (0.25 * infer_steps to 0.75 * infer_steps)",
640
- )
641
- guidance_interval_decay = gr.Slider(
642
- minimum=0.0,
643
- maximum=1.0,
644
- step=0.01,
645
- value=0.0,
646
- label="Guidance Interval Decay",
647
- interactive=True,
648
- info="Guidance interval decay for the generation. Guidance scale will decay from guidance_scale to min_guidance_scale in the interval. 0.0 means no decay.",
649
- )
650
- min_guidance_scale = gr.Slider(
651
- minimum=0.0,
652
- maximum=200.0,
653
- step=0.1,
654
- value=3.0,
655
- label="Min Guidance Scale",
656
- interactive=True,
657
- info="Min guidance scale for guidance interval decay's end scale",
658
- )
659
- oss_steps = gr.Textbox(
660
- label="OSS Steps",
661
- placeholder="16, 29, 52, 96, 129, 158, 172, 183, 189, 200",
662
- value=None,
663
- info="Optimal Steps for the generation. But not test well",
664
  )
665
 
666
- text2music_bnt = gr.Button("๐ŸŽต Generate Music", variant="primary", size="lg")
667
-
668
- # ๋ชจ๋“  UI ์š”์†Œ๊ฐ€ ์ •์˜๋œ ํ›„ ์ด๋ฒคํŠธ ํ•ธ๋“ค๋Ÿฌ ์„ค์ •
669
- genre_preset.change(
670
- fn=update_tags_from_preset,
671
- inputs=[genre_preset],
672
- outputs=[prompt]
673
- )
674
-
675
- quality_preset.change(
676
- fn=lambda x: QUALITY_PRESETS.get(x, {}).get("description", ""),
677
- inputs=[quality_preset],
678
- outputs=[preset_description]
679
- )
680
 
681
- quality_preset.change(
682
- fn=update_quality_preset,
683
- inputs=[quality_preset],
684
- outputs=[infer_step, guidance_scale, scheduler_type, omega_scale, use_erg_diffusion, use_erg_tag]
685
- )
 
 
 
 
 
686
 
687
- with gr.Column():
 
 
 
 
 
 
 
 
 
 
 
 
 
688
  outputs, input_params_json = create_output_ui()
689
 
690
- # ์‹ค์‹œ๊ฐ„ ํ”„๋ฆฌ๋ทฐ ๊ธฐ๋Šฅ
691
- def generate_preview(prompt, lyrics, genre_preset):
692
- """10์ดˆ ํ”„๋ฆฌ๋ทฐ ์ƒ์„ฑ"""
693
- preview_params = {
694
- "audio_duration": 10,
695
- "infer_step": 50,
696
- "guidance_scale": 12.0,
697
- "scheduler_type": "euler",
698
- "cfg_type": "apg",
699
- "omega_scale": 5.0,
700
- }
701
-
702
- enhanced_prompt = enhance_prompt_with_genre(prompt, genre_preset) if genre_preset != "Custom" else prompt
703
-
704
- try:
705
- # ์‹ค์ œ ๊ตฌํ˜„์—์„œ๋Š” ๋น ๋ฅธ ์ƒ์„ฑ ๋ชจ๋“œ ์‚ฌ์šฉ
706
- result = enhanced_process_func(
707
- preview_params["audio_duration"],
708
- enhanced_prompt,
709
- lyrics[:200], # ๊ฐ€์‚ฌ ์ผ๋ถ€๋งŒ ์‚ฌ์šฉ
710
- preview_params["infer_step"],
711
- preview_params["guidance_scale"],
712
- preview_params["scheduler_type"],
713
- preview_params["cfg_type"],
714
- preview_params["omega_scale"],
715
- None, # manual_seeds
716
- 0.5, # guidance_interval
717
- 0.0, # guidance_interval_decay
718
- 3.0, # min_guidance_scale
719
- True, # use_erg_tag
720
- False, # use_erg_lyric
721
- True, # use_erg_diffusion
722
- None, # oss_steps
723
- 0.0, # guidance_scale_text
724
- 0.0, # guidance_scale_lyric
725
- multi_seed_mode="Single"
726
  )
727
- return result[0] if result else None
728
- except Exception as e:
729
- return f"ํ”„๋ฆฌ๋ทฐ ์ƒ์„ฑ ์‹คํŒจ: {str(e)}"
730
-
731
- preview_bnt.click(
732
- fn=generate_preview,
733
- inputs=[prompt, lyrics, genre_preset],
734
- outputs=[outputs[0]]
735
- )
736
-
737
- with gr.Tab("retake"):
738
- retake_variance = gr.Slider(
739
- minimum=0.0, maximum=1.0, step=0.01, value=0.2, label="variance"
740
- )
741
- retake_seeds = gr.Textbox(
742
- label="retake seeds (default None)", placeholder="", value=None
743
- )
744
- retake_bnt = gr.Button("Retake", variant="primary")
745
- retake_outputs, retake_input_params_json = create_output_ui("Retake")
746
-
747
- def retake_process_func(json_data, retake_variance, retake_seeds):
748
- return enhanced_process_func(
749
- json_data.get("audio_duration", 30),
750
- json_data.get("prompt", ""),
751
- json_data.get("lyrics", ""),
752
- json_data.get("infer_step", 100),
753
- json_data.get("guidance_scale", 15.0),
754
- json_data.get("scheduler_type", "euler"),
755
- json_data.get("cfg_type", "apg"),
756
- json_data.get("omega_scale", 10.0),
757
- retake_seeds,
758
- json_data.get("guidance_interval", 0.5),
759
- json_data.get("guidance_interval_decay", 0.0),
760
- json_data.get("min_guidance_scale", 3.0),
761
- json_data.get("use_erg_tag", True),
762
- json_data.get("use_erg_lyric", False),
763
- json_data.get("use_erg_diffusion", True),
764
- json_data.get("oss_steps", None),
765
- json_data.get("guidance_scale_text", 0.0),
766
- json_data.get("guidance_scale_lyric", 0.0),
767
- audio2audio_enable=json_data.get("audio2audio_enable", False),
768
- ref_audio_strength=json_data.get("ref_audio_strength", 0.5),
769
- ref_audio_input=json_data.get("ref_audio_input", None),
770
- lora_name_or_path=json_data.get("lora_name_or_path", "none"),
771
- multi_seed_mode="Best of 3", # retake๋Š” ์ž๋™์œผ๋กœ ๋‹ค์ค‘ ์ƒ์„ฑ
772
- retake_variance=retake_variance,
773
- task="retake"
774
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
775
 
776
- retake_bnt.click(
777
- fn=retake_process_func,
778
- inputs=[
779
- input_params_json,
780
- retake_variance,
781
- retake_seeds,
782
- ],
783
- outputs=retake_outputs + [retake_input_params_json],
784
- )
785
-
786
- with gr.Tab("repainting"):
787
- retake_variance = gr.Slider(
788
- minimum=0.0, maximum=1.0, step=0.01, value=0.2, label="variance"
789
- )
790
- retake_seeds = gr.Textbox(
791
- label="repaint seeds (default None)", placeholder="", value=None
792
- )
793
- repaint_start = gr.Slider(
794
- minimum=0.0,
795
- maximum=240.0,
796
- step=0.01,
797
- value=0.0,
798
- label="Repaint Start Time",
799
- interactive=True,
800
- )
801
- repaint_end = gr.Slider(
802
- minimum=0.0,
803
- maximum=240.0,
804
- step=0.01,
805
- value=30.0,
806
- label="Repaint End Time",
807
- interactive=True,
808
- )
809
- repaint_source = gr.Radio(
810
- ["text2music", "last_repaint", "upload"],
811
- value="text2music",
812
- label="Repaint Source",
813
- elem_id="repaint_source",
814
- )
815
 
816
- repaint_source_audio_upload = gr.Audio(
817
- label="Upload Audio",
818
- type="filepath",
819
- visible=False,
820
- elem_id="repaint_source_audio_upload",
821
- show_download_button=True,
822
- )
823
- repaint_source.change(
824
- fn=lambda x: gr.update(
825
- visible=x == "upload", elem_id="repaint_source_audio_upload"
826
- ),
827
- inputs=[repaint_source],
828
- outputs=[repaint_source_audio_upload],
829
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
830
 
831
- repaint_bnt = gr.Button("Repaint", variant="primary")
832
- repaint_outputs, repaint_input_params_json = create_output_ui("Repaint")
833
-
834
- def repaint_process_func(
835
- text2music_json_data,
836
- repaint_json_data,
837
- retake_variance,
838
- retake_seeds,
839
- repaint_start,
840
- repaint_end,
841
- repaint_source,
842
- repaint_source_audio_upload,
843
- prompt,
844
- lyrics,
845
- infer_step,
846
- guidance_scale,
847
- scheduler_type,
848
- cfg_type,
849
- omega_scale,
850
- manual_seeds,
851
- guidance_interval,
852
- guidance_interval_decay,
853
- min_guidance_scale,
854
- use_erg_tag,
855
- use_erg_lyric,
856
- use_erg_diffusion,
857
- oss_steps,
858
- guidance_scale_text,
859
- guidance_scale_lyric,
860
- ):
861
- if repaint_source == "upload":
862
- src_audio_path = repaint_source_audio_upload
863
- audio_duration = librosa.get_duration(filename=src_audio_path)
864
- json_data = {"audio_duration": audio_duration}
865
- elif repaint_source == "text2music":
866
- json_data = text2music_json_data
867
- src_audio_path = json_data["audio_path"]
868
- elif repaint_source == "last_repaint":
869
- json_data = repaint_json_data
870
- src_audio_path = json_data["audio_path"]
871
-
872
- return enhanced_process_func(
873
- json_data["audio_duration"],
874
- prompt,
875
- lyrics,
876
- infer_step,
877
- guidance_scale,
878
- scheduler_type,
879
- cfg_type,
880
- omega_scale,
881
- manual_seeds,
882
- guidance_interval,
883
- guidance_interval_decay,
884
- min_guidance_scale,
885
- use_erg_tag,
886
- use_erg_lyric,
887
- use_erg_diffusion,
888
- oss_steps,
889
- guidance_scale_text,
890
- guidance_scale_lyric,
891
- retake_seeds=retake_seeds,
892
- retake_variance=retake_variance,
893
- task="repaint",
894
- repaint_start=repaint_start,
895
- repaint_end=repaint_end,
896
- src_audio_path=src_audio_path,
897
- lora_name_or_path="none"
898
  )
899
 
900
- repaint_bnt.click(
901
- fn=repaint_process_func,
902
- inputs=[
903
- input_params_json,
904
- repaint_input_params_json,
 
905
  retake_variance,
906
  retake_seeds,
907
  repaint_start,
@@ -925,157 +675,150 @@ def create_text2music_ui(
925
  oss_steps,
926
  guidance_scale_text,
927
  guidance_scale_lyric,
928
- ],
929
- outputs=repaint_outputs + [repaint_input_params_json],
930
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
931
 
932
- with gr.Tab("edit"):
933
- edit_prompt = gr.Textbox(lines=2, label="Edit Tags", max_lines=4)
934
- edit_lyrics = gr.Textbox(lines=9, label="Edit Lyrics", max_lines=13)
935
- retake_seeds = gr.Textbox(
936
- label="edit seeds (default None)", placeholder="", value=None
937
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
938
 
939
- edit_type = gr.Radio(
940
- ["only_lyrics", "remix"],
941
- value="only_lyrics",
942
- label="Edit Type",
943
- elem_id="edit_type",
944
- info="`only_lyrics` will keep the whole song the same except lyrics difference. Make your diffrence smaller, e.g. one lyrc line change.\nremix can change the song melody and genre",
945
- )
946
- edit_n_min = gr.Slider(
947
- minimum=0.0,
948
- maximum=1.0,
949
- step=0.01,
950
- value=0.6,
951
- label="edit_n_min",
952
- interactive=True,
953
- )
954
- edit_n_max = gr.Slider(
955
- minimum=0.0,
956
- maximum=1.0,
957
- step=0.01,
958
- value=1.0,
959
- label="edit_n_max",
960
- interactive=True,
961
- )
962
 
963
- def edit_type_change_func(edit_type):
964
- if edit_type == "only_lyrics":
965
- n_min = 0.6
966
- n_max = 1.0
967
- elif edit_type == "remix":
968
- n_min = 0.2
969
- n_max = 0.4
970
- return n_min, n_max
971
-
972
- edit_type.change(
973
- edit_type_change_func,
974
- inputs=[edit_type],
975
- outputs=[edit_n_min, edit_n_max],
976
- )
 
 
 
 
 
 
 
 
 
977
 
978
- edit_source = gr.Radio(
979
- ["text2music", "last_edit", "upload"],
980
- value="text2music",
981
- label="Edit Source",
982
- elem_id="edit_source",
983
- )
984
- edit_source_audio_upload = gr.Audio(
985
- label="Upload Audio",
986
- type="filepath",
987
- visible=False,
988
- elem_id="edit_source_audio_upload",
989
- show_download_button=True,
990
- )
991
- edit_source.change(
992
- fn=lambda x: gr.update(
993
- visible=x == "upload", elem_id="edit_source_audio_upload"
994
- ),
995
- inputs=[edit_source],
996
- outputs=[edit_source_audio_upload],
997
- )
998
 
999
- edit_bnt = gr.Button("Edit", variant="primary")
1000
- edit_outputs, edit_input_params_json = create_output_ui("Edit")
1001
-
1002
- def edit_process_func(
1003
- text2music_json_data,
1004
- edit_input_params_json,
1005
- edit_source,
1006
- edit_source_audio_upload,
1007
- prompt,
1008
- lyrics,
1009
- edit_prompt,
1010
- edit_lyrics,
1011
- edit_n_min,
1012
- edit_n_max,
1013
- infer_step,
1014
- guidance_scale,
1015
- scheduler_type,
1016
- cfg_type,
1017
- omega_scale,
1018
- manual_seeds,
1019
- guidance_interval,
1020
- guidance_interval_decay,
1021
- min_guidance_scale,
1022
- use_erg_tag,
1023
- use_erg_lyric,
1024
- use_erg_diffusion,
1025
- oss_steps,
1026
- guidance_scale_text,
1027
- guidance_scale_lyric,
1028
- retake_seeds,
1029
- ):
1030
- if edit_source == "upload":
1031
- src_audio_path = edit_source_audio_upload
1032
- audio_duration = librosa.get_duration(filename=src_audio_path)
1033
- json_data = {"audio_duration": audio_duration}
1034
- elif edit_source == "text2music":
1035
- json_data = text2music_json_data
1036
- src_audio_path = json_data["audio_path"]
1037
- elif edit_source == "last_edit":
1038
- json_data = edit_input_params_json
1039
- src_audio_path = json_data["audio_path"]
1040
-
1041
- if not edit_prompt:
1042
- edit_prompt = prompt
1043
- if not edit_lyrics:
1044
- edit_lyrics = lyrics
1045
-
1046
- return enhanced_process_func(
1047
- json_data["audio_duration"],
1048
- prompt,
1049
- lyrics,
1050
- infer_step,
1051
- guidance_scale,
1052
- scheduler_type,
1053
- cfg_type,
1054
- omega_scale,
1055
- manual_seeds,
1056
- guidance_interval,
1057
- guidance_interval_decay,
1058
- min_guidance_scale,
1059
- use_erg_tag,
1060
- use_erg_lyric,
1061
- use_erg_diffusion,
1062
- oss_steps,
1063
- guidance_scale_text,
1064
- guidance_scale_lyric,
1065
- task="edit",
1066
- src_audio_path=src_audio_path,
1067
- edit_target_prompt=edit_prompt,
1068
- edit_target_lyrics=edit_lyrics,
1069
- edit_n_min=edit_n_min,
1070
- edit_n_max=edit_n_max,
1071
- retake_seeds=retake_seeds,
1072
- lora_name_or_path="none"
1073
  )
1074
 
1075
- edit_bnt.click(
1076
- fn=edit_process_func,
1077
- inputs=[
1078
- input_params_json,
 
1079
  edit_input_params_json,
1080
  edit_source,
1081
  edit_source_audio_upload,
@@ -1101,127 +844,132 @@ def create_text2music_ui(
1101
  guidance_scale_text,
1102
  guidance_scale_lyric,
1103
  retake_seeds,
1104
- ],
1105
- outputs=edit_outputs + [edit_input_params_json],
1106
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1107
 
1108
- with gr.Tab("extend"):
1109
- extend_seeds = gr.Textbox(
1110
- label="extend seeds (default None)", placeholder="", value=None
1111
- )
1112
- left_extend_length = gr.Slider(
1113
- minimum=0.0,
1114
- maximum=240.0,
1115
- step=0.01,
1116
- value=0.0,
1117
- label="Left Extend Length",
1118
- interactive=True,
1119
- )
1120
- right_extend_length = gr.Slider(
1121
- minimum=0.0,
1122
- maximum=240.0,
1123
- step=0.01,
1124
- value=30.0,
1125
- label="Right Extend Length",
1126
- interactive=True,
1127
- )
1128
- extend_source = gr.Radio(
1129
- ["text2music", "last_extend", "upload"],
1130
- value="text2music",
1131
- label="Extend Source",
1132
- elem_id="extend_source",
1133
- )
 
 
 
 
 
 
1134
 
1135
- extend_source_audio_upload = gr.Audio(
1136
- label="Upload Audio",
1137
- type="filepath",
1138
- visible=False,
1139
- elem_id="extend_source_audio_upload",
1140
- show_download_button=True,
1141
- )
1142
- extend_source.change(
1143
- fn=lambda x: gr.update(
1144
- visible=x == "upload", elem_id="extend_source_audio_upload"
1145
- ),
1146
- inputs=[extend_source],
1147
- outputs=[extend_source_audio_upload],
1148
- )
 
 
 
 
 
 
 
 
 
 
 
 
1149
 
1150
- extend_bnt = gr.Button("Extend", variant="primary")
1151
- extend_outputs, extend_input_params_json = create_output_ui("Extend")
1152
-
1153
- def extend_process_func(
1154
- text2music_json_data,
1155
- extend_input_params_json,
1156
- extend_seeds,
1157
- left_extend_length,
1158
- right_extend_length,
1159
- extend_source,
1160
- extend_source_audio_upload,
1161
- prompt,
1162
- lyrics,
1163
- infer_step,
1164
- guidance_scale,
1165
- scheduler_type,
1166
- cfg_type,
1167
- omega_scale,
1168
- manual_seeds,
1169
- guidance_interval,
1170
- guidance_interval_decay,
1171
- min_guidance_scale,
1172
- use_erg_tag,
1173
- use_erg_lyric,
1174
- use_erg_diffusion,
1175
- oss_steps,
1176
- guidance_scale_text,
1177
- guidance_scale_lyric,
1178
- ):
1179
- if extend_source == "upload":
1180
- src_audio_path = extend_source_audio_upload
1181
- # get audio duration
1182
- audio_duration = librosa.get_duration(filename=src_audio_path)
1183
- json_data = {"audio_duration": audio_duration}
1184
- elif extend_source == "text2music":
1185
- json_data = text2music_json_data
1186
- src_audio_path = json_data["audio_path"]
1187
- elif extend_source == "last_extend":
1188
- json_data = extend_input_params_json
1189
- src_audio_path = json_data["audio_path"]
1190
-
1191
- repaint_start = -left_extend_length
1192
- repaint_end = json_data["audio_duration"] + right_extend_length
1193
- return enhanced_process_func(
1194
- json_data["audio_duration"],
1195
- prompt,
1196
- lyrics,
1197
- infer_step,
1198
- guidance_scale,
1199
- scheduler_type,
1200
- cfg_type,
1201
- omega_scale,
1202
- manual_seeds,
1203
- guidance_interval,
1204
- guidance_interval_decay,
1205
- min_guidance_scale,
1206
- use_erg_tag,
1207
- use_erg_lyric,
1208
- use_erg_diffusion,
1209
- oss_steps,
1210
- guidance_scale_text,
1211
- guidance_scale_lyric,
1212
- retake_seeds=extend_seeds,
1213
- retake_variance=1.0,
1214
- task="extend",
1215
- repaint_start=repaint_start,
1216
- repaint_end=repaint_end,
1217
- src_audio_path=src_audio_path,
1218
- lora_name_or_path="none"
1219
  )
 
 
 
 
 
 
 
 
 
 
1220
 
1221
- extend_bnt.click(
1222
- fn=extend_process_func,
1223
- inputs=[
1224
- input_params_json,
1225
  extend_input_params_json,
1226
  extend_seeds,
1227
  left_extend_length,
@@ -1245,92 +993,245 @@ def create_text2music_ui(
1245
  oss_steps,
1246
  guidance_scale_text,
1247
  guidance_scale_lyric,
1248
- ],
1249
- outputs=extend_outputs + [extend_input_params_json],
1250
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1251
 
1252
- def json2output(json_data):
1253
- return (
1254
- json_data["audio_duration"],
1255
- json_data["prompt"],
1256
- json_data["lyrics"],
1257
- json_data["infer_step"],
1258
- json_data["guidance_scale"],
1259
- json_data["scheduler_type"],
1260
- json_data["cfg_type"],
1261
- json_data["omega_scale"],
1262
- ", ".join(map(str, json_data["actual_seeds"])),
1263
- json_data["guidance_interval"],
1264
- json_data["guidance_interval_decay"],
1265
- json_data["min_guidance_scale"],
1266
- json_data["use_erg_tag"],
1267
- json_data["use_erg_lyric"],
1268
- json_data["use_erg_diffusion"],
1269
- ", ".join(map(str, json_data["oss_steps"])),
1270
- (
1271
- json_data["guidance_scale_text"]
1272
- if "guidance_scale_text" in json_data
1273
- else 0.0
1274
- ),
1275
- (
1276
- json_data["guidance_scale_lyric"]
1277
- if "guidance_scale_lyric" in json_data
1278
- else 0.0
1279
- ),
1280
- (
1281
- json_data["audio2audio_enable"]
1282
- if "audio2audio_enable" in json_data
1283
- else False
1284
- ),
1285
- (
1286
- json_data["ref_audio_strength"]
1287
- if "ref_audio_strength" in json_data
1288
- else 0.5
1289
- ),
1290
- (
1291
- json_data["ref_audio_input"]
1292
- if "ref_audio_input" in json_data
1293
- else None
1294
- ),
1295
  )
 
 
 
1296
 
1297
- def sample_data(lora_name_or_path_):
1298
- if sample_data_func:
1299
- # sample_data_func๋Š” ์ธ์ž๋ฅผ ๋ฐ›์ง€ ์•Š๋Š” ๋ฉ”์„œ๋“œ์ด๋ฏ€๋กœ ์ธ์ž ์—†์ด ํ˜ธ์ถœ
1300
- json_data = sample_data_func() # lora_name_or_path_ ์ธ์ž ์ œ๊ฑฐ
1301
- return json2output(json_data)
1302
- return {}
1303
-
1304
-
1305
- sample_bnt.click(
1306
- sample_data,
1307
- inputs=[lora_name_or_path],
1308
- outputs=[
1309
- audio_duration,
1310
- prompt,
1311
- lyrics,
1312
- infer_step,
1313
- guidance_scale,
1314
- scheduler_type,
1315
- cfg_type,
1316
- omega_scale,
1317
- manual_seeds,
1318
- guidance_interval,
1319
- guidance_interval_decay,
1320
- min_guidance_scale,
1321
- use_erg_tag,
1322
- use_erg_lyric,
1323
- use_erg_diffusion,
1324
- oss_steps,
1325
- guidance_scale_text,
1326
- guidance_scale_lyric,
1327
- audio2audio_enable,
1328
- ref_audio_strength,
1329
- ref_audio_input,
1330
- ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1331
  )
1332
 
1333
- # ๋ฉ”์ธ ์ƒ์„ฑ ๋ฒ„ํŠผ ์ด๋ฒคํŠธ (ํ–ฅ์ƒ๋œ ํ•จ์ˆ˜ ์‚ฌ์šฉ)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1334
  text2music_bnt.click(
1335
  fn=enhanced_process_func,
1336
  inputs=[
@@ -1370,57 +1271,146 @@ def create_main_demo_ui(
1370
  load_data_func=dump_func,
1371
  ):
1372
  with gr.Blocks(
1373
- title="ACE-Step Model 1.0 DEMO - Enhanced",
1374
- theme=gr.themes.Soft(),
 
 
 
 
1375
  css="""
1376
  .gradio-container {
1377
- max-width: 1200px !important;
 
1378
  }
1379
- .quality-info {
1380
- background: linear-gradient(45deg, #f0f8ff, #e6f3ff);
1381
- padding: 10px;
1382
- border-radius: 8px;
1383
- margin: 5px 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1384
  }
1385
  """
1386
  ) as demo:
1387
  gr.Markdown(
1388
  """
1389
- <h1 style="text-align: center;">๐ŸŽต ACE-Step PRO</h1>
1390
- <div style="text-align: center; margin: 20px;">
1391
- <p><strong>๐Ÿš€ ์ƒˆ๋กœ์šด ๊ธฐ๋Šฅ:</strong> ํ’ˆ์งˆ ํ”„๋ฆฌ์…‹ | ๋‹ค์ค‘ ์ƒ์„ฑ | ์Šค๋งˆํŠธ ํ”„๋กฌํ”„ํŠธ | ์‹ค์‹œ๊ฐ„ ํ”„๋ฆฌ๋ทฐ | ํ’ˆ์งˆ ์ ์ˆ˜</p>
1392
- <p>
1393
- <a href="https://ace-step.github.io/" target='_blank'>Project</a> |
1394
- <a href="https://huggingface.co/ACE-Step/ACE-Step-v1-3.5B">Checkpoints</a> |
1395
- <a href="https://discord.gg/rjAZz2xBdG" target='_blank'>Discord</a>
1396
  </p>
1397
  </div>
1398
- """
1399
  )
1400
 
1401
- # ์‚ฌ์šฉ๋ฒ• ๊ฐ€์ด๋“œ ์ถ”๊ฐ€
1402
- with gr.Accordion("๐Ÿ“– ์‚ฌ์šฉ๋ฒ• ๊ฐ€์ด๋“œ", open=False):
1403
- gr.Markdown("""
1404
- ### ๐ŸŽฏ ๋น ๋ฅธ ์‹œ์ž‘
1405
- 1. **์žฅ๋ฅด ์„ ํƒ**: ์›ํ•˜๋Š” ์Œ์•… ์žฅ๋ฅด๋ฅผ ์„ ํƒํ•˜๋ฉด ์ž๋™์œผ๋กœ ์ตœ์ ํ™”๋œ ํƒœ๊ทธ๊ฐ€ ์ ์šฉ๋ฉ๋‹ˆ๋‹ค
1406
- 2. **ํ’ˆ์งˆ ์„ค์ •**: Draft(๋น ๋ฆ„) โ†’ Standard(๊ถŒ์žฅ) โ†’ High Quality โ†’ Ultra ์ค‘ ์„ ํƒ
1407
- 3. **๋‹ค์ค‘ ์ƒ์„ฑ**: "Best of 3/5/10" ์„ ํƒํ•˜๋ฉด ์—ฌ๋Ÿฌ ๋ฒˆ ์ƒ์„ฑํ•˜์—ฌ ์ตœ๊ณ  ํ’ˆ์งˆ์„ ์ž๋™ ์„ ํƒํ•ฉ๋‹ˆ๋‹ค
1408
- 4. **ํ”„๋ฆฌ๋ทฐ**: ์ „์ฒด ์ƒ์„ฑ ์ „ 10์ดˆ ํ”„๋ฆฌ๋ทฐ๋กœ ๋น ๋ฅด๊ฒŒ ํ™•์ธํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค
1409
-
1410
- ### ๐Ÿ’ก ํ’ˆ์งˆ ํ–ฅ์ƒ ํŒ
1411
- - **๊ณ ํ’ˆ์งˆ ์ƒ์„ฑ**: "High Quality" + "Best of 5" ์กฐํ•ฉ ์ถ”์ฒœ
1412
- - **๋น ๋ฅธ ํ…Œ์ŠคํŠธ**: "Draft" + "ํ”„๋ฆฌ๋ทฐ" ๊ธฐ๋Šฅ ํ™œ์šฉ
1413
- - **์žฅ๋ฅด ํŠนํ™”**: ์žฅ๋ฅด ํ”„๋ฆฌ์…‹ ์„ ํƒ ํ›„ "์Šค๋งˆํŠธ ํ–ฅ์ƒ" ์ฒดํฌ
1414
- - **๊ฐ€์‚ฌ ๊ตฌ์กฐ**: [verse], [chorus], [bridge] ํƒœ๊ทธ ์ ๊ทน ํ™œ์šฉ
1415
- """)
1416
-
1417
- with gr.Tab("๐ŸŽต Enhanced Text2Music"):
1418
  create_text2music_ui(
1419
  gr=gr,
1420
  text2music_process_func=text2music_process_func,
1421
  sample_data_func=sample_data_func,
1422
  load_data_func=load_data_func,
1423
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1424
  return demo
1425
 
1426
 
@@ -1430,4 +1420,4 @@ if __name__ == "__main__":
1430
  server_name="0.0.0.0",
1431
  server_port=7860,
1432
  share=True # ๊ณต์œ  ๋งํฌ ์ƒ์„ฑ
1433
- )
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import librosa
3
  import os
 
142
  "High Quality": {
143
  "infer_step": 200,
144
  "guidance_scale": 18.0,
145
+ "scheduler_type": "euler",
146
  "omega_scale": 15.0,
147
  "use_erg_diffusion": True,
148
  "use_erg_tag": True,
 
151
  "Ultra (Best)": {
152
  "infer_step": 299,
153
  "guidance_scale": 20.0,
154
+ "scheduler_type": "euler",
155
  "omega_scale": 20.0,
156
  "use_erg_diffusion": True,
157
  "use_erg_tag": True,
 
390
  enhanced_process_func = create_enhanced_process_func(text2music_process_func)
391
 
392
  with gr.Row():
393
+ # ์™ผ์ชฝ ์ปฌ๋Ÿผ - ์ž…๋ ฅ ์„ค์ •
394
+ with gr.Column(scale=5):
395
+ # ์ƒ๋‹จ ๋ฉ”์ธ ์ปจํŠธ๋กค
396
  with gr.Group():
397
+ gr.Markdown("## ๐ŸŽฏ Quick Settings")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
398
 
399
+ with gr.Row():
400
+ with gr.Column(scale=2):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
401
  genre_preset = gr.Dropdown(
402
  choices=["Custom"] + list(GENRE_PRESETS.keys()),
403
  value="Custom",
404
+ label="๐ŸŽต ์žฅ๋ฅด ํ”„๋ฆฌ์…‹",
405
+ interactive=True,
406
+ )
407
+ with gr.Column(scale=2):
408
+ quality_preset = gr.Dropdown(
409
+ choices=list(QUALITY_PRESETS.keys()),
410
+ value="Standard",
411
+ label="โšก ํ’ˆ์งˆ ํ”„๋ฆฌ์…‹",
412
+ interactive=True
413
+ )
414
+ with gr.Column(scale=1):
415
+ audio_duration = gr.Slider(
416
+ -1,
417
+ 240.0,
418
+ step=1,
419
+ value=-1,
420
+ label="โฑ๏ธ ๊ธธ์ด (์ดˆ)",
421
+ info="-1 = ๋žœ๋ค",
422
+ interactive=True,
423
+ )
424
+
425
+ with gr.Row():
426
+ preset_description = gr.Textbox(
427
+ value=QUALITY_PRESETS["Standard"]["description"],
428
+ label="ํ’ˆ์งˆ ์„ค๋ช…",
429
+ interactive=False,
430
+ max_lines=1
431
+ )
432
+
433
+ # ํ”„๋กฌํ”„ํŠธ ์„น์…˜
434
+ with gr.Group():
435
+ gr.Markdown("## ๐ŸŽผ Music Prompt")
436
+
437
+ with gr.Row():
438
+ with gr.Column(scale=3):
439
+ prompt = gr.Textbox(
440
+ lines=3,
441
+ label="ํƒœ๊ทธ (์ฝค๋งˆ๋กœ ๊ตฌ๋ถ„)",
442
+ value=TAG_DEFAULT,
443
+ placeholder="์žฅ๋ฅด, ์•…๊ธฐ, BPM, ๋ถ„์œ„๊ธฐ ๋“ฑ์„ ์ฝค๋งˆ๋กœ ๊ตฌ๋ถ„ํ•˜์—ฌ ์ž…๋ ฅ...",
444
+ elem_id="prompt"
445
  )
446
+ with gr.Column(scale=1):
447
  enable_smart_enhancement = gr.Checkbox(
448
+ label="โœจ ์Šค๋งˆํŠธ ํ–ฅ์ƒ",
449
  value=True,
450
+ info="์ž๋™ ํƒœ๊ทธ ์ตœ์ ํ™”"
451
+ )
452
+ multi_seed_mode = gr.Dropdown(
453
+ choices=list(MULTI_SEED_OPTIONS.keys()),
454
+ value="Single",
455
+ label="๐ŸŽฒ ๋‹ค์ค‘ ์ƒ์„ฑ",
456
+ info="์—ฌ๋Ÿฌ ๋ฒˆ ์ƒ์„ฑํ•˜์—ฌ ์ตœ๊ณ  ํ’ˆ์งˆ ์„ ํƒ"
457
  )
 
 
 
 
 
 
 
 
458
 
459
+ # ๊ฐ€์‚ฌ ์„น์…˜
460
  with gr.Group():
461
+ gr.Markdown("## ๐Ÿ“ Lyrics")
 
462
 
 
463
  with gr.Row():
464
+ with gr.Column(scale=3):
465
+ topic_for_lyrics = gr.Textbox(
466
+ lines=1,
467
+ label="๊ฐ€์‚ฌ ์ฃผ์ œ (AI ์ž๋™ ์ƒ์„ฑ)",
468
+ placeholder="์˜ˆ: ์ฒซ์‚ฌ๋ž‘์˜ ์„ค๋ ˜, ์—ฌ๋ฆ„๋ฐค์˜ ์ถ”์–ต, ๋„์‹œ์˜ ๋ถˆ๋น›...",
469
+ elem_id="topic"
470
+ )
471
+ with gr.Column(scale=1):
472
+ generate_lyrics_btn = gr.Button(
473
+ "๐Ÿค– ๊ฐ€์‚ฌ ์ƒ์„ฑ",
474
+ variant="secondary",
475
+ size="sm"
476
+ )
477
 
 
478
  lyrics = gr.Textbox(
479
+ lines=10,
480
+ label="๊ฐ€์‚ฌ ์ž…๋ ฅ",
 
481
  value=LYRIC_DEFAULT,
482
+ placeholder="[verse], [chorus], [bridge] ํƒœ๊ทธ๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ๊ตฌ์กฐํ™”๋œ ๊ฐ€์‚ฌ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”...",
483
+ elem_id="lyrics"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
484
  )
485
 
486
+ # audio2audio ์˜ต์…˜ (๋” ๊ฐ„๊ฒฐํ•˜๊ฒŒ)
487
+ with gr.Accordion("๐ŸŽต Audio2Audio ์„ค์ •", open=False):
488
+ audio2audio_enable = gr.Checkbox(
489
+ label="Audio2Audio ํ™œ์„ฑํ™”",
490
+ value=False,
491
+ info="์ฐธ์กฐ ์˜ค๋””์˜ค๋ฅผ ์‚ฌ์šฉํ•œ ์ƒ์„ฑ"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
492
  )
493
+ lora_name_or_path = gr.Dropdown(
494
+ label="LoRA ๋ชจ๋ธ",
495
+ choices=["none", "ACE-Step/ACE-Step-v1-chinese-rap-LoRA"],
496
+ value="none",
497
+ allow_custom_value=True,
498
  )
499
+ ref_audio_input = gr.Audio(
500
+ type="filepath",
501
+ label="์ฐธ์กฐ ์˜ค๋””์˜ค",
502
+ visible=False
503
  )
504
+ ref_audio_strength = gr.Slider(
505
+ label="์ฐธ์กฐ ๊ฐ•๋„",
 
 
 
 
 
 
 
 
 
 
506
  minimum=0.0,
507
  maximum=1.0,
508
  step=0.01,
509
  value=0.5,
510
+ visible=False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
  )
512
 
513
+ # ๊ณ ๊ธ‰ ์„ค์ • (์ ‘ํ˜€์žˆ์Œ)
514
+ with gr.Accordion("โš™๏ธ ๊ณ ๊ธ‰ ์„ค์ •", open=False):
515
+ with gr.Row():
516
+ with gr.Column():
517
+ infer_step = gr.Slider(1, 300, 150, 1, label="์ถ”๋ก  ์Šคํ…")
518
+ guidance_scale = gr.Slider(0.0, 30.0, 15.0, 0.1, label="๊ฐ€์ด๋˜์Šค ์Šค์ผ€์ผ")
519
+ manual_seeds = gr.Textbox(label="์‹œ๋“œ๊ฐ’", placeholder="1,2,3,4", value=None)
520
+
521
+ with gr.Column():
522
+ scheduler_type = gr.Radio(["euler", "heun"], value="euler", label="์Šค์ผ€์ค„๋Ÿฌ")
523
+ cfg_type = gr.Radio(["cfg", "apg", "cfg_star"], value="apg", label="CFG ํƒ€์ž…")
524
+ omega_scale = gr.Slider(-100.0, 100.0, 10.0, 0.1, label="๊ทธ๋ž˜๋‰ผ๋Ÿฌ๋ฆฌํ‹ฐ ์Šค์ผ€์ผ")
 
 
525
 
526
+ with gr.Row():
527
+ with gr.Column():
528
+ use_erg_tag = gr.Checkbox(label="ERG for tag", value=True)
529
+ use_erg_lyric = gr.Checkbox(label="ERG for lyric", value=False)
530
+ use_erg_diffusion = gr.Checkbox(label="ERG for diffusion", value=True)
531
+
532
+ with gr.Column():
533
+ guidance_interval = gr.Slider(0.0, 1.0, 0.5, 0.01, label="๊ฐ€์ด๋˜์Šค ์ธํ„ฐ๋ฒŒ")
534
+ guidance_interval_decay = gr.Slider(0.0, 1.0, 0.0, 0.01, label="๊ฐ€์ด๋˜์Šค ๊ฐ์‡ ")
535
+ min_guidance_scale = gr.Slider(0.0, 200.0, 3.0, 0.1, label="์ตœ์†Œ ๊ฐ€์ด๋˜์Šค")
536
 
537
+ with gr.Row():
538
+ guidance_scale_text = gr.Slider(0.0, 10.0, 0.0, 0.1, label="ํ…์ŠคํŠธ ๊ฐ€์ด๋˜์Šค")
539
+ guidance_scale_lyric = gr.Slider(0.0, 10.0, 0.0, 0.1, label="๊ฐ€์‚ฌ ๊ฐ€์ด๋˜์Šค")
540
+ oss_steps = gr.Textbox(label="OSS Steps", placeholder="16, 29, 52...", value=None)
541
+
542
+ # ์ƒ์„ฑ ๋ฒ„ํŠผ๋“ค
543
+ with gr.Row():
544
+ sample_bnt = gr.Button("๐ŸŽฒ ์ƒ˜ํ”Œ", variant="secondary", scale=1)
545
+ preview_bnt = gr.Button("๐Ÿ‘๏ธ ๋ฏธ๋ฆฌ๋“ฃ๊ธฐ (10์ดˆ)", variant="secondary", scale=2)
546
+ text2music_bnt = gr.Button("๐ŸŽต ์Œ์•… ์ƒ์„ฑ", variant="primary", scale=3, size="lg")
547
+
548
+ # ์˜ค๋ฅธ์ชฝ ์ปฌ๋Ÿผ - ์ถœ๋ ฅ
549
+ with gr.Column(scale=5):
550
+ gr.Markdown("## ๐ŸŽง Generated Music")
551
  outputs, input_params_json = create_output_ui()
552
 
553
+ # ์ˆจ๊ฒจ์ง„ ํƒญ๋“ค (visible=False)
554
+ with gr.Tabs(visible=False):
555
+ with gr.Tab("retake", visible=False):
556
+ retake_variance = gr.Slider(
557
+ minimum=0.0, maximum=1.0, step=0.01, value=0.2, label="variance"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
558
  )
559
+ retake_seeds = gr.Textbox(
560
+ label="retake seeds (default None)", placeholder="", value=None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
561
  )
562
+ retake_bnt = gr.Button("Retake", variant="primary")
563
+ retake_outputs, retake_input_params_json = create_output_ui("Retake")
564
+
565
+ def retake_process_func(json_data, retake_variance, retake_seeds):
566
+ return enhanced_process_func(
567
+ json_data.get("audio_duration", 30),
568
+ json_data.get("prompt", ""),
569
+ json_data.get("lyrics", ""),
570
+ json_data.get("infer_step", 100),
571
+ json_data.get("guidance_scale", 15.0),
572
+ json_data.get("scheduler_type", "euler"),
573
+ json_data.get("cfg_type", "apg"),
574
+ json_data.get("omega_scale", 10.0),
575
+ retake_seeds,
576
+ json_data.get("guidance_interval", 0.5),
577
+ json_data.get("guidance_interval_decay", 0.0),
578
+ json_data.get("min_guidance_scale", 3.0),
579
+ json_data.get("use_erg_tag", True),
580
+ json_data.get("use_erg_lyric", False),
581
+ json_data.get("use_erg_diffusion", True),
582
+ json_data.get("oss_steps", None),
583
+ json_data.get("guidance_scale_text", 0.0),
584
+ json_data.get("guidance_scale_lyric", 0.0),
585
+ audio2audio_enable=json_data.get("audio2audio_enable", False),
586
+ ref_audio_strength=json_data.get("ref_audio_strength", 0.5),
587
+ ref_audio_input=json_data.get("ref_audio_input", None),
588
+ lora_name_or_path=json_data.get("lora_name_or_path", "none"),
589
+ multi_seed_mode="Best of 3",
590
+ retake_variance=retake_variance,
591
+ task="retake"
592
+ )
593
 
594
+ retake_bnt.click(
595
+ fn=retake_process_func,
596
+ inputs=[
597
+ input_params_json,
598
+ retake_variance,
599
+ retake_seeds,
600
+ ],
601
+ outputs=retake_outputs + [retake_input_params_json],
602
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
603
 
604
+ with gr.Tab("repainting", visible=False):
605
+ retake_variance = gr.Slider(
606
+ minimum=0.0, maximum=1.0, step=0.01, value=0.2, label="variance"
607
+ )
608
+ retake_seeds = gr.Textbox(
609
+ label="repaint seeds (default None)", placeholder="", value=None
610
+ )
611
+ repaint_start = gr.Slider(
612
+ minimum=0.0,
613
+ maximum=240.0,
614
+ step=0.01,
615
+ value=0.0,
616
+ label="Repaint Start Time",
617
+ interactive=True,
618
+ )
619
+ repaint_end = gr.Slider(
620
+ minimum=0.0,
621
+ maximum=240.0,
622
+ step=0.01,
623
+ value=30.0,
624
+ label="Repaint End Time",
625
+ interactive=True,
626
+ )
627
+ repaint_source = gr.Radio(
628
+ ["text2music", "last_repaint", "upload"],
629
+ value="text2music",
630
+ label="Repaint Source",
631
+ elem_id="repaint_source",
632
+ )
633
 
634
+ repaint_source_audio_upload = gr.Audio(
635
+ label="Upload Audio",
636
+ type="filepath",
637
+ visible=False,
638
+ elem_id="repaint_source_audio_upload",
639
+ show_download_button=True,
640
+ )
641
+ repaint_source.change(
642
+ fn=lambda x: gr.update(
643
+ visible=x == "upload", elem_id="repaint_source_audio_upload"
644
+ ),
645
+ inputs=[repaint_source],
646
+ outputs=[repaint_source_audio_upload],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
647
  )
648
 
649
+ repaint_bnt = gr.Button("Repaint", variant="primary")
650
+ repaint_outputs, repaint_input_params_json = create_output_ui("Repaint")
651
+
652
+ def repaint_process_func(
653
+ text2music_json_data,
654
+ repaint_json_data,
655
  retake_variance,
656
  retake_seeds,
657
  repaint_start,
 
675
  oss_steps,
676
  guidance_scale_text,
677
  guidance_scale_lyric,
678
+ ):
679
+ if repaint_source == "upload":
680
+ src_audio_path = repaint_source_audio_upload
681
+ audio_duration = librosa.get_duration(filename=src_audio_path)
682
+ json_data = {"audio_duration": audio_duration}
683
+ elif repaint_source == "text2music":
684
+ json_data = text2music_json_data
685
+ src_audio_path = json_data["audio_path"]
686
+ elif repaint_source == "last_repaint":
687
+ json_data = repaint_json_data
688
+ src_audio_path = json_data["audio_path"]
689
+
690
+ return enhanced_process_func(
691
+ json_data["audio_duration"],
692
+ prompt,
693
+ lyrics,
694
+ infer_step,
695
+ guidance_scale,
696
+ scheduler_type,
697
+ cfg_type,
698
+ omega_scale,
699
+ manual_seeds,
700
+ guidance_interval,
701
+ guidance_interval_decay,
702
+ min_guidance_scale,
703
+ use_erg_tag,
704
+ use_erg_lyric,
705
+ use_erg_diffusion,
706
+ oss_steps,
707
+ guidance_scale_text,
708
+ guidance_scale_lyric,
709
+ retake_seeds=retake_seeds,
710
+ retake_variance=retake_variance,
711
+ task="repaint",
712
+ repaint_start=repaint_start,
713
+ repaint_end=repaint_end,
714
+ src_audio_path=src_audio_path,
715
+ lora_name_or_path="none"
716
+ )
717
 
718
+ repaint_bnt.click(
719
+ fn=repaint_process_func,
720
+ inputs=[
721
+ input_params_json,
722
+ repaint_input_params_json,
723
+ retake_variance,
724
+ retake_seeds,
725
+ repaint_start,
726
+ repaint_end,
727
+ repaint_source,
728
+ repaint_source_audio_upload,
729
+ prompt,
730
+ lyrics,
731
+ infer_step,
732
+ guidance_scale,
733
+ scheduler_type,
734
+ cfg_type,
735
+ omega_scale,
736
+ manual_seeds,
737
+ guidance_interval,
738
+ guidance_interval_decay,
739
+ min_guidance_scale,
740
+ use_erg_tag,
741
+ use_erg_lyric,
742
+ use_erg_diffusion,
743
+ oss_steps,
744
+ guidance_scale_text,
745
+ guidance_scale_lyric,
746
+ ],
747
+ outputs=repaint_outputs + [repaint_input_params_json],
748
+ )
749
 
750
+ with gr.Tab("edit", visible=False):
751
+ edit_prompt = gr.Textbox(lines=2, label="Edit Tags", max_lines=4)
752
+ edit_lyrics = gr.Textbox(lines=9, label="Edit Lyrics", max_lines=13)
753
+ retake_seeds = gr.Textbox(
754
+ label="edit seeds (default None)", placeholder="", value=None
755
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
756
 
757
+ edit_type = gr.Radio(
758
+ ["only_lyrics", "remix"],
759
+ value="only_lyrics",
760
+ label="Edit Type",
761
+ elem_id="edit_type",
762
+ info="`only_lyrics` will keep the whole song the same except lyrics difference. Make your diffrence smaller, e.g. one lyrc line change.\nremix can change the song melody and genre",
763
+ )
764
+ edit_n_min = gr.Slider(
765
+ minimum=0.0,
766
+ maximum=1.0,
767
+ step=0.01,
768
+ value=0.6,
769
+ label="edit_n_min",
770
+ interactive=True,
771
+ )
772
+ edit_n_max = gr.Slider(
773
+ minimum=0.0,
774
+ maximum=1.0,
775
+ step=0.01,
776
+ value=1.0,
777
+ label="edit_n_max",
778
+ interactive=True,
779
+ )
780
 
781
+ def edit_type_change_func(edit_type):
782
+ if edit_type == "only_lyrics":
783
+ n_min = 0.6
784
+ n_max = 1.0
785
+ elif edit_type == "remix":
786
+ n_min = 0.2
787
+ n_max = 0.4
788
+ return n_min, n_max
789
+
790
+ edit_type.change(
791
+ edit_type_change_func,
792
+ inputs=[edit_type],
793
+ outputs=[edit_n_min, edit_n_max],
794
+ )
 
 
 
 
 
 
795
 
796
+ edit_source = gr.Radio(
797
+ ["text2music", "last_edit", "upload"],
798
+ value="text2music",
799
+ label="Edit Source",
800
+ elem_id="edit_source",
801
+ )
802
+ edit_source_audio_upload = gr.Audio(
803
+ label="Upload Audio",
804
+ type="filepath",
805
+ visible=False,
806
+ elem_id="edit_source_audio_upload",
807
+ show_download_button=True,
808
+ )
809
+ edit_source.change(
810
+ fn=lambda x: gr.update(
811
+ visible=x == "upload", elem_id="edit_source_audio_upload"
812
+ ),
813
+ inputs=[edit_source],
814
+ outputs=[edit_source_audio_upload],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
815
  )
816
 
817
+ edit_bnt = gr.Button("Edit", variant="primary")
818
+ edit_outputs, edit_input_params_json = create_output_ui("Edit")
819
+
820
+ def edit_process_func(
821
+ text2music_json_data,
822
  edit_input_params_json,
823
  edit_source,
824
  edit_source_audio_upload,
 
844
  guidance_scale_text,
845
  guidance_scale_lyric,
846
  retake_seeds,
847
+ ):
848
+ if edit_source == "upload":
849
+ src_audio_path = edit_source_audio_upload
850
+ audio_duration = librosa.get_duration(filename=src_audio_path)
851
+ json_data = {"audio_duration": audio_duration}
852
+ elif edit_source == "text2music":
853
+ json_data = text2music_json_data
854
+ src_audio_path = json_data["audio_path"]
855
+ elif edit_source == "last_edit":
856
+ json_data = edit_input_params_json
857
+ src_audio_path = json_data["audio_path"]
858
+
859
+ if not edit_prompt:
860
+ edit_prompt = prompt
861
+ if not edit_lyrics:
862
+ edit_lyrics = lyrics
863
+
864
+ return enhanced_process_func(
865
+ json_data["audio_duration"],
866
+ prompt,
867
+ lyrics,
868
+ infer_step,
869
+ guidance_scale,
870
+ scheduler_type,
871
+ cfg_type,
872
+ omega_scale,
873
+ manual_seeds,
874
+ guidance_interval,
875
+ guidance_interval_decay,
876
+ min_guidance_scale,
877
+ use_erg_tag,
878
+ use_erg_lyric,
879
+ use_erg_diffusion,
880
+ oss_steps,
881
+ guidance_scale_text,
882
+ guidance_scale_lyric,
883
+ task="edit",
884
+ src_audio_path=src_audio_path,
885
+ edit_target_prompt=edit_prompt,
886
+ edit_target_lyrics=edit_lyrics,
887
+ edit_n_min=edit_n_min,
888
+ edit_n_max=edit_n_max,
889
+ retake_seeds=retake_seeds,
890
+ lora_name_or_path="none"
891
+ )
892
 
893
+ edit_bnt.click(
894
+ fn=edit_process_func,
895
+ inputs=[
896
+ input_params_json,
897
+ edit_input_params_json,
898
+ edit_source,
899
+ edit_source_audio_upload,
900
+ prompt,
901
+ lyrics,
902
+ edit_prompt,
903
+ edit_lyrics,
904
+ edit_n_min,
905
+ edit_n_max,
906
+ infer_step,
907
+ guidance_scale,
908
+ scheduler_type,
909
+ cfg_type,
910
+ omega_scale,
911
+ manual_seeds,
912
+ guidance_interval,
913
+ guidance_interval_decay,
914
+ min_guidance_scale,
915
+ use_erg_tag,
916
+ use_erg_lyric,
917
+ use_erg_diffusion,
918
+ oss_steps,
919
+ guidance_scale_text,
920
+ guidance_scale_lyric,
921
+ retake_seeds,
922
+ ],
923
+ outputs=edit_outputs + [edit_input_params_json],
924
+ )
925
 
926
+ with gr.Tab("extend", visible=False):
927
+ extend_seeds = gr.Textbox(
928
+ label="extend seeds (default None)", placeholder="", value=None
929
+ )
930
+ left_extend_length = gr.Slider(
931
+ minimum=0.0,
932
+ maximum=240.0,
933
+ step=0.01,
934
+ value=0.0,
935
+ label="Left Extend Length",
936
+ interactive=True,
937
+ )
938
+ right_extend_length = gr.Slider(
939
+ minimum=0.0,
940
+ maximum=240.0,
941
+ step=0.01,
942
+ value=30.0,
943
+ label="Right Extend Length",
944
+ interactive=True,
945
+ )
946
+ extend_source = gr.Radio(
947
+ ["text2music", "last_extend", "upload"],
948
+ value="text2music",
949
+ label="Extend Source",
950
+ elem_id="extend_source",
951
+ )
952
 
953
+ extend_source_audio_upload = gr.Audio(
954
+ label="Upload Audio",
955
+ type="filepath",
956
+ visible=False,
957
+ elem_id="extend_source_audio_upload",
958
+ show_download_button=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
959
  )
960
+ extend_source.change(
961
+ fn=lambda x: gr.update(
962
+ visible=x == "upload", elem_id="extend_source_audio_upload"
963
+ ),
964
+ inputs=[extend_source],
965
+ outputs=[extend_source_audio_upload],
966
+ )
967
+
968
+ extend_bnt = gr.Button("Extend", variant="primary")
969
+ extend_outputs, extend_input_params_json = create_output_ui("Extend")
970
 
971
+ def extend_process_func(
972
+ text2music_json_data,
 
 
973
  extend_input_params_json,
974
  extend_seeds,
975
  left_extend_length,
 
993
  oss_steps,
994
  guidance_scale_text,
995
  guidance_scale_lyric,
996
+ ):
997
+ if extend_source == "upload":
998
+ src_audio_path = extend_source_audio_upload
999
+ # get audio duration
1000
+ audio_duration = librosa.get_duration(filename=src_audio_path)
1001
+ json_data = {"audio_duration": audio_duration}
1002
+ elif extend_source == "text2music":
1003
+ json_data = text2music_json_data
1004
+ src_audio_path = json_data["audio_path"]
1005
+ elif extend_source == "last_extend":
1006
+ json_data = extend_input_params_json
1007
+ src_audio_path = json_data["audio_path"]
1008
+
1009
+ repaint_start = -left_extend_length
1010
+ repaint_end = json_data["audio_duration"] + right_extend_length
1011
+ return enhanced_process_func(
1012
+ json_data["audio_duration"],
1013
+ prompt,
1014
+ lyrics,
1015
+ infer_step,
1016
+ guidance_scale,
1017
+ scheduler_type,
1018
+ cfg_type,
1019
+ omega_scale,
1020
+ manual_seeds,
1021
+ guidance_interval,
1022
+ guidance_interval_decay,
1023
+ min_guidance_scale,
1024
+ use_erg_tag,
1025
+ use_erg_lyric,
1026
+ use_erg_diffusion,
1027
+ oss_steps,
1028
+ guidance_scale_text,
1029
+ guidance_scale_lyric,
1030
+ retake_seeds=extend_seeds,
1031
+ retake_variance=1.0,
1032
+ task="extend",
1033
+ repaint_start=repaint_start,
1034
+ repaint_end=repaint_end,
1035
+ src_audio_path=src_audio_path,
1036
+ lora_name_or_path="none"
1037
+ )
1038
+
1039
+ extend_bnt.click(
1040
+ fn=extend_process_func,
1041
+ inputs=[
1042
+ input_params_json,
1043
+ extend_input_params_json,
1044
+ extend_seeds,
1045
+ left_extend_length,
1046
+ right_extend_length,
1047
+ extend_source,
1048
+ extend_source_audio_upload,
1049
+ prompt,
1050
+ lyrics,
1051
+ infer_step,
1052
+ guidance_scale,
1053
+ scheduler_type,
1054
+ cfg_type,
1055
+ omega_scale,
1056
+ manual_seeds,
1057
+ guidance_interval,
1058
+ guidance_interval_decay,
1059
+ min_guidance_scale,
1060
+ use_erg_tag,
1061
+ use_erg_lyric,
1062
+ use_erg_diffusion,
1063
+ oss_steps,
1064
+ guidance_scale_text,
1065
+ guidance_scale_lyric,
1066
+ ],
1067
+ outputs=extend_outputs + [extend_input_params_json],
1068
+ )
1069
+
1070
+ # ์ด๋ฒคํŠธ ํ•ธ๋“ค๋Ÿฌ๋“ค
1071
+ def toggle_ref_audio_visibility(is_checked):
1072
+ return (
1073
+ gr.update(visible=is_checked),
1074
+ gr.update(visible=is_checked),
1075
+ )
1076
+
1077
+ audio2audio_enable.change(
1078
+ fn=toggle_ref_audio_visibility,
1079
+ inputs=[audio2audio_enable],
1080
+ outputs=[ref_audio_input, ref_audio_strength],
1081
+ )
1082
+
1083
+ genre_preset.change(
1084
+ fn=update_tags_from_preset,
1085
+ inputs=[genre_preset],
1086
+ outputs=[prompt]
1087
+ )
1088
+
1089
+ quality_preset.change(
1090
+ fn=lambda x: QUALITY_PRESETS.get(x, {}).get("description", ""),
1091
+ inputs=[quality_preset],
1092
+ outputs=[preset_description]
1093
+ )
1094
+
1095
+ quality_preset.change(
1096
+ fn=update_quality_preset,
1097
+ inputs=[quality_preset],
1098
+ outputs=[infer_step, guidance_scale, scheduler_type, omega_scale, use_erg_diffusion, use_erg_tag]
1099
+ )
1100
+
1101
+ # ๊ฐ€์‚ฌ ์ƒ์„ฑ ๋ฒ„ํŠผ ํด๋ฆญ ์‹œ
1102
+ generate_lyrics_btn.click(
1103
+ fn=openai_generate_lyrics,
1104
+ inputs=[topic_for_lyrics],
1105
+ outputs=[lyrics]
1106
+ )
1107
 
1108
+ # ํ”„๋ฆฌ๋ทฐ ๊ธฐ๋Šฅ
1109
+ def generate_preview(prompt, lyrics, genre_preset):
1110
+ """10์ดˆ ํ”„๋ฆฌ๋ทฐ ์ƒ์„ฑ"""
1111
+ preview_params = {
1112
+ "audio_duration": 10,
1113
+ "infer_step": 50,
1114
+ "guidance_scale": 12.0,
1115
+ "scheduler_type": "euler",
1116
+ "cfg_type": "apg",
1117
+ "omega_scale": 5.0,
1118
+ }
1119
+
1120
+ enhanced_prompt = enhance_prompt_with_genre(prompt, genre_preset) if genre_preset != "Custom" else prompt
1121
+
1122
+ try:
1123
+ # ์‹ค์ œ ๊ตฌํ˜„์—์„œ๋Š” ๋น ๋ฅธ ์ƒ์„ฑ ๋ชจ๋“œ ์‚ฌ์šฉ
1124
+ result = enhanced_process_func(
1125
+ preview_params["audio_duration"],
1126
+ enhanced_prompt,
1127
+ lyrics[:200], # ๊ฐ€์‚ฌ ์ผ๋ถ€๋งŒ ์‚ฌ์šฉ
1128
+ preview_params["infer_step"],
1129
+ preview_params["guidance_scale"],
1130
+ preview_params["scheduler_type"],
1131
+ preview_params["cfg_type"],
1132
+ preview_params["omega_scale"],
1133
+ None, # manual_seeds
1134
+ 0.5, # guidance_interval
1135
+ 0.0, # guidance_interval_decay
1136
+ 3.0, # min_guidance_scale
1137
+ True, # use_erg_tag
1138
+ False, # use_erg_lyric
1139
+ True, # use_erg_diffusion
1140
+ None, # oss_steps
1141
+ 0.0, # guidance_scale_text
1142
+ 0.0, # guidance_scale_lyric
1143
+ multi_seed_mode="Single"
 
 
 
 
 
 
 
1144
  )
1145
+ return result[0] if result else None
1146
+ except Exception as e:
1147
+ return f"ํ”„๋ฆฌ๋ทฐ ์ƒ์„ฑ ์‹คํŒจ: {str(e)}"
1148
 
1149
+ preview_bnt.click(
1150
+ fn=generate_preview,
1151
+ inputs=[prompt, lyrics, genre_preset],
1152
+ outputs=[outputs[0]]
1153
+ )
1154
+
1155
+ def json2output(json_data):
1156
+ return (
1157
+ json_data["audio_duration"],
1158
+ json_data["prompt"],
1159
+ json_data["lyrics"],
1160
+ json_data["infer_step"],
1161
+ json_data["guidance_scale"],
1162
+ json_data["scheduler_type"],
1163
+ json_data["cfg_type"],
1164
+ json_data["omega_scale"],
1165
+ ", ".join(map(str, json_data["actual_seeds"])),
1166
+ json_data["guidance_interval"],
1167
+ json_data["guidance_interval_decay"],
1168
+ json_data["min_guidance_scale"],
1169
+ json_data["use_erg_tag"],
1170
+ json_data["use_erg_lyric"],
1171
+ json_data["use_erg_diffusion"],
1172
+ ", ".join(map(str, json_data["oss_steps"])),
1173
+ (
1174
+ json_data["guidance_scale_text"]
1175
+ if "guidance_scale_text" in json_data
1176
+ else 0.0
1177
+ ),
1178
+ (
1179
+ json_data["guidance_scale_lyric"]
1180
+ if "guidance_scale_lyric" in json_data
1181
+ else 0.0
1182
+ ),
1183
+ (
1184
+ json_data["audio2audio_enable"]
1185
+ if "audio2audio_enable" in json_data
1186
+ else False
1187
+ ),
1188
+ (
1189
+ json_data["ref_audio_strength"]
1190
+ if "ref_audio_strength" in json_data
1191
+ else 0.5
1192
+ ),
1193
+ (
1194
+ json_data["ref_audio_input"]
1195
+ if "ref_audio_input" in json_data
1196
+ else None
1197
+ ),
1198
  )
1199
 
1200
+ def sample_data(lora_name_or_path_):
1201
+ if sample_data_func:
1202
+ json_data = sample_data_func()
1203
+ return json2output(json_data)
1204
+ return {}
1205
+
1206
+ sample_bnt.click(
1207
+ sample_data,
1208
+ inputs=[lora_name_or_path],
1209
+ outputs=[
1210
+ audio_duration,
1211
+ prompt,
1212
+ lyrics,
1213
+ infer_step,
1214
+ guidance_scale,
1215
+ scheduler_type,
1216
+ cfg_type,
1217
+ omega_scale,
1218
+ manual_seeds,
1219
+ guidance_interval,
1220
+ guidance_interval_decay,
1221
+ min_guidance_scale,
1222
+ use_erg_tag,
1223
+ use_erg_lyric,
1224
+ use_erg_diffusion,
1225
+ oss_steps,
1226
+ guidance_scale_text,
1227
+ guidance_scale_lyric,
1228
+ audio2audio_enable,
1229
+ ref_audio_strength,
1230
+ ref_audio_input,
1231
+ ],
1232
+ )
1233
+
1234
+ # ๋ฉ”์ธ ์ƒ์„ฑ ๋ฒ„ํŠผ ์ด๋ฒคํŠธ
1235
  text2music_bnt.click(
1236
  fn=enhanced_process_func,
1237
  inputs=[
 
1271
  load_data_func=dump_func,
1272
  ):
1273
  with gr.Blocks(
1274
+ title="ACE-Step Model 1.0 - Enhanced",
1275
+ theme=gr.themes.Soft(
1276
+ primary_hue="blue",
1277
+ secondary_hue="gray",
1278
+ font=["Helvetica", "ui-sans-serif", "system-ui", "sans-serif"],
1279
+ ),
1280
  css="""
1281
  .gradio-container {
1282
+ max-width: 1400px !important;
1283
+ margin: auto !important;
1284
  }
1285
+
1286
+ /* ๊ทธ๋ฃน ์Šคํƒ€์ผ๋ง */
1287
+ .gr-group {
1288
+ border: 1px solid #e5e7eb !important;
1289
+ border-radius: 8px !important;
1290
+ padding: 16px !important;
1291
+ margin-bottom: 16px !important;
1292
+ background: white !important;
1293
+ }
1294
+
1295
+ /* ํ—ค๋” ์Šคํƒ€์ผ */
1296
+ h1 {
1297
+ background: linear-gradient(45deg, #2563eb, #7c3aed);
1298
+ -webkit-background-clip: text;
1299
+ -webkit-text-fill-color: transparent;
1300
+ text-align: center;
1301
+ font-size: 2.5rem !important;
1302
+ margin-bottom: 0.5rem !important;
1303
+ }
1304
+
1305
+ h2 {
1306
+ color: #1f2937 !important;
1307
+ font-size: 1.5rem !important;
1308
+ margin-bottom: 1rem !important;
1309
+ font-weight: 600 !important;
1310
+ }
1311
+
1312
+ /* ๋ฒ„ํŠผ ์Šคํƒ€์ผ */
1313
+ .primary {
1314
+ background: linear-gradient(45deg, #2563eb, #3b82f6) !important;
1315
+ color: white !important;
1316
+ font-weight: 600 !important;
1317
+ transition: all 0.3s ease !important;
1318
+ }
1319
+
1320
+ .primary:hover {
1321
+ transform: translateY(-2px) !important;
1322
+ box-shadow: 0 4px 12px rgba(59, 130, 246, 0.4) !important;
1323
+ }
1324
+
1325
+ .secondary {
1326
+ background: #f3f4f6 !important;
1327
+ color: #374151 !important;
1328
+ border: 1px solid #e5e7eb !important;
1329
+ }
1330
+
1331
+ /* ์ž…๋ ฅ ํ•„๋“œ ์Šคํƒ€์ผ */
1332
+ input, textarea, .gr-box {
1333
+ border-radius: 6px !important;
1334
+ }
1335
+
1336
+ /* ์•„์ฝ”๋””์–ธ ์Šคํƒ€์ผ */
1337
+ .gr-accordion {
1338
+ border-radius: 8px !important;
1339
+ overflow: hidden !important;
1340
+ }
1341
+
1342
+ /* ํƒœ๊ทธ ๋ผ๋ฒจ ์Šคํƒ€์ผ */
1343
+ label {
1344
+ font-weight: 500 !important;
1345
+ color: #374151 !important;
1346
+ }
1347
+
1348
+ /* ํ€„๋ฆฌํ‹ฐ ์„ค๋ช… ๋ฐ•์Šค */
1349
+ #component-preset_description textarea {
1350
+ background: linear-gradient(45deg, #f0f9ff, #e0f2fe) !important;
1351
+ border: none !important;
1352
+ font-style: italic !important;
1353
  }
1354
  """
1355
  ) as demo:
1356
  gr.Markdown(
1357
  """
1358
+ <h1>๐ŸŽต ACE-Step PRO</h1>
1359
+ <div style="text-align: center; margin: 20px 0 30px 0;">
1360
+ <p style="color: #6b7280; font-size: 1.1rem;">
1361
+ <strong>๐Ÿš€ Enhanced Features:</strong> AI ๊ฐ€์‚ฌ ์ƒ์„ฑ | ์Šค๋งˆํŠธ ํ”„๋กฌํ”„ํŠธ | ํ’ˆ์งˆ ํ”„๋ฆฌ์…‹ | ๋‹ค์ค‘ ์ƒ์„ฑ ๋ชจ๋“œ
 
 
 
1362
  </p>
1363
  </div>
1364
+ """
1365
  )
1366
 
1367
+ # ๋ฉ”์ธ ํƒญ
1368
+ with gr.Tab("๐ŸŽต Music Generation"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1369
  create_text2music_ui(
1370
  gr=gr,
1371
  text2music_process_func=text2music_process_func,
1372
  sample_data_func=sample_data_func,
1373
  load_data_func=load_data_func,
1374
  )
1375
+
1376
+ # ๊ฐ€์ด๋“œ ํƒญ ์ถ”๊ฐ€
1377
+ with gr.Tab("๐Ÿ“– ์‚ฌ์šฉ๋ฒ• ๊ฐ€์ด๋“œ"):
1378
+ gr.Markdown("""
1379
+ ## ๐ŸŽฏ ๋น ๋ฅธ ์‹œ์ž‘ ๊ฐ€์ด๋“œ
1380
+
1381
+ ### 1. ๊ธฐ๋ณธ ์‚ฌ์šฉ๋ฒ•
1382
+ - **์žฅ๋ฅด ์„ ํƒ**: ์›ํ•˜๋Š” ์Œ์•… ์žฅ๋ฅด๋ฅผ ์„ ํƒํ•˜๋ฉด ์ž๋™์œผ๋กœ ์ตœ์ ํ™”๋œ ํƒœ๊ทธ๊ฐ€ ์ ์šฉ๋ฉ๋‹ˆ๋‹ค
1383
+ - **ํ’ˆ์งˆ ์„ค์ •**: ์šฉ๋„์— ๋งž๋Š” ํ’ˆ์งˆ์„ ์„ ํƒํ•˜์„ธ์š”
1384
+ - Draft: ๋น ๋ฅธ ํ…Œ์ŠคํŠธ (1-2๋ถ„)
1385
+ - Standard: ์ผ๋ฐ˜ ์‚ฌ์šฉ (3-5๋ถ„)
1386
+ - High Quality: ๊ณ ํ’ˆ์งˆ (8-12๋ถ„)
1387
+ - Ultra: ์ตœ๊ณ  ํ’ˆ์งˆ (15-20๋ถ„)
1388
+
1389
+ ### 2. AI ๊ฐ€์‚ฌ ์ƒ์„ฑ
1390
+ - ๊ฐ€์‚ฌ ์ฃผ์ œ๋ฅผ ์ž…๋ ฅํ•˜๊ณ  "๐Ÿค– ๊ฐ€์‚ฌ ์ƒ์„ฑ" ๋ฒ„ํŠผ์„ ํด๋ฆญํ•˜๋ฉด AI๊ฐ€ ์ž๋™์œผ๋กœ ๊ตฌ์กฐํ™”๋œ ๊ฐ€์‚ฌ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค
1391
+ - ์ƒ์„ฑ๋œ ๊ฐ€์‚ฌ๋Š” ์ž์œ ๋กญ๊ฒŒ ์ˆ˜์ • ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค
1392
+
1393
+ ### 3. ๋‹ค์ค‘ ์ƒ์„ฑ ๋ชจ๋“œ
1394
+ - "Best of 3/5/10"์„ ์„ ํƒํ•˜๋ฉด ์—ฌ๋Ÿฌ ๋ฒˆ ์ƒ์„ฑํ•˜์—ฌ ๊ฐ€์žฅ ์ข‹์€ ํ’ˆ์งˆ์˜ ๊ฒฐ๊ณผ๋ฅผ ์ž๋™์œผ๋กœ ์„ ํƒํ•ฉ๋‹ˆ๋‹ค
1395
+ - ๋” ๋‚˜์€ ๊ฒฐ๊ณผ๋ฅผ ์›ํ•  ๋•Œ ์œ ์šฉํ•ฉ๋‹ˆ๋‹ค
1396
+
1397
+ ### 4. ํ”„๋ฆฌ๋ทฐ ๊ธฐ๋Šฅ
1398
+ - "๐Ÿ‘๏ธ ๋ฏธ๋ฆฌ๋“ฃ๊ธฐ" ๋ฒ„ํŠผ์œผ๋กœ 10์ดˆ ์ƒ˜ํ”Œ์„ ๋น ๋ฅด๊ฒŒ ์ƒ์„ฑํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค
1399
+ - ์ „์ฒด ๊ณก ์ƒ์„ฑ ์ „์— ์Šคํƒ€์ผ์„ ํ™•์ธํ•  ๋•Œ ์œ ์šฉํ•ฉ๋‹ˆ๋‹ค
1400
+
1401
+ ### ๐Ÿ’ก ํ’ˆ์งˆ ํ–ฅ์ƒ ํŒ
1402
+ 1. **๊ณ ํ’ˆ์งˆ ์ƒ์„ฑ**: "High Quality" + "Best of 5" ์กฐํ•ฉ ์ถ”์ฒœ
1403
+ 2. **๋น ๋ฅธ ํ…Œ์ŠคํŠธ**: "Draft" + "ํ”„๋ฆฌ๋ทฐ" ๊ธฐ๋Šฅ ํ™œ์šฉ
1404
+ 3. **์žฅ๋ฅด ํŠนํ™”**: ์žฅ๋ฅด ํ”„๋ฆฌ์…‹ ์„ ํƒ ํ›„ "์Šค๋งˆํŠธ ํ–ฅ์ƒ" ์ฒดํฌ
1405
+ 4. **๊ฐ€์‚ฌ ๊ตฌ์กฐ**: [verse], [chorus], [bridge] ํƒœ๊ทธ๋ฅผ ์ ๊ทน ํ™œ์šฉํ•˜์„ธ์š”
1406
+
1407
+ ### ๐ŸŽต ๊ฐ€์‚ฌ ๊ตฌ์กฐ ํƒœ๊ทธ
1408
+ - `[verse]`: ์ ˆ (์ด์•ผ๊ธฐ ์ „๊ฐœ)
1409
+ - `[chorus]`: ํ›„๋ ด๊ตฌ (๋ฐ˜๋ณต๋˜๋Š” ๋ฉ”์ธ ๋ฉœ๋กœ๋””)
1410
+ - `[bridge]`: ๋ธŒ๋ฆฟ์ง€ (์ „ํ™˜๋ถ€)
1411
+ - `[instrumental]` or `[inst]`: ์—ฐ์ฃผ ๊ตฌ๊ฐ„
1412
+ """)
1413
+
1414
  return demo
1415
 
1416
 
 
1420
  server_name="0.0.0.0",
1421
  server_port=7860,
1422
  share=True # ๊ณต์œ  ๋งํฌ ์ƒ์„ฑ
1423
+ )