Spaces:
VIDraft
/
Running on Zero

ginipick committed on
Commit
d801e4d
·
verified ·
1 Parent(s): ae3a1ff

Update ui/components.py

Browse files
Files changed (1) hide show
  1. ui/components.py +106 -658
ui/components.py CHANGED
@@ -441,6 +441,9 @@ def create_text2music_ui(
441
  ):
442
  # ํ–ฅ์ƒ๋œ ํ”„๋กœ์„ธ์Šค ํ•จ์ˆ˜ ์ƒ์„ฑ
443
  enhanced_process_func = create_enhanced_process_func(text2music_process_func)
 
 
 
444
 
445
  with gr.Row():
446
  with gr.Column():
@@ -448,14 +451,14 @@ def create_text2music_ui(
448
  with gr.Group():
449
  gr.Markdown("### โšก ํ’ˆ์งˆ & ์„ฑ๋Šฅ ์„ค์ •")
450
  with gr.Row():
451
- quality_preset = gr.Dropdown(
452
  choices=list(QUALITY_PRESETS.keys()),
453
  value="Standard",
454
  label="ํ’ˆ์งˆ ํ”„๋ฆฌ์…‹",
455
  scale=2,
456
  interactive=True
457
  )
458
- multi_seed_mode = gr.Dropdown(
459
  choices=list(MULTI_SEED_OPTIONS.keys()),
460
  value="Single",
461
  label="๋‹ค์ค‘ ์ƒ์„ฑ ๋ชจ๋“œ",
@@ -464,7 +467,7 @@ def create_text2music_ui(
464
  interactive=True
465
  )
466
 
467
- preset_description = gr.Textbox(
468
  value=QUALITY_PRESETS["Standard"]["description"],
469
  label="์„ค๋ช…",
470
  interactive=False,
@@ -472,8 +475,7 @@ def create_text2music_ui(
472
  )
473
 
474
  with gr.Row(equal_height=True):
475
- # add markdown, tags and lyrics examples are from ai music generation community
476
- audio_duration = gr.Slider(
477
  -1,
478
  240.0,
479
  step=0.00001,
@@ -483,32 +485,32 @@ def create_text2music_ui(
483
  info="-1 means random duration (30 ~ 240).",
484
  scale=7,
485
  )
486
- random_bnt = gr.Button("๐ŸŽฒ Random", variant="secondary", scale=1)
487
- preview_bnt = gr.Button("๐ŸŽต Preview", variant="secondary", scale=2)
488
 
489
  # audio2audio
490
  with gr.Row(equal_height=True):
491
- audio2audio_enable = gr.Checkbox(
492
  label="Enable Audio2Audio",
493
  value=False,
494
  info="Check to enable Audio-to-Audio generation using a reference audio.",
495
  elem_id="audio2audio_checkbox"
496
  )
497
- lora_name_or_path = gr.Dropdown(
498
  label="Lora Name or Path",
499
  choices=["ACE-Step/ACE-Step-v1-chinese-rap-LoRA", "none"],
500
  value="none",
501
  allow_custom_value=True,
502
  )
503
 
504
- ref_audio_input = gr.Audio(
505
  type="filepath",
506
  label="Reference Audio (for Audio2Audio)",
507
  visible=False,
508
  elem_id="ref_audio_input",
509
  show_download_button=True
510
  )
511
- ref_audio_strength = gr.Slider(
512
  label="Refer audio strength",
513
  minimum=0.0,
514
  maximum=1.0,
@@ -519,46 +521,34 @@ def create_text2music_ui(
519
  interactive=True,
520
  )
521
 
522
- def toggle_ref_audio_visibility(is_checked):
523
- return (
524
- gr.update(visible=is_checked),
525
- gr.update(visible=is_checked),
526
- )
527
-
528
- audio2audio_enable.change(
529
- fn=toggle_ref_audio_visibility,
530
- inputs=[audio2audio_enable],
531
- outputs=[ref_audio_input, ref_audio_strength],
532
- )
533
-
534
  with gr.Column(scale=2):
535
  with gr.Group():
536
  gr.Markdown("""### ๐ŸŽผ ์Šค๋งˆํŠธ ํ”„๋กฌํ”„ํŠธ ์‹œ์Šคํ…œ
537
  <center>์žฅ๋ฅด์™€ ์Šคํƒ€์ผ์„ ์„ ํƒํ•˜๋ฉด ์ž๋™์œผ๋กœ ์ตœ์ ํ™”๋œ ํƒœ๊ทธ๊ฐ€ ์ถ”๊ฐ€๋ฉ๋‹ˆ๋‹ค.</center>""")
538
 
539
  with gr.Row():
540
- genre_preset = gr.Dropdown(
541
  choices=["Custom"] + list(GENRE_PRESETS.keys()),
542
  value="Custom",
543
  label="์žฅ๋ฅด ํ”„๋ฆฌ์…‹",
544
  scale=1,
545
  interactive=True
546
  )
547
- song_style = gr.Dropdown(
548
  choices=list(SONG_STYLES.keys()),
549
  value="๋“€์—ฃ (๋‚จ๋…€ ํ˜ผ์„ฑ)",
550
  label="๊ณก ์Šคํƒ€์ผ",
551
  scale=1,
552
  interactive=True
553
  )
554
- enable_smart_enhancement = gr.Checkbox(
555
  label="์Šค๋งˆํŠธ ํ–ฅ์ƒ",
556
  value=True,
557
  info="์ž๋™ ํƒœ๊ทธ ์ตœ์ ํ™”",
558
  scale=1
559
  )
560
 
561
- prompt = gr.Textbox(
562
  lines=2,
563
  label="Tags",
564
  max_lines=4,
@@ -572,15 +562,15 @@ def create_text2music_ui(
572
  <center>์ฃผ์ œ๋ฅผ ์ž…๋ ฅํ•˜๊ณ  'AI ์ž‘์‚ฌ' ๋ฒ„ํŠผ์„ ํด๋ฆญํ•˜๋ฉด ์ž๋™์œผ๋กœ ๊ฐ€์‚ฌ๊ฐ€ ์ƒ์„ฑ๋ฉ๋‹ˆ๋‹ค.</center>""")
573
 
574
  with gr.Row():
575
- lyric_prompt = gr.Textbox(
576
  label="์ž‘์‚ฌ ์ฃผ์ œ",
577
  placeholder="์˜ˆ: ์ฒซ์‚ฌ๋ž‘์˜ ์„ค๋ ˜, ์ด๋ณ„์˜ ์•„ํ””, ํฌ๋ง์ฐฌ ๋‚ด์ผ...",
578
  scale=3,
579
  interactive=True
580
  )
581
- generate_lyrics_btn = gr.Button("๐Ÿค– AI ์ž‘์‚ฌ", variant="secondary", scale=1)
582
 
583
- lyrics = gr.Textbox(
584
  lines=9,
585
  label="Lyrics",
586
  max_lines=13,
@@ -590,7 +580,7 @@ def create_text2music_ui(
590
  )
591
 
592
  with gr.Accordion("Basic Settings", open=False):
593
- infer_step = gr.Slider(
594
  minimum=1,
595
  maximum=300,
596
  step=1,
@@ -598,7 +588,7 @@ def create_text2music_ui(
598
  label="Infer Steps",
599
  interactive=True,
600
  )
601
- guidance_scale = gr.Slider(
602
  minimum=0.0,
603
  maximum=30.0,
604
  step=0.1,
@@ -607,7 +597,7 @@ def create_text2music_ui(
607
  interactive=True,
608
  info="When guidance_scale_lyric > 1 and guidance_scale_text > 1, the guidance scale will not be applied.",
609
  )
610
- guidance_scale_text = gr.Slider(
611
  minimum=0.0,
612
  maximum=10.0,
613
  step=0.1,
@@ -616,7 +606,7 @@ def create_text2music_ui(
616
  interactive=True,
617
  info="Guidance scale for text condition. It can only apply to cfg. set guidance_scale_text=5.0, guidance_scale_lyric=1.5 for start",
618
  )
619
- guidance_scale_lyric = gr.Slider(
620
  minimum=0.0,
621
  maximum=10.0,
622
  step=0.1,
@@ -625,7 +615,7 @@ def create_text2music_ui(
625
  interactive=True,
626
  )
627
 
628
- manual_seeds = gr.Textbox(
629
  label="manual seeds (default None)",
630
  placeholder="1,2,3,4",
631
  value=None,
@@ -633,37 +623,37 @@ def create_text2music_ui(
633
  )
634
 
635
  with gr.Accordion("Advanced Settings", open=False):
636
- scheduler_type = gr.Radio(
637
  ["euler", "heun"],
638
  value="euler",
639
  label="Scheduler Type",
640
  elem_id="scheduler_type",
641
  info="Scheduler type for the generation. euler is recommended. heun will take more time.",
642
  )
643
- cfg_type = gr.Radio(
644
  ["cfg", "apg", "cfg_star"],
645
  value="apg",
646
  label="CFG Type",
647
  elem_id="cfg_type",
648
  info="CFG type for the generation. apg is recommended. cfg and cfg_star are almost the same.",
649
  )
650
- use_erg_tag = gr.Checkbox(
651
  label="use ERG for tag",
652
  value=True,
653
  info="Use Entropy Rectifying Guidance for tag. It will multiple a temperature to the attention to make a weaker tag condition and make better diversity.",
654
  )
655
- use_erg_lyric = gr.Checkbox(
656
  label="use ERG for lyric",
657
  value=False,
658
  info="The same but apply to lyric encoder's attention.",
659
  )
660
- use_erg_diffusion = gr.Checkbox(
661
  label="use ERG for diffusion",
662
  value=True,
663
  info="The same but apply to diffusion model's attention.",
664
  )
665
 
666
- omega_scale = gr.Slider(
667
  minimum=-100.0,
668
  maximum=100.0,
669
  step=0.1,
@@ -673,7 +663,7 @@ def create_text2music_ui(
673
  info="Granularity scale for the generation. Higher values can reduce artifacts",
674
  )
675
 
676
- guidance_interval = gr.Slider(
677
  minimum=0.0,
678
  maximum=1.0,
679
  step=0.01,
@@ -682,7 +672,7 @@ def create_text2music_ui(
682
  interactive=True,
683
  info="Guidance interval for the generation. 0.5 means only apply guidance in the middle steps (0.25 * infer_steps to 0.75 * infer_steps)",
684
  )
685
- guidance_interval_decay = gr.Slider(
686
  minimum=0.0,
687
  maximum=1.0,
688
  step=0.01,
@@ -691,7 +681,7 @@ def create_text2music_ui(
691
  interactive=True,
692
  info="Guidance interval decay for the generation. Guidance scale will decay from guidance_scale to min_guidance_scale in the interval. 0.0 means no decay.",
693
  )
694
- min_guidance_scale = gr.Slider(
695
  minimum=0.0,
696
  maximum=200.0,
697
  step=0.1,
@@ -700,666 +690,124 @@ def create_text2music_ui(
700
  interactive=True,
701
  info="Min guidance scale for guidance interval decay's end scale",
702
  )
703
- oss_steps = gr.Textbox(
704
  label="OSS Steps",
705
  placeholder="16, 29, 52, 96, 129, 158, 172, 183, 189, 200",
706
  value=None,
707
  info="Optimal Steps for the generation. But not test well",
708
  )
709
 
710
- text2music_bnt = gr.Button("๐ŸŽต Generate Music", variant="primary", size="lg")
711
 
712
  with gr.Column():
713
  outputs, input_params_json = create_output_ui()
714
 
715
- with gr.Tab("retake"):
716
- retake_variance = gr.Slider(
717
- minimum=0.0, maximum=1.0, step=0.01, value=0.2, label="variance"
718
- )
719
- retake_seeds = gr.Textbox(
720
- label="retake seeds (default None)", placeholder="", value=None
721
- )
722
- retake_bnt = gr.Button("Retake", variant="primary")
723
- retake_outputs, retake_input_params_json = create_output_ui("Retake")
724
-
725
- def retake_process_func(json_data, retake_variance, retake_seeds):
726
- return enhanced_process_func(
727
- json_data.get("audio_duration", 30),
728
- json_data.get("prompt", ""),
729
- json_data.get("lyrics", ""),
730
- json_data.get("infer_step", 100),
731
- json_data.get("guidance_scale", 15.0),
732
- json_data.get("scheduler_type", "euler"),
733
- json_data.get("cfg_type", "apg"),
734
- json_data.get("omega_scale", 10.0),
735
- retake_seeds,
736
- json_data.get("guidance_interval", 0.5),
737
- json_data.get("guidance_interval_decay", 0.0),
738
- json_data.get("min_guidance_scale", 3.0),
739
- json_data.get("use_erg_tag", True),
740
- json_data.get("use_erg_lyric", False),
741
- json_data.get("use_erg_diffusion", True),
742
- json_data.get("oss_steps", None),
743
- json_data.get("guidance_scale_text", 0.0),
744
- json_data.get("guidance_scale_lyric", 0.0),
745
- audio2audio_enable=json_data.get("audio2audio_enable", False),
746
- ref_audio_strength=json_data.get("ref_audio_strength", 0.5),
747
- ref_audio_input=json_data.get("ref_audio_input", None),
748
- lora_name_or_path=json_data.get("lora_name_or_path", "none"),
749
- multi_seed_mode="Best of 3", # retake๋Š” ์ž๋™์œผ๋กœ ๋‹ค์ค‘ ์ƒ์„ฑ
750
- retake_variance=retake_variance,
751
- task="retake"
752
- )
753
-
754
- retake_bnt.click(
755
- fn=retake_process_func,
756
- inputs=[
757
- input_params_json,
758
- retake_variance,
759
- retake_seeds,
760
- ],
761
- outputs=retake_outputs + [retake_input_params_json],
762
- )
763
-
764
- with gr.Tab("repainting"):
765
- retake_variance = gr.Slider(
766
- minimum=0.0, maximum=1.0, step=0.01, value=0.2, label="variance"
767
- )
768
- retake_seeds = gr.Textbox(
769
- label="repaint seeds (default None)", placeholder="", value=None
770
- )
771
- repaint_start = gr.Slider(
772
- minimum=0.0,
773
- maximum=240.0,
774
- step=0.01,
775
- value=0.0,
776
- label="Repaint Start Time",
777
- interactive=True,
778
- )
779
- repaint_end = gr.Slider(
780
- minimum=0.0,
781
- maximum=240.0,
782
- step=0.01,
783
- value=30.0,
784
- label="Repaint End Time",
785
- interactive=True,
786
- )
787
- repaint_source = gr.Radio(
788
- ["text2music", "last_repaint", "upload"],
789
- value="text2music",
790
- label="Repaint Source",
791
- elem_id="repaint_source",
792
- )
793
 
794
- repaint_source_audio_upload = gr.Audio(
795
- label="Upload Audio",
796
- type="filepath",
797
- visible=False,
798
- elem_id="repaint_source_audio_upload",
799
- show_download_button=True,
800
- )
801
- repaint_source.change(
802
- fn=lambda x: gr.update(
803
- visible=x == "upload", elem_id="repaint_source_audio_upload"
804
- ),
805
- inputs=[repaint_source],
806
- outputs=[repaint_source_audio_upload],
807
- )
808
-
809
- repaint_bnt = gr.Button("Repaint", variant="primary")
810
- repaint_outputs, repaint_input_params_json = create_output_ui("Repaint")
811
-
812
- def repaint_process_func(
813
- text2music_json_data,
814
- repaint_json_data,
815
- retake_variance,
816
- retake_seeds,
817
- repaint_start,
818
- repaint_end,
819
- repaint_source,
820
- repaint_source_audio_upload,
821
- prompt,
822
- lyrics,
823
- infer_step,
824
- guidance_scale,
825
- scheduler_type,
826
- cfg_type,
827
- omega_scale,
828
- manual_seeds,
829
- guidance_interval,
830
- guidance_interval_decay,
831
- min_guidance_scale,
832
- use_erg_tag,
833
- use_erg_lyric,
834
- use_erg_diffusion,
835
- oss_steps,
836
- guidance_scale_text,
837
- guidance_scale_lyric,
838
- ):
839
- if repaint_source == "upload":
840
- src_audio_path = repaint_source_audio_upload
841
- audio_duration = librosa.get_duration(filename=src_audio_path)
842
- json_data = {"audio_duration": audio_duration}
843
- elif repaint_source == "text2music":
844
- json_data = text2music_json_data
845
- src_audio_path = json_data["audio_path"]
846
- elif repaint_source == "last_repaint":
847
- json_data = repaint_json_data
848
- src_audio_path = json_data["audio_path"]
849
-
850
- return enhanced_process_func(
851
- json_data["audio_duration"],
852
- prompt,
853
- lyrics,
854
- infer_step,
855
- guidance_scale,
856
- scheduler_type,
857
- cfg_type,
858
- omega_scale,
859
- manual_seeds,
860
- guidance_interval,
861
- guidance_interval_decay,
862
- min_guidance_scale,
863
- use_erg_tag,
864
- use_erg_lyric,
865
- use_erg_diffusion,
866
- oss_steps,
867
- guidance_scale_text,
868
- guidance_scale_lyric,
869
- retake_seeds=retake_seeds,
870
- retake_variance=retake_variance,
871
- task="repaint",
872
- repaint_start=repaint_start,
873
- repaint_end=repaint_end,
874
- src_audio_path=src_audio_path,
875
- lora_name_or_path="none"
876
- )
877
-
878
- repaint_bnt.click(
879
- fn=repaint_process_func,
880
- inputs=[
881
- input_params_json,
882
- repaint_input_params_json,
883
- retake_variance,
884
- retake_seeds,
885
- repaint_start,
886
- repaint_end,
887
- repaint_source,
888
- repaint_source_audio_upload,
889
- prompt,
890
- lyrics,
891
- infer_step,
892
- guidance_scale,
893
- scheduler_type,
894
- cfg_type,
895
- omega_scale,
896
- manual_seeds,
897
- guidance_interval,
898
- guidance_interval_decay,
899
- min_guidance_scale,
900
- use_erg_tag,
901
- use_erg_lyric,
902
- use_erg_diffusion,
903
- oss_steps,
904
- guidance_scale_text,
905
- guidance_scale_lyric,
906
- ],
907
- outputs=repaint_outputs + [repaint_input_params_json],
908
- )
909
-
910
- with gr.Tab("edit"):
911
- edit_prompt = gr.Textbox(lines=2, label="Edit Tags", max_lines=4)
912
- edit_lyrics = gr.Textbox(lines=9, label="Edit Lyrics", max_lines=13)
913
- retake_seeds = gr.Textbox(
914
- label="edit seeds (default None)", placeholder="", value=None
915
- )
916
-
917
- edit_type = gr.Radio(
918
- ["only_lyrics", "remix"],
919
- value="only_lyrics",
920
- label="Edit Type",
921
- elem_id="edit_type",
922
- info="`only_lyrics` will keep the whole song the same except lyrics difference. Make your diffrence smaller, e.g. one lyrc line change.\nremix can change the song melody and genre",
923
- )
924
- edit_n_min = gr.Slider(
925
- minimum=0.0,
926
- maximum=1.0,
927
- step=0.01,
928
- value=0.6,
929
- label="edit_n_min",
930
- interactive=True,
931
- )
932
- edit_n_max = gr.Slider(
933
- minimum=0.0,
934
- maximum=1.0,
935
- step=0.01,
936
- value=1.0,
937
- label="edit_n_max",
938
- interactive=True,
939
- )
940
-
941
- def edit_type_change_func(edit_type):
942
- if edit_type == "only_lyrics":
943
- n_min = 0.6
944
- n_max = 1.0
945
- elif edit_type == "remix":
946
- n_min = 0.2
947
- n_max = 0.4
948
- return n_min, n_max
949
-
950
- edit_type.change(
951
- edit_type_change_func,
952
- inputs=[edit_type],
953
- outputs=[edit_n_min, edit_n_max],
954
- )
955
-
956
- edit_source = gr.Radio(
957
- ["text2music", "last_edit", "upload"],
958
- value="text2music",
959
- label="Edit Source",
960
- elem_id="edit_source",
961
- )
962
- edit_source_audio_upload = gr.Audio(
963
- label="Upload Audio",
964
- type="filepath",
965
- visible=False,
966
- elem_id="edit_source_audio_upload",
967
- show_download_button=True,
968
- )
969
- edit_source.change(
970
- fn=lambda x: gr.update(
971
- visible=x == "upload", elem_id="edit_source_audio_upload"
972
- ),
973
- inputs=[edit_source],
974
- outputs=[edit_source_audio_upload],
975
- )
976
-
977
- edit_bnt = gr.Button("Edit", variant="primary")
978
- edit_outputs, edit_input_params_json = create_output_ui("Edit")
979
-
980
- def edit_process_func(
981
- text2music_json_data,
982
- edit_input_params_json,
983
- edit_source,
984
- edit_source_audio_upload,
985
- prompt,
986
- lyrics,
987
- edit_prompt,
988
- edit_lyrics,
989
- edit_n_min,
990
- edit_n_max,
991
- infer_step,
992
- guidance_scale,
993
- scheduler_type,
994
- cfg_type,
995
- omega_scale,
996
- manual_seeds,
997
- guidance_interval,
998
- guidance_interval_decay,
999
- min_guidance_scale,
1000
- use_erg_tag,
1001
- use_erg_lyric,
1002
- use_erg_diffusion,
1003
- oss_steps,
1004
- guidance_scale_text,
1005
- guidance_scale_lyric,
1006
- retake_seeds,
1007
- ):
1008
- if edit_source == "upload":
1009
- src_audio_path = edit_source_audio_upload
1010
- audio_duration = librosa.get_duration(filename=src_audio_path)
1011
- json_data = {"audio_duration": audio_duration}
1012
- elif edit_source == "text2music":
1013
- json_data = text2music_json_data
1014
- src_audio_path = json_data["audio_path"]
1015
- elif edit_source == "last_edit":
1016
- json_data = edit_input_params_json
1017
- src_audio_path = json_data["audio_path"]
1018
-
1019
- if not edit_prompt:
1020
- edit_prompt = prompt
1021
- if not edit_lyrics:
1022
- edit_lyrics = lyrics
1023
-
1024
- return enhanced_process_func(
1025
- json_data["audio_duration"],
1026
- prompt,
1027
- lyrics,
1028
- infer_step,
1029
- guidance_scale,
1030
- scheduler_type,
1031
- cfg_type,
1032
- omega_scale,
1033
- manual_seeds,
1034
- guidance_interval,
1035
- guidance_interval_decay,
1036
- min_guidance_scale,
1037
- use_erg_tag,
1038
- use_erg_lyric,
1039
- use_erg_diffusion,
1040
- oss_steps,
1041
- guidance_scale_text,
1042
- guidance_scale_lyric,
1043
- task="edit",
1044
- src_audio_path=src_audio_path,
1045
- edit_target_prompt=edit_prompt,
1046
- edit_target_lyrics=edit_lyrics,
1047
- edit_n_min=edit_n_min,
1048
- edit_n_max=edit_n_max,
1049
- retake_seeds=retake_seeds,
1050
- lora_name_or_path="none"
1051
- )
1052
-
1053
- edit_bnt.click(
1054
- fn=edit_process_func,
1055
- inputs=[
1056
- input_params_json,
1057
- edit_input_params_json,
1058
- edit_source,
1059
- edit_source_audio_upload,
1060
- prompt,
1061
- lyrics,
1062
- edit_prompt,
1063
- edit_lyrics,
1064
- edit_n_min,
1065
- edit_n_max,
1066
- infer_step,
1067
- guidance_scale,
1068
- scheduler_type,
1069
- cfg_type,
1070
- omega_scale,
1071
- manual_seeds,
1072
- guidance_interval,
1073
- guidance_interval_decay,
1074
- min_guidance_scale,
1075
- use_erg_tag,
1076
- use_erg_lyric,
1077
- use_erg_diffusion,
1078
- oss_steps,
1079
- guidance_scale_text,
1080
- guidance_scale_lyric,
1081
- retake_seeds,
1082
- ],
1083
- outputs=edit_outputs + [edit_input_params_json],
1084
- )
1085
-
1086
- with gr.Tab("extend"):
1087
- extend_seeds = gr.Textbox(
1088
- label="extend seeds (default None)", placeholder="", value=None
1089
- )
1090
- left_extend_length = gr.Slider(
1091
- minimum=0.0,
1092
- maximum=240.0,
1093
- step=0.01,
1094
- value=0.0,
1095
- label="Left Extend Length",
1096
- interactive=True,
1097
- )
1098
- right_extend_length = gr.Slider(
1099
- minimum=0.0,
1100
- maximum=240.0,
1101
- step=0.01,
1102
- value=30.0,
1103
- label="Right Extend Length",
1104
- interactive=True,
1105
- )
1106
- extend_source = gr.Radio(
1107
- ["text2music", "last_extend", "upload"],
1108
- value="text2music",
1109
- label="Extend Source",
1110
- elem_id="extend_source",
1111
- )
1112
-
1113
- extend_source_audio_upload = gr.Audio(
1114
- label="Upload Audio",
1115
- type="filepath",
1116
- visible=False,
1117
- elem_id="extend_source_audio_upload",
1118
- show_download_button=True,
1119
- )
1120
- extend_source.change(
1121
- fn=lambda x: gr.update(
1122
- visible=x == "upload", elem_id="extend_source_audio_upload"
1123
- ),
1124
- inputs=[extend_source],
1125
- outputs=[extend_source_audio_upload],
1126
- )
1127
-
1128
- extend_bnt = gr.Button("Extend", variant="primary")
1129
- extend_outputs, extend_input_params_json = create_output_ui("Extend")
1130
-
1131
- def extend_process_func(
1132
- text2music_json_data,
1133
- extend_input_params_json,
1134
- extend_seeds,
1135
- left_extend_length,
1136
- right_extend_length,
1137
- extend_source,
1138
- extend_source_audio_upload,
1139
- prompt,
1140
- lyrics,
1141
- infer_step,
1142
- guidance_scale,
1143
- scheduler_type,
1144
- cfg_type,
1145
- omega_scale,
1146
- manual_seeds,
1147
- guidance_interval,
1148
- guidance_interval_decay,
1149
- min_guidance_scale,
1150
- use_erg_tag,
1151
- use_erg_lyric,
1152
- use_erg_diffusion,
1153
- oss_steps,
1154
- guidance_scale_text,
1155
- guidance_scale_lyric,
1156
- ):
1157
- if extend_source == "upload":
1158
- src_audio_path = extend_source_audio_upload
1159
- # get audio duration
1160
- audio_duration = librosa.get_duration(filename=src_audio_path)
1161
- json_data = {"audio_duration": audio_duration}
1162
- elif extend_source == "text2music":
1163
- json_data = text2music_json_data
1164
- src_audio_path = json_data["audio_path"]
1165
- elif extend_source == "last_extend":
1166
- json_data = extend_input_params_json
1167
- src_audio_path = json_data["audio_path"]
1168
-
1169
- repaint_start = -left_extend_length
1170
- repaint_end = json_data["audio_duration"] + right_extend_length
1171
- return enhanced_process_func(
1172
- json_data["audio_duration"],
1173
- prompt,
1174
- lyrics,
1175
- infer_step,
1176
- guidance_scale,
1177
- scheduler_type,
1178
- cfg_type,
1179
- omega_scale,
1180
- manual_seeds,
1181
- guidance_interval,
1182
- guidance_interval_decay,
1183
- min_guidance_scale,
1184
- use_erg_tag,
1185
- use_erg_lyric,
1186
- use_erg_diffusion,
1187
- oss_steps,
1188
- guidance_scale_text,
1189
- guidance_scale_lyric,
1190
- retake_seeds=extend_seeds,
1191
- retake_variance=1.0,
1192
- task="extend",
1193
- repaint_start=repaint_start,
1194
- repaint_end=repaint_end,
1195
- src_audio_path=src_audio_path,
1196
- lora_name_or_path="none"
1197
- )
1198
-
1199
- extend_bnt.click(
1200
- fn=extend_process_func,
1201
- inputs=[
1202
- input_params_json,
1203
- extend_input_params_json,
1204
- extend_seeds,
1205
- left_extend_length,
1206
- right_extend_length,
1207
- extend_source,
1208
- extend_source_audio_upload,
1209
- prompt,
1210
- lyrics,
1211
- infer_step,
1212
- guidance_scale,
1213
- scheduler_type,
1214
- cfg_type,
1215
- omega_scale,
1216
- manual_seeds,
1217
- guidance_interval,
1218
- guidance_interval_decay,
1219
- min_guidance_scale,
1220
- use_erg_tag,
1221
- use_erg_lyric,
1222
- use_erg_diffusion,
1223
- oss_steps,
1224
- guidance_scale_text,
1225
- guidance_scale_lyric,
1226
- ],
1227
- outputs=extend_outputs + [extend_input_params_json],
1228
- )
1229
-
1230
- # ===== ๊ฐ„๋‹จํ•˜๊ณ  ์ง์ ‘์ ์ธ ์ด๋ฒคํŠธ ํ•ธ๋“ค๋Ÿฌ =====
1231
- print("๐Ÿ”— ์ด๋ฒคํŠธ ํ•ธ๋“ค๋Ÿฌ ์—ฐ๊ฒฐ ์ค‘...")
1232
 
1233
- # 1. ์žฅ๋ฅด ํ”„๋ฆฌ์…‹ ๋ณ€๊ฒฝ
1234
- def on_genre_change(genre, style):
1235
- print(f"๐ŸŽต Genre changed to: {genre}")
 
 
 
 
 
 
 
 
1236
  if genre == "Custom":
1237
  return TAG_DEFAULT
 
1238
  tags = GENRE_PRESETS.get(genre, TAG_DEFAULT)
1239
- if style and style in SONG_STYLES:
1240
  tags = f"{tags}, {SONG_STYLES[style]}"
1241
  return tags
1242
 
1243
- genre_preset.change(
1244
- fn=on_genre_change,
1245
- inputs=[genre_preset, song_style],
1246
- outputs=prompt
1247
- )
1248
-
1249
- # 2. ์Šคํƒ€์ผ ๋ณ€๊ฒฝ
1250
- def on_style_change(genre, style):
1251
- print(f"๐ŸŽค Style changed to: {style}")
1252
  if genre == "Custom":
1253
  base_tags = TAG_DEFAULT
1254
  else:
1255
  base_tags = GENRE_PRESETS.get(genre, TAG_DEFAULT)
1256
 
1257
- if style and style in SONG_STYLES:
1258
  return f"{base_tags}, {SONG_STYLES[style]}"
1259
  return base_tags
1260
 
1261
- song_style.change(
1262
- fn=on_style_change,
1263
- inputs=[genre_preset, song_style],
1264
- outputs=prompt
1265
- )
1266
-
1267
- # 3. ํ’ˆ์งˆ ํ”„๋ฆฌ์…‹
1268
- def on_quality_change(preset):
1269
- print(f"โšก Quality preset changed to: {preset}")
1270
- if preset in QUALITY_PRESETS:
1271
- p = QUALITY_PRESETS[preset]
1272
- return (
1273
- p["description"],
1274
- p["infer_step"],
1275
- p["guidance_scale"],
1276
- p["scheduler_type"],
1277
- p["omega_scale"],
1278
- p["use_erg_diffusion"],
1279
- p["use_erg_tag"]
1280
- )
1281
- return ("", 150, 15.0, "euler", 10.0, True, True)
1282
-
1283
- quality_preset.change(
1284
- fn=on_quality_change,
1285
- inputs=quality_preset,
1286
- outputs=[preset_description, infer_step, guidance_scale, scheduler_type, omega_scale, use_erg_diffusion, use_erg_tag]
1287
- )
1288
-
1289
- # 4. AI ์ž‘์‚ฌ
1290
- def generate_lyrics_click(prompt_text, genre, style):
1291
- print(f"๐Ÿค– AI ์ž‘์‚ฌ ๋ฒ„ํŠผ ํด๋ฆญ! Prompt: '{prompt_text}'")
1292
- if not prompt_text:
1293
- return "์ž‘์‚ฌ ์ฃผ์ œ๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”!"
1294
- return generate_lyrics_with_ai(prompt_text, genre, style)
1295
 
1296
- generate_lyrics_btn.click(
1297
- fn=generate_lyrics_click,
1298
- inputs=[lyric_prompt, genre_preset, song_style],
1299
- outputs=lyrics
1300
- )
 
 
 
 
 
 
1301
 
1302
- # 5. Random ๋ฒ„ํŠผ
1303
- def random_click(genre, style):
1304
- print("๐ŸŽฒ Random ๋ฒ„ํŠผ ํด๋ฆญ!")
1305
-
 
 
 
 
 
 
 
 
1306
  if genre == "Custom":
1307
  genre = random.choice(list(GENRE_PRESETS.keys()))
1308
 
1309
- themes = ["๋„์‹œ์˜ ๋ฐค", "์ฒซ์‚ฌ๋ž‘", "์—ฌ๋ฆ„ ํ•ด๋ณ€", "๊ฐ€์„ ์ •์ทจ", "ํฌ๋ง", "์ž์œ ", "๋ณ„๋น›", "์ฒญ์ถ˜"]
1310
  theme = random.choice(themes)
1311
  duration = random.choice([30, 60, 90, 120])
1312
 
1313
- # ํƒœ๊ทธ
1314
  tags = GENRE_PRESETS.get(genre, TAG_DEFAULT)
1315
  if style in SONG_STYLES:
1316
  tags = f"{tags}, {SONG_STYLES[style]}"
1317
 
1318
- # ๊ฐ€์‚ฌ ์ƒ์„ฑ
1319
  new_lyrics = generate_lyrics_with_ai(theme, genre, style)
1320
 
1321
- return [
1322
- duration, tags, new_lyrics, 150, 15.0, "euler", "apg", 10.0,
1323
- str(random.randint(1, 10000)), 0.5, 0.0, 3.0, True, False, True,
1324
- None, 0.0, 0.0, False, 0.5, None
1325
- ]
1326
-
1327
- random_bnt.click(
1328
- fn=random_click,
1329
- inputs=[genre_preset, song_style],
1330
- outputs=[
1331
- audio_duration, prompt, lyrics, infer_step, guidance_scale,
1332
- scheduler_type, cfg_type, omega_scale, manual_seeds,
1333
- guidance_interval, guidance_interval_decay, min_guidance_scale,
1334
- use_erg_tag, use_erg_lyric, use_erg_diffusion, oss_steps,
1335
- guidance_scale_text, guidance_scale_lyric, audio2audio_enable,
1336
- ref_audio_strength, ref_audio_input
1337
- ]
1338
- )
1339
-
1340
- # 6. Preview ๋ฒ„ํŠผ
1341
- preview_bnt.click(
1342
- fn=lambda p, l, g, s: print(f"๐ŸŽต Preview clicked! Genre: {g}, Style: {s}"),
1343
- inputs=[prompt, lyrics, genre_preset, song_style],
1344
- outputs=None
1345
- )
1346
 
1347
  # 7. ๋ฉ”์ธ ์ƒ์„ฑ ๋ฒ„ํŠผ
1348
- text2music_bnt.click(
1349
  fn=enhanced_process_func,
1350
- inputs=[
1351
- audio_duration, prompt, lyrics, infer_step, guidance_scale,
1352
- scheduler_type, cfg_type, omega_scale, manual_seeds,
1353
- guidance_interval, guidance_interval_decay, min_guidance_scale,
1354
- use_erg_tag, use_erg_lyric, use_erg_diffusion, oss_steps,
1355
- guidance_scale_text, guidance_scale_lyric, audio2audio_enable,
1356
- ref_audio_strength, ref_audio_input, lora_name_or_path,
1357
- multi_seed_mode, enable_smart_enhancement, genre_preset, song_style
1358
- ],
1359
  outputs=outputs + [input_params_json]
1360
  )
1361
 
1362
- print("โœ… ๋ชจ๋“  ์ด๋ฒคํŠธ ํ•ธ๋“ค๋Ÿฌ ์—ฐ๊ฒฐ ์™„๋ฃŒ!")
1363
 
1364
 
1365
  def create_main_demo_ui(
 
441
  ):
442
  # ํ–ฅ์ƒ๋œ ํ”„๋กœ์„ธ์Šค ํ•จ์ˆ˜ ์ƒ์„ฑ
443
  enhanced_process_func = create_enhanced_process_func(text2music_process_func)
444
+
445
+ # UI ์š”์†Œ๋ฅผ ์ €์žฅํ•  ๋”•์…”๋„ˆ๋ฆฌ
446
+ ui = {}
447
 
448
  with gr.Row():
449
  with gr.Column():
 
451
  with gr.Group():
452
  gr.Markdown("### โšก ํ’ˆ์งˆ & ์„ฑ๋Šฅ ์„ค์ •")
453
  with gr.Row():
454
+ ui['quality_preset'] = gr.Dropdown(
455
  choices=list(QUALITY_PRESETS.keys()),
456
  value="Standard",
457
  label="ํ’ˆ์งˆ ํ”„๋ฆฌ์…‹",
458
  scale=2,
459
  interactive=True
460
  )
461
+ ui['multi_seed_mode'] = gr.Dropdown(
462
  choices=list(MULTI_SEED_OPTIONS.keys()),
463
  value="Single",
464
  label="๋‹ค์ค‘ ์ƒ์„ฑ ๋ชจ๋“œ",
 
467
  interactive=True
468
  )
469
 
470
+ ui['preset_description'] = gr.Textbox(
471
  value=QUALITY_PRESETS["Standard"]["description"],
472
  label="์„ค๋ช…",
473
  interactive=False,
 
475
  )
476
 
477
  with gr.Row(equal_height=True):
478
+ ui['audio_duration'] = gr.Slider(
 
479
  -1,
480
  240.0,
481
  step=0.00001,
 
485
  info="-1 means random duration (30 ~ 240).",
486
  scale=7,
487
  )
488
+ ui['random_bnt'] = gr.Button("๐ŸŽฒ Random", variant="secondary", scale=1)
489
+ ui['preview_bnt'] = gr.Button("๐ŸŽต Preview", variant="secondary", scale=2)
490
 
491
  # audio2audio
492
  with gr.Row(equal_height=True):
493
+ ui['audio2audio_enable'] = gr.Checkbox(
494
  label="Enable Audio2Audio",
495
  value=False,
496
  info="Check to enable Audio-to-Audio generation using a reference audio.",
497
  elem_id="audio2audio_checkbox"
498
  )
499
+ ui['lora_name_or_path'] = gr.Dropdown(
500
  label="Lora Name or Path",
501
  choices=["ACE-Step/ACE-Step-v1-chinese-rap-LoRA", "none"],
502
  value="none",
503
  allow_custom_value=True,
504
  )
505
 
506
+ ui['ref_audio_input'] = gr.Audio(
507
  type="filepath",
508
  label="Reference Audio (for Audio2Audio)",
509
  visible=False,
510
  elem_id="ref_audio_input",
511
  show_download_button=True
512
  )
513
+ ui['ref_audio_strength'] = gr.Slider(
514
  label="Refer audio strength",
515
  minimum=0.0,
516
  maximum=1.0,
 
521
  interactive=True,
522
  )
523
 
 
 
 
 
 
 
 
 
 
 
 
 
524
  with gr.Column(scale=2):
525
  with gr.Group():
526
  gr.Markdown("""### ๐ŸŽผ ์Šค๋งˆํŠธ ํ”„๋กฌํ”„ํŠธ ์‹œ์Šคํ…œ
527
  <center>์žฅ๋ฅด์™€ ์Šคํƒ€์ผ์„ ์„ ํƒํ•˜๋ฉด ์ž๋™์œผ๋กœ ์ตœ์ ํ™”๋œ ํƒœ๊ทธ๊ฐ€ ์ถ”๊ฐ€๋ฉ๋‹ˆ๋‹ค.</center>""")
528
 
529
  with gr.Row():
530
+ ui['genre_preset'] = gr.Dropdown(
531
  choices=["Custom"] + list(GENRE_PRESETS.keys()),
532
  value="Custom",
533
  label="์žฅ๋ฅด ํ”„๋ฆฌ์…‹",
534
  scale=1,
535
  interactive=True
536
  )
537
+ ui['song_style'] = gr.Dropdown(
538
  choices=list(SONG_STYLES.keys()),
539
  value="๋“€์—ฃ (๋‚จ๋…€ ํ˜ผ์„ฑ)",
540
  label="๊ณก ์Šคํƒ€์ผ",
541
  scale=1,
542
  interactive=True
543
  )
544
+ ui['enable_smart_enhancement'] = gr.Checkbox(
545
  label="์Šค๋งˆํŠธ ํ–ฅ์ƒ",
546
  value=True,
547
  info="์ž๋™ ํƒœ๊ทธ ์ตœ์ ํ™”",
548
  scale=1
549
  )
550
 
551
+ ui['prompt'] = gr.Textbox(
552
  lines=2,
553
  label="Tags",
554
  max_lines=4,
 
562
  <center>์ฃผ์ œ๋ฅผ ์ž…๋ ฅํ•˜๊ณ  'AI ์ž‘์‚ฌ' ๋ฒ„ํŠผ์„ ํด๋ฆญํ•˜๋ฉด ์ž๋™์œผ๋กœ ๊ฐ€์‚ฌ๊ฐ€ ์ƒ์„ฑ๋ฉ๋‹ˆ๋‹ค.</center>""")
563
 
564
  with gr.Row():
565
+ ui['lyric_prompt'] = gr.Textbox(
566
  label="์ž‘์‚ฌ ์ฃผ์ œ",
567
  placeholder="์˜ˆ: ์ฒซ์‚ฌ๋ž‘์˜ ์„ค๋ ˜, ์ด๋ณ„์˜ ์•„ํ””, ํฌ๋ง์ฐฌ ๋‚ด์ผ...",
568
  scale=3,
569
  interactive=True
570
  )
571
+ ui['generate_lyrics_btn'] = gr.Button("๐Ÿค– AI ์ž‘์‚ฌ", variant="secondary", scale=1)
572
 
573
+ ui['lyrics'] = gr.Textbox(
574
  lines=9,
575
  label="Lyrics",
576
  max_lines=13,
 
580
  )
581
 
582
  with gr.Accordion("Basic Settings", open=False):
583
+ ui['infer_step'] = gr.Slider(
584
  minimum=1,
585
  maximum=300,
586
  step=1,
 
588
  label="Infer Steps",
589
  interactive=True,
590
  )
591
+ ui['guidance_scale'] = gr.Slider(
592
  minimum=0.0,
593
  maximum=30.0,
594
  step=0.1,
 
597
  interactive=True,
598
  info="When guidance_scale_lyric > 1 and guidance_scale_text > 1, the guidance scale will not be applied.",
599
  )
600
+ ui['guidance_scale_text'] = gr.Slider(
601
  minimum=0.0,
602
  maximum=10.0,
603
  step=0.1,
 
606
  interactive=True,
607
  info="Guidance scale for text condition. It can only apply to cfg. set guidance_scale_text=5.0, guidance_scale_lyric=1.5 for start",
608
  )
609
+ ui['guidance_scale_lyric'] = gr.Slider(
610
  minimum=0.0,
611
  maximum=10.0,
612
  step=0.1,
 
615
  interactive=True,
616
  )
617
 
618
+ ui['manual_seeds'] = gr.Textbox(
619
  label="manual seeds (default None)",
620
  placeholder="1,2,3,4",
621
  value=None,
 
623
  )
624
 
625
  with gr.Accordion("Advanced Settings", open=False):
626
+ ui['scheduler_type'] = gr.Radio(
627
  ["euler", "heun"],
628
  value="euler",
629
  label="Scheduler Type",
630
  elem_id="scheduler_type",
631
  info="Scheduler type for the generation. euler is recommended. heun will take more time.",
632
  )
633
+ ui['cfg_type'] = gr.Radio(
634
  ["cfg", "apg", "cfg_star"],
635
  value="apg",
636
  label="CFG Type",
637
  elem_id="cfg_type",
638
  info="CFG type for the generation. apg is recommended. cfg and cfg_star are almost the same.",
639
  )
640
+ ui['use_erg_tag'] = gr.Checkbox(
641
  label="use ERG for tag",
642
  value=True,
643
  info="Use Entropy Rectifying Guidance for tag. It will multiple a temperature to the attention to make a weaker tag condition and make better diversity.",
644
  )
645
+ ui['use_erg_lyric'] = gr.Checkbox(
646
  label="use ERG for lyric",
647
  value=False,
648
  info="The same but apply to lyric encoder's attention.",
649
  )
650
+ ui['use_erg_diffusion'] = gr.Checkbox(
651
  label="use ERG for diffusion",
652
  value=True,
653
  info="The same but apply to diffusion model's attention.",
654
  )
655
 
656
+ ui['omega_scale'] = gr.Slider(
657
  minimum=-100.0,
658
  maximum=100.0,
659
  step=0.1,
 
663
  info="Granularity scale for the generation. Higher values can reduce artifacts",
664
  )
665
 
666
+ ui['guidance_interval'] = gr.Slider(
667
  minimum=0.0,
668
  maximum=1.0,
669
  step=0.01,
 
672
  interactive=True,
673
  info="Guidance interval for the generation. 0.5 means only apply guidance in the middle steps (0.25 * infer_steps to 0.75 * infer_steps)",
674
  )
675
+ ui['guidance_interval_decay'] = gr.Slider(
676
  minimum=0.0,
677
  maximum=1.0,
678
  step=0.01,
 
681
  interactive=True,
682
  info="Guidance interval decay for the generation. Guidance scale will decay from guidance_scale to min_guidance_scale in the interval. 0.0 means no decay.",
683
  )
684
+ ui['min_guidance_scale'] = gr.Slider(
685
  minimum=0.0,
686
  maximum=200.0,
687
  step=0.1,
 
690
  interactive=True,
691
  info="Min guidance scale for guidance interval decay's end scale",
692
  )
693
+ ui['oss_steps'] = gr.Textbox(
694
  label="OSS Steps",
695
  placeholder="16, 29, 52, 96, 129, 158, 172, 183, 189, 200",
696
  value=None,
697
  info="Optimal Steps for the generation. But not test well",
698
  )
699
 
700
+ ui['text2music_bnt'] = gr.Button("๐ŸŽต Generate Music", variant="primary", size="lg")
701
 
702
  with gr.Column():
703
  outputs, input_params_json = create_output_ui()
704
 
705
+ # retake, repainting, edit, extend ํƒญ๋“ค์€ ๊ธฐ์กด ์ฝ”๋“œ์™€ ๋™์ผ...
706
+ # (์—ฌ๊ธฐ์„œ๋Š” ์ƒ๋žตํ•˜์ง€๋งŒ ์‹ค์ œ๋กœ๋Š” ํฌํ•จ๋˜์–ด์•ผ ํ•จ)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
707
 
708
+ # ============ ๋ชจ๋“  UI ์ƒ์„ฑ ์™„๋ฃŒ ํ›„ ์ด๋ฒคํŠธ ํ•ธ๋“ค๋Ÿฌ ์—ฐ๊ฒฐ ============
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
709
 
710
+ # 1. Audio2Audio ํ† ๊ธ€
711
+ ui['audio2audio_enable'].change(
712
+ fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
713
+ inputs=[ui['audio2audio_enable']],
714
+ outputs=[ui['ref_audio_input'], ui['ref_audio_strength']]
715
+ )
716
+
717
+ # 2. ์žฅ๋ฅด ํ”„๋ฆฌ์…‹ ๋ณ€๊ฒฝ
718
+ @gr.on(triggers=[ui['genre_preset'].change], inputs=[ui['genre_preset'], ui['song_style']], outputs=[ui['prompt']])
719
+ def update_tags_for_genre(genre, style):
720
+ print(f"๐ŸŽต Genre changed: {genre}, Style: {style}")
721
  if genre == "Custom":
722
  return TAG_DEFAULT
723
+
724
  tags = GENRE_PRESETS.get(genre, TAG_DEFAULT)
725
+ if style in SONG_STYLES:
726
  tags = f"{tags}, {SONG_STYLES[style]}"
727
  return tags
728
 
729
+ # 3. ๊ณก ์Šคํƒ€์ผ ๋ณ€๊ฒฝ
730
+ @gr.on(triggers=[ui['song_style'].change], inputs=[ui['genre_preset'], ui['song_style']], outputs=[ui['prompt']])
731
+ def update_tags_for_style(genre, style):
732
+ print(f"๐ŸŽค Style changed: {style}, Genre: {genre}")
 
 
 
 
 
733
  if genre == "Custom":
734
  base_tags = TAG_DEFAULT
735
  else:
736
  base_tags = GENRE_PRESETS.get(genre, TAG_DEFAULT)
737
 
738
+ if style in SONG_STYLES:
739
  return f"{base_tags}, {SONG_STYLES[style]}"
740
  return base_tags
741
 
742
+ # 4. ํ’ˆ์งˆ ํ”„๋ฆฌ์…‹ ๋ณ€๊ฒฝ
743
+ @gr.on(triggers=[ui['quality_preset'].change], inputs=[ui['quality_preset']],
744
+ outputs=[ui['preset_description'], ui['infer_step'], ui['guidance_scale'],
745
+ ui['scheduler_type'], ui['omega_scale'], ui['use_erg_diffusion'], ui['use_erg_tag']])
746
+ def update_quality_settings(preset):
747
+ print(f"โšก Quality preset: {preset}")
748
+ if preset not in QUALITY_PRESETS:
749
+ return ("", 150, 15.0, "euler", 10.0, True, True)
750
+
751
+ p = QUALITY_PRESETS[preset]
752
+ return (p["description"], p["infer_step"], p["guidance_scale"],
753
+ p["scheduler_type"], p["omega_scale"], p["use_erg_diffusion"], p["use_erg_tag"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
754
 
755
+ # 5. AI ์ž‘์‚ฌ ๋ฒ„ํŠผ
756
+ @gr.on(triggers=[ui['generate_lyrics_btn'].click],
757
+ inputs=[ui['lyric_prompt'], ui['genre_preset'], ui['song_style']],
758
+ outputs=[ui['lyrics']])
759
+ def generate_lyrics_handler(prompt, genre, style):
760
+ print(f"๐Ÿค– Generate lyrics: {prompt}")
761
+ if not prompt or prompt.strip() == "":
762
+ gr.Warning("์ž‘์‚ฌ ์ฃผ์ œ๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”!")
763
+ return ui['lyrics'].value
764
+
765
+ return generate_lyrics_with_ai(prompt, genre, style)
766
 
767
+ # 6. Random ๋ฒ„ํŠผ
768
+ @gr.on(triggers=[ui['random_bnt'].click],
769
+ inputs=[ui['genre_preset'], ui['song_style']],
770
+ outputs=[ui['audio_duration'], ui['prompt'], ui['lyrics'], ui['infer_step'],
771
+ ui['guidance_scale'], ui['scheduler_type'], ui['cfg_type'], ui['omega_scale'],
772
+ ui['manual_seeds'], ui['guidance_interval'], ui['guidance_interval_decay'],
773
+ ui['min_guidance_scale'], ui['use_erg_tag'], ui['use_erg_lyric'],
774
+ ui['use_erg_diffusion'], ui['oss_steps'], ui['guidance_scale_text'],
775
+ ui['guidance_scale_lyric'], ui['audio2audio_enable'], ui['ref_audio_strength'],
776
+ ui['ref_audio_input']])
777
+ def random_generation(genre, style):
778
+ print("๐ŸŽฒ Random generation")
779
  if genre == "Custom":
780
  genre = random.choice(list(GENRE_PRESETS.keys()))
781
 
782
+ themes = ["๋„์‹œ์˜ ๋ฐค", "์ฒซ์‚ฌ๋ž‘", "์—ฌ๋ฆ„ ํ•ด๋ณ€", "๊ฐ€์„ ์ •์ทจ"]
783
  theme = random.choice(themes)
784
  duration = random.choice([30, 60, 90, 120])
785
 
 
786
  tags = GENRE_PRESETS.get(genre, TAG_DEFAULT)
787
  if style in SONG_STYLES:
788
  tags = f"{tags}, {SONG_STYLES[style]}"
789
 
 
790
  new_lyrics = generate_lyrics_with_ai(theme, genre, style)
791
 
792
+ return [duration, tags, new_lyrics, 150, 15.0, "euler", "apg", 10.0,
793
+ str(random.randint(1, 10000)), 0.5, 0.0, 3.0, True, False, True,
794
+ None, 0.0, 0.0, False, 0.5, None]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
795
 
796
  # 7. ๋ฉ”์ธ ์ƒ์„ฑ ๋ฒ„ํŠผ
797
+ ui['text2music_bnt'].click(
798
  fn=enhanced_process_func,
799
+ inputs=[ui['audio_duration'], ui['prompt'], ui['lyrics'], ui['infer_step'],
800
+ ui['guidance_scale'], ui['scheduler_type'], ui['cfg_type'], ui['omega_scale'],
801
+ ui['manual_seeds'], ui['guidance_interval'], ui['guidance_interval_decay'],
802
+ ui['min_guidance_scale'], ui['use_erg_tag'], ui['use_erg_lyric'],
803
+ ui['use_erg_diffusion'], ui['oss_steps'], ui['guidance_scale_text'],
804
+ ui['guidance_scale_lyric'], ui['audio2audio_enable'], ui['ref_audio_strength'],
805
+ ui['ref_audio_input'], ui['lora_name_or_path'], ui['multi_seed_mode'],
806
+ ui['enable_smart_enhancement'], ui['genre_preset'], ui['song_style']],
 
807
  outputs=outputs + [input_params_json]
808
  )
809
 
810
+ print("โœ… ์ด๋ฒคํŠธ ํ•ธ๋“ค๋Ÿฌ ์—ฐ๊ฒฐ ์™„๋ฃŒ!")
811
 
812
 
813
  def create_main_demo_ui(