ginipick committed on
Commit
2b8dfa8
Β·
verified Β·
1 Parent(s): b29f642

Update ui/components.py

Browse files
Files changed (1) hide show
  1. ui/components.py +1031 -584
ui/components.py CHANGED
@@ -1,3 +1,11 @@
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import librosa
3
  import os
@@ -7,180 +15,173 @@ import numpy as np
7
  import json
8
  from typing import Dict, List, Tuple, Optional
9
 
10
# [MODIFIED] Initialize the OpenAI SDK from environment variables.
# client_available gates every AI feature below; it is False when either the
# SDK is missing or no API key is configured, and the UI falls back to
# default lyrics in that case.
try:
    import openai

    api_key = os.getenv("LLM_API") or os.getenv("OPENAI_API_KEY")
    client_available = bool(api_key)
    if client_available:
        openai.api_key = api_key
        print("βœ… OpenAI API client initialized successfully")
    else:
        print("⚠️ Warning: No OpenAI API key found. AI lyrics generation will be disabled.")
except Exception as e:
    client_available = False
    print(f"❌ Warning: Failed to initialize OpenAI client: {e}")
 
25
# ─── SDK-version shim, placed right below the openai initialisation ───
from packaging import version


def _chat_completion(**kwargs):
    """Dispatch a chat-completion request to the call style of the installed SDK.

    openai>=1.0 moved chat completions to ``openai.chat.completions.create``;
    older releases use ``openai.ChatCompletion.create``. ``kwargs`` are
    forwarded unchanged either way.
    """
    legacy_sdk = version.parse(openai.__version__) < version.parse("1.0.0")
    if legacy_sdk:
        # pre-1.0 call style
        return openai.ChatCompletion.create(**kwargs)
    # v1+ call style
    return openai.chat.completions.create(**kwargs)
38
# Default comma-separated tag string pre-filled in the Tags textbox.
TAG_DEFAULT = "funk, pop, soul, rock, melodic, guitar, drums, bass, keyboard, percussion, 105 BPM, energetic, upbeat, groovy, vibrant, dynamic, duet, male and female vocals"

# Default lyrics pre-filled in the Lyrics textbox. Structure tags in square
# brackets ([verse - male], [chorus - duet], ...) are consumed by the music
# model, so the exact text must be preserved.
LYRIC_DEFAULT = """[verse - male]
Neon lights they flicker bright
City hums in dead of night
Rhythms pulse through concrete veins
Lost in echoes of refrains

[verse - female]
Bassline groovin' in my chest
Heartbeats match the city's zest
Electric whispers fill the air
Synthesized dreams everywhere

[chorus - duet]
Turn it up and let it flow
Feel the fire let it grow
In this rhythm we belong
Hear the night sing out our song

[verse - male]
Guitar strings they start to weep
Wake the soul from silent sleep
Every note a story told
In this night we're bold and gold

[bridge - female]
Voices blend in harmony
Lost in pure cacophony
Timeless echoes timeless cries
Soulful shouts beneath the skies

[verse - duet]
Keyboard dances on the keys
Melodies on evening breeze
Catch the tune and hold it tight
In this moment we take flight
"""
75
 
76
# Extended genre presets (original tags plus refined production descriptors).
# Keys populate the genre dropdown; values are comma-separated tag strings
# passed to the music model, so they must stay verbatim.
GENRE_PRESETS = {
    "Modern Pop": "pop, synth, drums, guitar, 120 bpm, upbeat, catchy, vibrant, polished vocals, radio-ready, commercial, layered vocals",
    "Rock": "rock, electric guitar, drums, bass, 130 bpm, energetic, rebellious, gritty, powerful vocals, raw vocals, power chords, driving rhythm",
    "Hip Hop": "hip hop, 808 bass, hi-hats, synth, 90 bpm, bold, urban, intense, rhythmic vocals, trap beats, punchy drums",
    "Country": "country, acoustic guitar, steel guitar, fiddle, 100 bpm, heartfelt, rustic, warm, twangy vocals, storytelling, americana",
    "EDM": "edm, synth, bass, kick drum, 128 bpm, euphoric, pulsating, energetic, instrumental, progressive build, festival anthem, electronic",
    "Reggae": "reggae, guitar, bass, drums, 80 bpm, chill, soulful, positive, smooth vocals, offbeat rhythm, island vibes",
    "Classical": "classical, orchestral, strings, piano, 60 bpm, elegant, emotive, timeless, instrumental, dynamic range, sophisticated harmony",
    "Jazz": "jazz, saxophone, piano, double bass, 110 bpm, smooth, improvisational, soulful, crooning vocals, swing feel, sophisticated",
    "Metal": "metal, electric guitar, double kick drum, bass, 160 bpm, aggressive, intense, heavy, powerful vocals, distorted, powerful",
    "R&B": "r&b, synth, bass, drums, 85 bpm, sultry, groovy, romantic, silky vocals, smooth production, neo-soul",
    "K-Pop": "k-pop, synth, bass, drums, 128 bpm, catchy, energetic, polished, mixed vocals, electronic elements, danceable",
    "Ballad": "ballad, piano, strings, acoustic guitar, 70 bpm, emotional, heartfelt, romantic, expressive vocals, orchestral arrangement"
}
91
 
92
- # 곑 μŠ€νƒ€μΌ μ˜΅μ…˜
93
- SONG_STYLES = {
94
- "λ“€μ—£ (남녀 ν˜Όμ„±)": "duet, male and female vocals, harmonious, call and response",
95
- "μ†”λ‘œ (남성)": "solo, male vocals, powerful voice",
96
- "μ†”λ‘œ (μ—¬μ„±)": "solo, female vocals, emotional voice",
97
- "κ·Έλ£Ή (ν˜Όμ„±)": "group vocals, mixed gender, layered harmonies",
98
- "ν•©μ°½": "choir, multiple voices, choral arrangement",
99
- "랩/νž™ν•©": "rap vocals, rhythmic flow, urban style",
100
- "μΈμŠ€νŠΈλ£¨λ©˜νƒˆ": "instrumental, no vocals"
101
- }
102
 
103
# System prompt for the AI lyric writer. The Korean text is sent to the model
# verbatim as the system message (it instructs the model to mirror the user's
# language and to emit [bracketed] structure tags), so it must not be edited
# or translated.
LYRIC_SYSTEM_PROMPT = """λ„ˆλŠ” λ…Έλž˜ 가사λ₯Ό μž‘μ‚¬ν•˜λŠ” μ „λ¬Έκ°€ 역할이닀. μ΄μš©μžκ°€ μž…λ ₯ν•˜λŠ” μ£Όμ œμ™€ μŠ€νƒ€μΌμ— 따라 κ΄€λ ¨λœ λ…Έλž˜ 가사λ₯Ό μž‘μ„±ν•˜λΌ.

가사 μž‘μ„± κ·œμΉ™:
1. ꡬ쑰 νƒœκ·ΈλŠ” λ°˜λ“œμ‹œ "[ ]"둜 κ΅¬λΆ„ν•œλ‹€
2. μ‚¬μš© κ°€λŠ₯ν•œ ꡬ쑰 νƒœκ·Έ: [verse], [chorus], [bridge], [intro], [outro], [pre-chorus]
3. 듀엣인 경우 [verse - male], [verse - female], [chorus - duet] ν˜•μ‹μœΌλ‘œ 파트λ₯Ό λͺ…μ‹œν•œλ‹€
4. μž…λ ₯ 언어와 λ™μΌν•œ μ–Έμ–΄λ‘œ 가사λ₯Ό μž‘μ„±ν•œλ‹€
5. 각 κ΅¬μ‘°λŠ” 4-8쀄 μ •λ„λ‘œ μž‘μ„±ν•œλ‹€
6. μŒμ•… μž₯λ₯΄μ™€ λΆ„μœ„κΈ°μ— λ§žλŠ” 가사λ₯Ό μž‘μ„±ν•œλ‹€

μ˜ˆμ‹œ ν˜•μ‹:
[verse - male]
첫 번째 ꡬ절 가사
두 번째 ꡬ절 가사
...

[chorus - duet]
후렴ꡬ 가사
...
"""
124
 
125
def generate_lyrics_with_ai(prompt: str, genre: str, song_style: str) -> str:
    """Generate song lyrics via the OpenAI chat API.

    Args:
        prompt: Theme for the lyrics; the model is asked to answer in the
            same language as this text.
        genre: Genre preset name, passed as context in the user prompt.
        song_style: One of the SONG_STYLES keys (Korean labels); selects the
            vocal-arrangement instructions sent to the model.

    Returns:
        The generated lyrics; a fixed instrumental template for the
        instrumental style; or LYRIC_DEFAULT when the API client is
        unavailable, the prompt is empty, or the request fails
        (best-effort fallback — this function never raises).
    """
    print(f"🎡 generate_lyrics_with_ai called with: prompt='{prompt}', genre='{genre}', style='{song_style}'")

    # Fix: instrumental tracks need no lyrics and therefore no API call, so
    # handle this style before the availability / empty-prompt checks — the
    # original returned LYRIC_DEFAULT here whenever no API key was configured.
    if "μΈμŠ€νŠΈλ£¨λ©˜νƒˆ" in song_style:
        return "[instrumental]\n\n[inst]\n\n[instrumental break]\n\n[inst]"

    # [MODIFIED] gate on client_available and call through the openai shim
    if not client_available:
        print("❌ OpenAI client not available, returning default lyrics")
        return LYRIC_DEFAULT

    if not prompt or prompt.strip() == "":
        print("⚠️ Empty prompt, returning default lyrics")
        return LYRIC_DEFAULT

    try:
        # Map the (Korean) style label onto extra instructions for the model.
        # These strings are part of the prompt sent to the API — keep verbatim.
        style_info = ""
        if "λ“€μ—£" in song_style:
            style_info = "남녀 λ“€μ—£ ν˜•μ‹μœΌλ‘œ 파트λ₯Ό λ‚˜λˆ„μ–΄ μž‘μ„±ν•΄μ£Όμ„Έμš”. [verse - male], [verse - female], [chorus - duet] ν˜•μ‹μ„ μ‚¬μš©ν•˜μ„Έμš”."
        elif "μ†”λ‘œ (남성)" in song_style:
            style_info = "남성 μ†”λ‘œ κ°€μˆ˜λ₯Ό μœ„ν•œ 가사λ₯Ό μž‘μ„±ν•΄μ£Όμ„Έμš”."
        elif "μ†”λ‘œ (μ—¬μ„±)" in song_style:
            style_info = "μ—¬μ„± μ†”λ‘œ κ°€μˆ˜λ₯Ό μœ„ν•œ 가사λ₯Ό μž‘μ„±ν•΄μ£Όμ„Έμš”."
        elif "κ·Έλ£Ή" in song_style:
            style_info = "그룹이 λΆ€λ₯΄λŠ” ν˜•μ‹μœΌλ‘œ 파트λ₯Ό λ‚˜λˆ„μ–΄ μž‘μ„±ν•΄μ£Όμ„Έμš”."

        user_prompt = f"""
주제: {prompt}
μž₯λ₯΄: {genre}
μŠ€νƒ€μΌ: {style_info}

μœ„ 정보λ₯Ό λ°”νƒ•μœΌλ‘œ λ…Έλž˜ 가사λ₯Ό μž‘μ„±ν•΄μ£Όμ„Έμš”. μž…λ ₯된 언어와 λ™μΌν•œ μ–Έμ–΄λ‘œ μž‘μ„±ν•˜κ³ , ꡬ쑰 νƒœκ·Έλ₯Ό λ°˜λ“œμ‹œ ν¬ν•¨ν•΄μ£Όμ„Έμš”.
"""

        print("πŸ“ Sending request to OpenAI...")

        # [MODIFIED] route through the version-agnostic _chat_completion shim
        response = _chat_completion(
            model="gpt-4.1-mini",
            messages=[
                {"role": "system", "content": LYRIC_SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.8,
            max_tokens=1000,
        )

        generated_lyrics = response.choices[0].message.content
        print("βœ… Generated lyrics successfully")
        return generated_lyrics

    except Exception as e:
        # Best-effort: any API or parsing failure falls back to default lyrics.
        print(f"❌ AI 가사 생성 였λ₯˜: {e}")
        return LYRIC_DEFAULT
183
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  # ν’ˆμ§ˆ 프리셋 μ‹œμŠ€ν…œ μΆ”κ°€
185
  QUALITY_PRESETS = {
186
  "Draft (Fast)": {
@@ -256,37 +257,30 @@ class MusicGenerationCache:
256
  # μ „μ—­ μΊμ‹œ μΈμŠ€ν„΄μŠ€
257
  generation_cache = MusicGenerationCache()
258
 
259
def enhance_prompt_with_genre(base_prompt: str, genre: str, song_style: str) -> str:
    """Smart prompt expansion: append genre- and style-specific tags.

    When *genre* is a known preset (and not "Custom"), three extra
    production tags for that genre are appended to *base_prompt*; when
    *song_style* is a known SONG_STYLES key, its tag fragment is appended
    as well. The input prompt is returned unchanged otherwise.
    """
    # Per-genre refinement tags (strings are part of the model prompt).
    extra_tags_by_genre = {
        "Modern Pop": ["polished production", "mainstream appeal", "hook-driven"],
        "Rock": ["guitar-driven", "powerful drums", "energetic performance"],
        "Hip Hop": ["rhythmic flow", "urban atmosphere", "bass-heavy"],
        "Country": ["acoustic warmth", "storytelling melody", "authentic feel"],
        "EDM": ["electronic atmosphere", "build-ups", "dance-friendly"],
        "Reggae": ["laid-back groove", "tropical vibes", "rhythmic guitar"],
        "Classical": ["orchestral depth", "musical sophistication", "timeless beauty"],
        "Jazz": ["musical complexity", "improvisational spirit", "sophisticated harmony"],
        "Metal": ["aggressive energy", "powerful sound", "intense atmosphere"],
        "R&B": ["smooth groove", "soulful expression", "rhythmic sophistication"],
        "K-Pop": ["catchy hooks", "dynamic arrangement", "polished production"],
        "Ballad": ["emotional depth", "slow tempo", "heartfelt delivery"],
    }

    result = base_prompt
    if genre and genre != "Custom" and genre in extra_tags_by_genre:
        result = "{}, {}".format(base_prompt, ", ".join(extra_tags_by_genre[genre]))

    if song_style in SONG_STYLES:
        result = "{}, {}".format(result, SONG_STYLES[song_style])

    return result
290
 
291
  def calculate_quality_score(audio_path: str) -> float:
292
  """κ°„λ‹¨ν•œ ν’ˆμ§ˆ 점수 계산 (μ‹€μ œ κ΅¬ν˜„μ—μ„œλŠ” 더 λ³΅μž‘ν•œ λ©”νŠΈλ¦­ μ‚¬μš©)"""
@@ -308,6 +302,11 @@ def calculate_quality_score(audio_path: str) -> float:
308
  except:
309
  return 50.0 # κΈ°λ³Έκ°’
310
 
 
 
 
 
 
311
  def update_quality_preset(preset_name):
312
  """ν’ˆμ§ˆ 프리셋 적용"""
313
  if preset_name not in QUALITY_PRESETS:
@@ -334,29 +333,37 @@ def create_enhanced_process_func(original_func):
334
  guidance_scale_text, guidance_scale_lyric,
335
  audio2audio_enable=False, ref_audio_strength=0.5, ref_audio_input=None,
336
  lora_name_or_path="none", multi_seed_mode="Single",
337
- enable_smart_enhancement=True, genre_preset="Custom", song_style="λ“€μ—£ (남녀 ν˜Όμ„±)", **kwargs
338
  ):
 
 
 
339
  # 슀마트 ν”„λ‘¬ν”„νŠΈ ν™•μž₯
340
- if enable_smart_enhancement:
341
- prompt = enhance_prompt_with_genre(prompt, genre_preset, song_style)
 
 
 
342
 
343
  # μΊμ‹œ 확인
344
  cache_params = {
345
- 'prompt': prompt, 'lyrics': lyrics, 'audio_duration': audio_duration,
346
  'infer_step': infer_step, 'guidance_scale': guidance_scale
347
  }
348
 
349
  cached_result = generation_cache.get_cached_result(cache_params)
350
  if cached_result:
 
351
  return cached_result
352
 
353
  # 닀쀑 μ‹œλ“œ 생성
354
  num_candidates = MULTI_SEED_OPTIONS.get(multi_seed_mode, 1)
 
355
 
356
  if num_candidates == 1:
357
  # κΈ°μ‘΄ ν•¨μˆ˜ 호좜
358
  result = original_func(
359
- audio_duration, prompt, lyrics, infer_step, guidance_scale,
360
  scheduler_type, cfg_type, omega_scale, manual_seeds,
361
  guidance_interval, guidance_interval_decay, min_guidance_scale,
362
  use_erg_tag, use_erg_lyric, use_erg_diffusion, oss_steps,
@@ -364,58 +371,19 @@ def create_enhanced_process_func(original_func):
364
  ref_audio_strength, ref_audio_input, lora_name_or_path, **kwargs
365
  )
366
  else:
367
- # 닀쀑 μ‹œλ“œ 생성 및 졜적 선택
368
- candidates = []
369
-
370
- for i in range(num_candidates):
371
- seed = random.randint(1, 10000)
372
-
373
- try:
374
- result = original_func(
375
- audio_duration, prompt, lyrics, infer_step, guidance_scale,
376
- scheduler_type, cfg_type, omega_scale, str(seed),
377
- guidance_interval, guidance_interval_decay, min_guidance_scale,
378
- use_erg_tag, use_erg_lyric, use_erg_diffusion, oss_steps,
379
- guidance_scale_text, guidance_scale_lyric, audio2audio_enable,
380
- ref_audio_strength, ref_audio_input, lora_name_or_path, **kwargs
381
- )
382
-
383
- if result and len(result) > 0:
384
- audio_path = result[0] # 첫 번째 κ²°κ³Όκ°€ μ˜€λ””μ˜€ 파일 경둜
385
- if audio_path and os.path.exists(audio_path):
386
- quality_score = calculate_quality_score(audio_path)
387
- candidates.append({
388
- "result": result,
389
- "quality_score": quality_score,
390
- "seed": seed
391
- })
392
- except Exception as e:
393
- print(f"Generation {i+1} failed: {e}")
394
- continue
395
-
396
- if candidates:
397
- # 졜고 ν’ˆμ§ˆ 선택
398
- best_candidate = max(candidates, key=lambda x: x["quality_score"])
399
- result = best_candidate["result"]
400
-
401
- # ν’ˆμ§ˆ 정보 μΆ”κ°€
402
- if len(result) > 1 and isinstance(result[1], dict):
403
- result[1]["quality_score"] = best_candidate["quality_score"]
404
- result[1]["selected_seed"] = best_candidate["seed"]
405
- result[1]["candidates_count"] = len(candidates)
406
- else:
407
- # λͺ¨λ“  생성 μ‹€νŒ¨μ‹œ κΈ°λ³Έ 생성
408
- result = original_func(
409
- audio_duration, prompt, lyrics, infer_step, guidance_scale,
410
- scheduler_type, cfg_type, omega_scale, manual_seeds,
411
- guidance_interval, guidance_interval_decay, min_guidance_scale,
412
- use_erg_tag, use_erg_lyric, use_erg_diffusion, oss_steps,
413
- guidance_scale_text, guidance_scale_lyric, audio2audio_enable,
414
- ref_audio_strength, ref_audio_input, lora_name_or_path, **kwargs
415
- )
416
 
417
  # κ²°κ³Ό μΊμ‹œ
418
  generation_cache.cache_result(cache_params, result)
 
419
  return result
420
 
421
  return enhanced_func
@@ -441,8 +409,41 @@ def create_output_ui(task_name="Text2Music"):
441
  return outputs, input_params_json
442
 
443
def dump_func(*args):
    """Debug stand-in for a processing callback.

    Echoes whatever it receives to stdout and returns an empty list so
    Gradio event wiring gets no outputs.
    """
    print(args)
    return list()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
446
 
447
  def create_text2music_ui(
448
  gr,
@@ -452,9 +453,6 @@ def create_text2music_ui(
452
  ):
453
  # ν–₯μƒλœ ν”„λ‘œμ„ΈμŠ€ ν•¨μˆ˜ 생성
454
  enhanced_process_func = create_enhanced_process_func(text2music_process_func)
455
-
456
- # UI μš”μ†Œλ₯Ό μ €μž₯ν•  λ”•μ…”λ„ˆλ¦¬
457
- ui = {}
458
 
459
  with gr.Row():
460
  with gr.Column():
@@ -462,23 +460,21 @@ def create_text2music_ui(
462
  with gr.Group():
463
  gr.Markdown("### ⚑ ν’ˆμ§ˆ & μ„±λŠ₯ μ„€μ •")
464
  with gr.Row():
465
- ui['quality_preset'] = gr.Dropdown(
466
  choices=list(QUALITY_PRESETS.keys()),
467
  value="Standard",
468
  label="ν’ˆμ§ˆ 프리셋",
469
- scale=2,
470
- interactive=True
471
  )
472
- ui['multi_seed_mode'] = gr.Dropdown(
473
  choices=list(MULTI_SEED_OPTIONS.keys()),
474
  value="Single",
475
  label="닀쀑 생성 λͺ¨λ“œ",
476
  scale=2,
477
- info="μ—¬λŸ¬ 번 μƒμ„±ν•˜μ—¬ 졜고 ν’ˆμ§ˆ 선택",
478
- interactive=True
479
  )
480
 
481
- ui['preset_description'] = gr.Textbox(
482
  value=QUALITY_PRESETS["Standard"]["description"],
483
  label="μ„€λͺ…",
484
  interactive=False,
@@ -486,7 +482,8 @@ def create_text2music_ui(
486
  )
487
 
488
  with gr.Row(equal_height=True):
489
- ui['audio_duration'] = gr.Slider(
 
490
  -1,
491
  240.0,
492
  step=0.00001,
@@ -496,32 +493,32 @@ def create_text2music_ui(
496
  info="-1 means random duration (30 ~ 240).",
497
  scale=7,
498
  )
499
- ui['random_bnt'] = gr.Button("🎲 Random", variant="secondary", scale=1)
500
- ui['preview_bnt'] = gr.Button("🎡 Preview", variant="secondary", scale=2)
501
 
502
  # audio2audio
503
  with gr.Row(equal_height=True):
504
- ui['audio2audio_enable'] = gr.Checkbox(
505
  label="Enable Audio2Audio",
506
  value=False,
507
  info="Check to enable Audio-to-Audio generation using a reference audio.",
508
  elem_id="audio2audio_checkbox"
509
  )
510
- ui['lora_name_or_path'] = gr.Dropdown(
511
  label="Lora Name or Path",
512
  choices=["ACE-Step/ACE-Step-v1-chinese-rap-LoRA", "none"],
513
  value="none",
514
  allow_custom_value=True,
515
  )
516
 
517
- ui['ref_audio_input'] = gr.Audio(
518
  type="filepath",
519
  label="Reference Audio (for Audio2Audio)",
520
  visible=False,
521
  elem_id="ref_audio_input",
522
  show_download_button=True
523
  )
524
- ui['ref_audio_strength'] = gr.Slider(
525
  label="Refer audio strength",
526
  minimum=0.0,
527
  maximum=1.0,
@@ -532,66 +529,81 @@ def create_text2music_ui(
532
  interactive=True,
533
  )
534
 
 
 
 
 
 
 
 
 
 
 
 
 
535
  with gr.Column(scale=2):
536
  with gr.Group():
537
  gr.Markdown("""### 🎼 슀마트 ν”„λ‘¬ν”„νŠΈ μ‹œμŠ€ν…œ
538
- <center>μž₯λ₯΄μ™€ μŠ€νƒ€μΌμ„ μ„ νƒν•˜λ©΄ μžλ™μœΌλ‘œ μ΅œμ ν™”λœ νƒœκ·Έκ°€ μΆ”κ°€λ©λ‹ˆλ‹€.</center>""")
539
 
540
  with gr.Row():
541
- ui['genre_preset'] = gr.Dropdown(
542
  choices=["Custom"] + list(GENRE_PRESETS.keys()),
543
  value="Custom",
544
  label="μž₯λ₯΄ 프리셋",
545
  scale=1,
546
- interactive=True
547
  )
548
- ui['song_style'] = gr.Dropdown(
549
- choices=list(SONG_STYLES.keys()),
550
- value="λ“€μ—£ (남녀 ν˜Όμ„±)",
551
- label="곑 μŠ€νƒ€μΌ",
552
- scale=1,
553
- interactive=True
554
- )
555
- ui['enable_smart_enhancement'] = gr.Checkbox(
556
  label="슀마트 ν–₯상",
557
  value=True,
558
  info="μžλ™ νƒœκ·Έ μ΅œμ ν™”",
559
  scale=1
560
  )
561
 
562
- ui['prompt'] = gr.Textbox(
563
  lines=2,
564
  label="Tags",
565
  max_lines=4,
566
  value=TAG_DEFAULT,
567
  placeholder="콀마둜 κ΅¬λΆ„λœ νƒœκ·Έλ“€...",
568
- interactive=True
569
  )
570
 
 
571
  with gr.Group():
572
- gr.Markdown("""### πŸ“ AI μž‘μ‚¬ μ‹œμŠ€ν…œ
573
  <center>주제λ₯Ό μž…λ ₯ν•˜κ³  'AI μž‘μ‚¬' λ²„νŠΌμ„ ν΄λ¦­ν•˜λ©΄ μžλ™μœΌλ‘œ 가사가 μƒμ„±λ©λ‹ˆλ‹€.</center>""")
574
 
575
  with gr.Row():
576
- ui['lyric_prompt'] = gr.Textbox(
577
  label="μž‘μ‚¬ 주제",
578
- placeholder="예: μ²«μ‚¬λž‘μ˜ μ„€λ ˜, μ΄λ³„μ˜ μ•„ν””, 희망찬 내일...",
579
  scale=3,
580
  interactive=True
581
  )
582
- ui['generate_lyrics_btn'] = gr.Button("πŸ€– AI μž‘μ‚¬", variant="secondary", scale=1)
583
 
584
- ui['lyrics'] = gr.Textbox(
 
 
 
 
 
 
 
 
 
 
 
 
585
  lines=9,
586
  label="Lyrics",
587
  max_lines=13,
588
  value=LYRIC_DEFAULT,
589
- placeholder="가사λ₯Ό μž…λ ₯ν•˜μ„Έμš”. [verse], [chorus] λ“±μ˜ ꡬ쑰 νƒœκ·Έ μ‚¬μš©μ„ ꢌμž₯ν•©λ‹ˆλ‹€.",
590
- interactive=True
591
  )
592
 
593
  with gr.Accordion("Basic Settings", open=False):
594
- ui['infer_step'] = gr.Slider(
595
  minimum=1,
596
  maximum=300,
597
  step=1,
@@ -599,7 +611,7 @@ def create_text2music_ui(
599
  label="Infer Steps",
600
  interactive=True,
601
  )
602
- ui['guidance_scale'] = gr.Slider(
603
  minimum=0.0,
604
  maximum=30.0,
605
  step=0.1,
@@ -608,7 +620,7 @@ def create_text2music_ui(
608
  interactive=True,
609
  info="When guidance_scale_lyric > 1 and guidance_scale_text > 1, the guidance scale will not be applied.",
610
  )
611
- ui['guidance_scale_text'] = gr.Slider(
612
  minimum=0.0,
613
  maximum=10.0,
614
  step=0.1,
@@ -617,7 +629,7 @@ def create_text2music_ui(
617
  interactive=True,
618
  info="Guidance scale for text condition. It can only apply to cfg. set guidance_scale_text=5.0, guidance_scale_lyric=1.5 for start",
619
  )
620
- ui['guidance_scale_lyric'] = gr.Slider(
621
  minimum=0.0,
622
  maximum=10.0,
623
  step=0.1,
@@ -626,7 +638,7 @@ def create_text2music_ui(
626
  interactive=True,
627
  )
628
 
629
- ui['manual_seeds'] = gr.Textbox(
630
  label="manual seeds (default None)",
631
  placeholder="1,2,3,4",
632
  value=None,
@@ -634,37 +646,37 @@ def create_text2music_ui(
634
  )
635
 
636
  with gr.Accordion("Advanced Settings", open=False):
637
- ui['scheduler_type'] = gr.Radio(
638
  ["euler", "heun"],
639
  value="euler",
640
  label="Scheduler Type",
641
  elem_id="scheduler_type",
642
  info="Scheduler type for the generation. euler is recommended. heun will take more time.",
643
  )
644
- ui['cfg_type'] = gr.Radio(
645
  ["cfg", "apg", "cfg_star"],
646
  value="apg",
647
  label="CFG Type",
648
  elem_id="cfg_type",
649
  info="CFG type for the generation. apg is recommended. cfg and cfg_star are almost the same.",
650
  )
651
- ui['use_erg_tag'] = gr.Checkbox(
652
  label="use ERG for tag",
653
  value=True,
654
  info="Use Entropy Rectifying Guidance for tag. It will multiple a temperature to the attention to make a weaker tag condition and make better diversity.",
655
  )
656
- ui['use_erg_lyric'] = gr.Checkbox(
657
  label="use ERG for lyric",
658
  value=False,
659
  info="The same but apply to lyric encoder's attention.",
660
  )
661
- ui['use_erg_diffusion'] = gr.Checkbox(
662
  label="use ERG for diffusion",
663
  value=True,
664
  info="The same but apply to diffusion model's attention.",
665
  )
666
 
667
- ui['omega_scale'] = gr.Slider(
668
  minimum=-100.0,
669
  maximum=100.0,
670
  step=0.1,
@@ -674,7 +686,7 @@ def create_text2music_ui(
674
  info="Granularity scale for the generation. Higher values can reduce artifacts",
675
  )
676
 
677
- ui['guidance_interval'] = gr.Slider(
678
  minimum=0.0,
679
  maximum=1.0,
680
  step=0.01,
@@ -683,7 +695,7 @@ def create_text2music_ui(
683
  interactive=True,
684
  info="Guidance interval for the generation. 0.5 means only apply guidance in the middle steps (0.25 * infer_steps to 0.75 * infer_steps)",
685
  )
686
- ui['guidance_interval_decay'] = gr.Slider(
687
  minimum=0.0,
688
  maximum=1.0,
689
  step=0.01,
@@ -692,7 +704,7 @@ def create_text2music_ui(
692
  interactive=True,
693
  info="Guidance interval decay for the generation. Guidance scale will decay from guidance_scale to min_guidance_scale in the interval. 0.0 means no decay.",
694
  )
695
- ui['min_guidance_scale'] = gr.Slider(
696
  minimum=0.0,
697
  maximum=200.0,
698
  step=0.1,
@@ -701,194 +713,732 @@ def create_text2music_ui(
701
  interactive=True,
702
  info="Min guidance scale for guidance interval decay's end scale",
703
  )
704
- ui['oss_steps'] = gr.Textbox(
705
  label="OSS Steps",
706
  placeholder="16, 29, 52, 96, 129, 158, 172, 183, 189, 200",
707
  value=None,
708
  info="Optimal Steps for the generation. But not test well",
709
  )
710
 
711
- ui['text2music_bnt'] = gr.Button("🎡 Generate Music", variant="primary", size="lg")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
712
 
713
  with gr.Column():
714
  outputs, input_params_json = create_output_ui()
715
- # (retake, repainting, edit, extend λ“± 탭듀은 μƒλž΅)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
716
 
717
- # [MODIFIED] μ•„λž˜λΆ€ν„°λŠ” @gr.on(...) λŒ€μ‹  μ΅œμ‹  이벀트 바인딩 λ°©μ‹μœΌλ‘œ μ—°κ²°
 
 
 
 
718
 
719
- # 1) Audio2Audio ν† κΈ€
720
- def _toggle_audio2audio(x):
721
- return (gr.update(visible=x), gr.update(visible=x))
 
 
 
 
 
 
722
 
723
- ui['audio2audio_enable'].change(
724
- fn=_toggle_audio2audio,
725
- inputs=[ui['audio2audio_enable']],
726
- outputs=[ui['ref_audio_input'], ui['ref_audio_strength']]
727
- )
728
-
729
- # 2) μž₯λ₯΄ λ³€κ²½ ν•Έλ“€λŸ¬
730
- def update_tags_for_genre(genre, style):
731
- print(f"🎡 Genre changed: {genre}, Style: {style}")
732
- if genre == "Custom":
733
- return TAG_DEFAULT
734
- tags = GENRE_PRESETS.get(genre, TAG_DEFAULT)
735
- if style in SONG_STYLES:
736
- tags = f"{tags}, {SONG_STYLES[style]}"
737
- return tags
738
-
739
- ui['genre_preset'].change(
740
- fn=update_tags_for_genre,
741
- inputs=[ui['genre_preset'], ui['song_style']],
742
- outputs=[ui['prompt']]
743
- )
744
-
745
- # 3) 곑 μŠ€νƒ€μΌ λ³€κ²½ ν•Έλ“€λŸ¬
746
- def update_tags_for_style(genre, style):
747
- print(f"🎀 Style changed: {style}, Genre: {genre}")
748
- if genre == "Custom":
749
- base_tags = TAG_DEFAULT
750
- else:
751
- base_tags = GENRE_PRESETS.get(genre, TAG_DEFAULT)
752
-
753
- if style in SONG_STYLES:
754
- return f"{base_tags}, {SONG_STYLES[style]}"
755
- return base_tags
756
-
757
- ui['song_style'].change(
758
- fn=update_tags_for_style,
759
- inputs=[ui['genre_preset'], ui['song_style']],
760
- outputs=[ui['prompt']]
761
- )
762
-
763
- # 4) ν’ˆμ§ˆ 프리셋 λ³€κ²½
764
- def update_quality_settings(preset):
765
- print(f"⚑ Quality preset: {preset}")
766
- if preset not in QUALITY_PRESETS:
767
- return ("", 150, 15.0, "euler", 10.0, True, True)
768
-
769
- p = QUALITY_PRESETS[preset]
770
- return (
771
- p["description"],
772
- p["infer_step"],
773
- p["guidance_scale"],
774
- p["scheduler_type"],
775
- p["omega_scale"],
776
- p["use_erg_diffusion"],
777
- p["use_erg_tag"]
778
- )
779
-
780
- ui['quality_preset'].change(
781
- fn=update_quality_settings,
782
- inputs=[ui['quality_preset']],
783
- outputs=[
784
- ui['preset_description'],
785
- ui['infer_step'],
786
- ui['guidance_scale'],
787
- ui['scheduler_type'],
788
- ui['omega_scale'],
789
- ui['use_erg_diffusion'],
790
- ui['use_erg_tag']
791
- ]
792
- )
793
 
794
- # 5) AI μž‘μ‚¬
795
- def generate_lyrics_handler(prompt, genre, style):
796
- print(f"πŸ€– Generate lyrics: {prompt}")
797
- if not prompt or prompt.strip() == "":
798
- # Gradio μ΅œμ‹  λ²„μ „μ—μ„œλŠ” gr.Warning λŒ€μ‹  이벀트 바인딩 ν›„ return이 κΈ°λ³Έ
799
- return "⚠️ μž‘μ‚¬ 주제λ₯Ό μž…λ ₯ν•΄μ£Όμ„Έμš”!"
800
- return generate_lyrics_with_ai(prompt, genre, style)
801
-
802
- ui['generate_lyrics_btn'].click(
803
- fn=generate_lyrics_handler,
804
- inputs=[ui['lyric_prompt'], ui['genre_preset'], ui['song_style']],
805
- outputs=[ui['lyrics']]
806
- )
807
 
808
- # 6) Random λ²„νŠΌ
809
- def random_generation(genre, style):
810
- print("🎲 Random generation")
811
- if genre == "Custom":
812
- genre = random.choice(list(GENRE_PRESETS.keys()))
813
-
814
- themes = ["λ„μ‹œμ˜ λ°€", "μ²«μ‚¬λž‘", "여름 ν•΄λ³€", "가을 μ •μ·¨"]
815
- theme = random.choice(themes)
816
- duration = random.choice([30, 60, 90, 120])
817
-
818
- tags = GENRE_PRESETS.get(genre, TAG_DEFAULT)
819
- if style in SONG_STYLES:
820
- tags = f"{tags}, {SONG_STYLES[style]}"
821
-
822
- new_lyrics = generate_lyrics_with_ai(theme, genre, style)
823
-
824
- return [
825
- duration,
826
- tags,
827
- new_lyrics,
828
- 150, 15.0,
829
- "euler",
830
- "apg",
831
- 10.0,
832
- str(random.randint(1, 10000)),
833
- 0.5,
834
- 0.0,
835
- 3.0,
836
- True,
837
- False,
838
- True,
839
- None,
840
- 0.0,
841
- 0.0,
842
- False,
843
- 0.5,
844
- None
845
- ]
846
-
847
- ui['random_bnt'].click(
848
- fn=random_generation,
849
- inputs=[ui['genre_preset'], ui['song_style']],
850
- outputs=[
851
- ui['audio_duration'],
852
- ui['prompt'],
853
- ui['lyrics'],
854
- ui['infer_step'],
855
- ui['guidance_scale'],
856
- ui['scheduler_type'],
857
- ui['cfg_type'],
858
- ui['omega_scale'],
859
- ui['manual_seeds'],
860
- ui['guidance_interval'],
861
- ui['guidance_interval_decay'],
862
- ui['min_guidance_scale'],
863
- ui['use_erg_tag'],
864
- ui['use_erg_lyric'],
865
- ui['use_erg_diffusion'],
866
- ui['oss_steps'],
867
- ui['guidance_scale_text'],
868
- ui['guidance_scale_lyric'],
869
- ui['audio2audio_enable'],
870
- ui['ref_audio_strength'],
871
- ui['ref_audio_input']
872
- ]
873
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
874
 
875
- # 7) 메인 생성 λ²„νŠΌ
876
- ui['text2music_bnt'].click(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
877
  fn=enhanced_process_func,
878
  inputs=[
879
- ui['audio_duration'], ui['prompt'], ui['lyrics'], ui['infer_step'],
880
- ui['guidance_scale'], ui['scheduler_type'], ui['cfg_type'], ui['omega_scale'],
881
- ui['manual_seeds'], ui['guidance_interval'], ui['guidance_interval_decay'],
882
- ui['min_guidance_scale'], ui['use_erg_tag'], ui['use_erg_lyric'],
883
- ui['use_erg_diffusion'], ui['oss_steps'], ui['guidance_scale_text'],
884
- ui['guidance_scale_lyric'], ui['audio2audio_enable'], ui['ref_audio_strength'],
885
- ui['ref_audio_input'], ui['lora_name_or_path'], ui['multi_seed_mode'],
886
- ui['enable_smart_enhancement'], ui['genre_preset'], ui['song_style']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
887
  ],
888
- outputs=outputs + [input_params_json]
889
  )
890
-
891
- print("βœ… 이벀트 ν•Έλ“€λŸ¬ μ—°κ²° μ™„λ£Œ!")
892
 
893
  def create_main_demo_ui(
894
  text2music_process_func=dump_func,
@@ -896,196 +1446,93 @@ def create_main_demo_ui(
896
  load_data_func=dump_func,
897
  ):
898
  with gr.Blocks(
899
- title="ACE-Step Model 1.0 DEMO - Enhanced",
900
  theme=gr.themes.Soft(),
901
  css="""
902
- /* κ·ΈλΌλ””μ–ΈνŠΈ λ°°κ²½ */
903
  .gradio-container {
904
  max-width: 1200px !important;
905
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
906
- min-height: 100vh;
907
- }
908
-
909
- /* 메인 μ»¨ν…Œμ΄λ„ˆ μŠ€νƒ€μΌ */
910
- .main-container {
911
- background: rgba(255, 255, 255, 0.95);
912
- border-radius: 20px;
913
- padding: 30px;
914
- margin: 20px auto;
915
- box-shadow: 0 20px 40px rgba(0, 0, 0, 0.1);
916
- }
917
-
918
- /* 헀더 μŠ€νƒ€μΌ */
919
- .header-title {
920
- background: linear-gradient(45deg, #667eea, #764ba2);
921
- -webkit-background-clip: text;
922
- -webkit-text-fill-color: transparent;
923
- font-size: 3em;
924
- font-weight: bold;
925
- text-align: center;
926
- margin-bottom: 10px;
927
- }
928
-
929
- /* λ²„νŠΌ μŠ€νƒ€μΌ */
930
- .gr-button-primary {
931
- background: linear-gradient(45deg, #667eea, #764ba2) !important;
932
- border: none !important;
933
- color: white !important;
934
- font-weight: bold !important;
935
- transition: all 0.3s ease !important;
936
- }
937
-
938
- .gr-button-primary:hover {
939
- transform: translateY(-2px);
940
- box-shadow: 0 10px 20px rgba(102, 126, 234, 0.3);
941
- }
942
-
943
- .gr-button-secondary {
944
- background: linear-gradient(45deg, #f093fb, #f5576c) !important;
945
- border: none !important;
946
- color: white !important;
947
- transition: all 0.3s ease !important;
948
- }
949
-
950
- /* κ·Έλ£Ή μŠ€νƒ€μΌ */
951
- .gr-group {
952
- background: rgba(255, 255, 255, 0.8) !important;
953
- border: 1px solid rgba(102, 126, 234, 0.2) !important;
954
- border-radius: 15px !important;
955
- padding: 20px !important;
956
- margin: 10px 0 !important;
957
- backdrop-filter: blur(10px) !important;
958
- }
959
-
960
- /* νƒ­ μŠ€νƒ€μΌ */
961
- .gr-tab {
962
- background: rgba(255, 255, 255, 0.9) !important;
963
- border-radius: 10px !important;
964
- padding: 15px !important;
965
- }
966
-
967
- /* μž…λ ₯ ν•„λ“œ μŠ€νƒ€μΌ */
968
- .gr-textbox, .gr-dropdown, .gr-slider {
969
- border: 2px solid rgba(102, 126, 234, 0.3) !important;
970
- border-radius: 10px !important;
971
- transition: all 0.3s ease !important;
972
- }
973
-
974
- .gr-textbox:focus, .gr-dropdown:focus {
975
- border-color: #667eea !important;
976
- box-shadow: 0 0 10px rgba(102, 126, 234, 0.2) !important;
977
  }
978
-
979
- /* ν’ˆμ§ˆ 정보 μŠ€νƒ€μΌ */
980
  .quality-info {
981
- background: linear-gradient(135deg, #f093fb20, #f5576c20);
 
 
 
 
 
 
982
  padding: 15px;
983
  border-radius: 10px;
984
  margin: 10px 0;
985
- border: 1px solid rgba(240, 147, 251, 0.3);
986
- }
987
-
988
- /* μ• λ‹ˆλ©”μ΄μ…˜ */
989
- @keyframes fadeIn {
990
- from {
991
- opacity: 0;
992
- transform: translateY(20px);
993
- }
994
- to {
995
- opacity: 1;
996
- transform: translateY(0);
997
- }
998
- }
999
-
1000
- .gr-row, .gr-column {
1001
- animation: fadeIn 0.5s ease-out;
1002
- }
1003
-
1004
- /* μŠ€ν¬λ‘€λ°” μŠ€νƒ€μΌ */
1005
- ::-webkit-scrollbar {
1006
- width: 10px;
1007
- }
1008
-
1009
- ::-webkit-scrollbar-track {
1010
- background: rgba(255, 255, 255, 0.1);
1011
- border-radius: 10px;
1012
- }
1013
-
1014
- ::-webkit-scrollbar-thumb {
1015
- background: linear-gradient(45deg, #667eea, #764ba2);
1016
- border-radius: 10px;
1017
- }
1018
-
1019
- /* λ§ˆν¬λ‹€μš΄ μŠ€νƒ€μΌ */
1020
- .gr-markdown {
1021
- color: #4a5568 !important;
1022
- }
1023
-
1024
- .gr-markdown h3 {
1025
- color: #667eea !important;
1026
- font-weight: 600 !important;
1027
- margin: 15px 0 !important;
1028
  }
1029
  """
1030
  ) as demo:
1031
- with gr.Column(elem_classes="main-container"):
1032
- gr.HTML(
1033
- """
1034
- <h1 class="header-title">🎡 ACE-Step PRO</h1>
1035
- <div style="text-align: center; margin: 20px;">
1036
- <p style="font-size: 1.2em; color: #4a5568;"><strong>πŸš€ μƒˆλ‘œμš΄ κΈ°λŠ₯:</strong> AI μž‘μ‚¬ | ν’ˆμ§ˆ 프리셋 | 닀쀑 생성 | 슀마트 ν”„λ‘¬ν”„νŠΈ | μ‹€μ‹œκ°„ 프리뷰</p>
1037
- <p style="margin-top: 10px;">
1038
- <a href="https://ace-step.github.io/" target='_blank' style="color: #667eea; text-decoration: none; margin: 0 10px;">πŸ“„ Project</a> |
1039
- <a href="https://huggingface.co/ACE-Step/ACE-Step-v1-3.5B" style="color: #667eea; text-decoration: none; margin: 0 10px;">πŸ€— Checkpoints</a> |
1040
- <a href="https://discord.gg/rjAZz2xBdG" target='_blank' style="color: #667eea; text-decoration: none; margin: 0 10px;">πŸ’¬ Discord</a>
1041
- </p>
1042
- </div>
1043
- """
1044
- )
 
 
 
 
 
 
 
 
 
1045
 
1046
- # μ‚¬μš©λ²• κ°€μ΄λ“œ μΆ”κ°€
1047
- with gr.Accordion("πŸ“– μ‚¬μš©λ²• κ°€μ΄λ“œ", open=False):
1048
- gr.Markdown("""
1049
- ### 🎯 λΉ λ₯Έ μ‹œμž‘
1050
- 1. **μž₯λ₯΄ & μŠ€νƒ€μΌ 선택**: μ›ν•˜λŠ” μŒμ•… μž₯λ₯΄μ™€ 곑 μŠ€νƒ€μΌ(λ“€μ—£, μ†”λ‘œ λ“±)을 μ„ νƒν•©λ‹ˆλ‹€
1051
- 2. **AI μž‘μ‚¬**: 주제λ₯Ό μž…λ ₯ν•˜κ³  'AI μž‘μ‚¬' λ²„νŠΌμœΌλ‘œ μžλ™ 가사λ₯Ό μƒμ„±ν•©λ‹ˆλ‹€
1052
- 3. **ν’ˆμ§ˆ μ„€μ •**: Draft(빠름) β†’ Standard(ꢌμž₯) β†’ High Quality β†’ Ultra 쀑 선택
1053
- 4. **닀쀑 생성**: "Best of 3/5/10" μ„ νƒν•˜λ©΄ μ—¬λŸ¬ 번 μƒμ„±ν•˜μ—¬ 졜고 ν’ˆμ§ˆμ„ μžλ™ μ„ νƒν•©λ‹ˆλ‹€
1054
- 5. **프리뷰**: 전체 생성 μ „ 10초 ν”„λ¦¬λ·°λ‘œ λΉ λ₯΄κ²Œ 확인할 수 μžˆμŠ΅λ‹ˆλ‹€
1055
-
1056
- ### πŸ’‘ ν’ˆμ§ˆ ν–₯상 팁
1057
- - **κ³ ν’ˆμ§ˆ 생성**: "High Quality" + "Best of 5" μ‘°ν•© μΆ”μ²œ
1058
- - **λΉ λ₯Έ ν…ŒμŠ€νŠΈ**: "Draft" + "프리뷰" κΈ°λŠ₯ ν™œμš©
1059
- - **μž₯λ₯΄ νŠΉν™”**: μž₯λ₯΄ 프리셋 선택 ν›„ "슀마트 ν–₯상" 체크
1060
- - **가사 ꡬ쑰**: [verse], [chorus], [bridge] νƒœκ·Έ 적극 ν™œμš©
1061
- - **λ‹€κ΅­μ–΄ 지원**: ν•œκ΅­μ–΄λ‘œ 주제λ₯Ό μž…λ ₯ν•˜λ©΄ ν•œκ΅­μ–΄ 가사가 μƒμ„±λ©λ‹ˆλ‹€
1062
-
1063
- ### ⚠️ OpenAI API μ„€μ •
1064
- AI μž‘μ‚¬ κΈ°λŠ₯을 μ‚¬μš©ν•˜λ €λ©΄ ν™˜κ²½λ³€μˆ˜μ— OpenAI API ν‚€λ₯Ό μ„€μ •ν•΄μ•Ό ν•©λ‹ˆλ‹€:
1065
- ```bash
1066
- export LLM_API="your-openai-api-key"
1067
- # λ˜λŠ”
1068
- export OPENAI_API_KEY="your-openai-api-key"
1069
- ```
1070
- """)
1071
 
1072
- with gr.Tab("🎡 Enhanced Text2Music", elem_classes="gr-tab"):
1073
- create_text2music_ui(
1074
- gr=gr,
1075
- text2music_process_func=text2music_process_func,
1076
- sample_data_func=sample_data_func,
1077
- load_data_func=load_data_func,
1078
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
1079
  return demo
1080
 
1081
 
1082
  if __name__ == "__main__":
1083
- print("πŸš€ ACE-Step PRO μ‹œμž‘ 쀑...")
 
 
 
 
 
 
 
 
1084
  demo = create_main_demo_ui()
1085
-
1086
  demo.launch(
1087
  server_name="0.0.0.0",
1088
  server_port=7860,
1089
- share=True, # 곡유 링크
1090
- ssr_mode=False # ← SSR λΉ„ν™œμ„±ν™”
1091
- )
 
1
+ """
2
+ ACE-Step: A Step Towards Music Generation Foundation Model
3
+
4
+ https://github.com/ace-step/ACE-Step
5
+
6
+ Apache 2.0 License
7
+ """
8
+
9
  import gradio as gr
10
  import librosa
11
  import os
 
15
  import json
16
  from typing import Dict, List, Tuple, Optional
17
 
18
+ # [ADDED] OpenAI API μ„€μ •
19
  try:
20
+ from openai import OpenAI
21
+ api_key = os.getenv("LLM_API")
22
  if api_key:
23
+ client = OpenAI(api_key=api_key)
24
  client_available = True
25
  print("βœ… OpenAI API client initialized successfully")
26
  else:
27
+ client = None
28
  client_available = False
29
  print("⚠️ Warning: No OpenAI API key found. AI lyrics generation will be disabled.")
30
+ print("Set environment variable: export LLM_API='your-openai-api-key'")
31
  except Exception as e:
32
+ client = None
33
  client_available = False
34
  print(f"❌ Warning: Failed to initialize OpenAI client: {e}")
35
 
36
+ TAG_DEFAULT = "funk, pop, soul, rock, melodic, guitar, drums, bass, keyboard, percussion, 105 BPM, energetic, upbeat, groovy, vibrant, dynamic"
37
+ LYRIC_DEFAULT = """[verse]
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  Neon lights they flicker bright
39
  City hums in dead of night
40
  Rhythms pulse through concrete veins
41
  Lost in echoes of refrains
42
 
43
+ [verse]
44
  Bassline groovin' in my chest
45
  Heartbeats match the city's zest
46
  Electric whispers fill the air
47
  Synthesized dreams everywhere
48
 
49
+ [chorus]
50
  Turn it up and let it flow
51
  Feel the fire let it grow
52
  In this rhythm we belong
53
  Hear the night sing out our song
54
 
55
+ [verse]
56
  Guitar strings they start to weep
57
  Wake the soul from silent sleep
58
  Every note a story told
59
  In this night we're bold and gold
60
 
61
+ [bridge]
62
  Voices blend in harmony
63
  Lost in pure cacophony
64
  Timeless echoes timeless cries
65
  Soulful shouts beneath the skies
66
 
67
+ [verse]
68
  Keyboard dances on the keys
69
  Melodies on evening breeze
70
  Catch the tune and hold it tight
71
  In this moment we take flight
72
  """
73
 
74
+ # [ADDED] AI μž‘μ‚¬ μ‹œμŠ€ν…œ ν”„λ‘¬ν”„νŠΈ
75
+ LYRIC_SYSTEM_PROMPT = """λ„ˆλŠ” λ…Έλž˜ 가사λ₯Ό μž‘μ‚¬ν•˜λŠ” μ „λ¬Έκ°€ 역할이닀. μ΄μš©μžκ°€ μž…λ ₯ν•˜λŠ” μ£Όμ œμ— 따라 이에 κ΄€λ ¨λœ λ…Έλž˜ 가사λ₯Ό μž‘μ„±ν•˜λΌ. κ°€μ‚¬μ˜ κ·œμΉ™μ€ "[ ]"둜 κ΅¬λΆ„ν•˜μ—¬, λ‹€μŒ μ˜ˆμ‹œλ₯Ό μ°Έμ‘°ν•˜λΌ.
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
+ μ˜ˆμ‹œ:
78
+ [verse]
79
+ Neon lights they flicker bright
80
+ City hums in dead of night
81
+ Rhythms pulse through concrete veins
82
+ Lost in echoes of refrains
 
 
 
 
83
 
84
+ [verse]
85
+ Bassline groovin' in my chest
86
+ Heartbeats match the city's zest
87
+ Electric whispers fill the air
88
+ Synthesized dreams everywhere
89
+
90
+ [chorus]
91
+ Turn it up and let it flow
92
+ Feel the fire let it grow
93
+ In this rhythm we belong
94
+ Hear the night sing out our song
95
+
96
+ [verse]
97
+ Guitar strings they start to weep
98
+ Wake the soul from silent sleep
99
+ Every note a story told
100
+ In this night we're bold and gold
 
 
 
 
101
 
102
+ [bridge]
103
+ Voices blend in harmony
104
+ Lost in pure cacophony
105
+ Timeless echoes timeless cries
106
+ Soulful shouts beneath the skies
107
+
108
+ [verse]
109
+ Keyboard dances on the keys
110
+ Melodies on evening breeze
111
+ Catch the tune and hold it tight
112
+ In this moment we take flight
113
+
114
+ κ·œμΉ™:
115
+ 1. λ°˜λ“œμ‹œ [verse], [chorus], [bridge] λ“±μ˜ ꡬ쑰 νƒœκ·Έλ₯Ό μ‚¬μš©ν•  것
116
+ 2. μž…λ ₯ 언어와 λ™μΌν•œ μ–Έμ–΄λ‘œ 가사λ₯Ό μž‘μ„±ν•  것
117
+ 3. 각 μ„Ήμ…˜μ€ 4-8쀄 μ •λ„λ‘œ ꡬ성할 것
118
+ 4. μ£Όμ œμ™€ 감정에 λ§žλŠ” 운율과 리듬감 μžˆλŠ” 가사λ₯Ό μž‘μ„±ν•  것"""
119
+
120
+ # [ADDED] AI μž‘μ‚¬ 생성 ν•¨μˆ˜
121
+ def generate_lyrics_with_ai(theme: str, genre: str = None) -> str:
122
+ """AIλ₯Ό μ‚¬μš©ν•˜μ—¬ 주제 기반 가사 생성"""
123
+ print(f"🎡 AI μž‘μ‚¬ μ‹œμž‘: 주제='{theme}', μž₯λ₯΄='{genre}'")
124
 
125
+ if not client_available or client is None:
 
126
  print("❌ OpenAI client not available, returning default lyrics")
127
  return LYRIC_DEFAULT
128
 
129
+ if not theme or theme.strip() == "":
130
+ print("⚠️ Empty theme, returning default lyrics")
131
  return LYRIC_DEFAULT
132
 
133
  try:
134
+ # μž₯λ₯΄ 정보가 있으면 ν”„λ‘¬ν”„νŠΈμ— μΆ”κ°€
135
+ user_prompt = f"λ‹€μŒ 주제둜 λ…Έλž˜ 가사λ₯Ό μž‘μ„±ν•΄μ£Όμ„Έμš”: {theme}"
136
+ if genre and genre != "Custom":
137
+ user_prompt += f"\nμž₯λ₯΄: {genre}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
+ print(f"πŸ“ OpenAI API 호좜 쀑...")
140
 
141
+ # [MODIFIED] μ‚¬μš©μžκ°€ μ œμ‹œν•œ API ν˜•μ‹μ„ ν‘œμ€€ ν˜•μ‹μœΌλ‘œ λ³€ν™˜
142
+ # μ‹€μ œλ‘œλŠ” client.responses.createκ°€ μ•„λ‹Œ client.chat.completions.createλ₯Ό μ‚¬μš©
143
+ response = client.chat.completions.create(
144
+ model="gpt-4o-mini", # gpt-4.1-miniλŠ” μ‘΄μž¬ν•˜μ§€ μ•ŠλŠ” λͺ¨λΈλͺ…μ΄λ―€λ‘œ gpt-4o-mini μ‚¬μš©
 
145
  messages=[
146
+ {
147
+ "role": "system",
148
+ "content": LYRIC_SYSTEM_PROMPT
149
+ },
150
+ {
151
+ "role": "user",
152
+ "content": user_prompt
153
+ }
154
  ],
155
  temperature=0.8,
156
+ max_tokens=1500,
157
+ top_p=1
158
  )
 
159
 
160
  generated_lyrics = response.choices[0].message.content
161
+ print(f"βœ… AI μž‘μ‚¬ μ™„λ£Œ")
162
+ print(f"μƒμ„±λœ 가사 미리보기: {generated_lyrics[:100]}...")
163
  return generated_lyrics
164
 
165
  except Exception as e:
166
+ print(f"❌ AI μž‘μ‚¬ 생성 였λ₯˜: {e}")
167
+ import traceback
168
+ print(f"상세 였λ₯˜: {traceback.format_exc()}")
169
  return LYRIC_DEFAULT
170
 
171
+ # ν™•μž₯된 μž₯λ₯΄ 프리셋 (κΈ°μ‘΄ + κ°œμ„ λœ νƒœκ·Έ)
172
+ GENRE_PRESETS = {
173
+ "Modern Pop": "pop, synth, drums, guitar, 120 bpm, upbeat, catchy, vibrant, female vocals, polished vocals, radio-ready, commercial, layered vocals",
174
+ "Rock": "rock, electric guitar, drums, bass, 130 bpm, energetic, rebellious, gritty, male vocals, raw vocals, power chords, driving rhythm",
175
+ "Hip Hop": "hip hop, 808 bass, hi-hats, synth, 90 bpm, bold, urban, intense, male vocals, rhythmic vocals, trap beats, punchy drums",
176
+ "Country": "country, acoustic guitar, steel guitar, fiddle, 100 bpm, heartfelt, rustic, warm, male vocals, twangy vocals, storytelling, americana",
177
+ "EDM": "edm, synth, bass, kick drum, 128 bpm, euphoric, pulsating, energetic, instrumental, progressive build, festival anthem, electronic",
178
+ "Reggae": "reggae, guitar, bass, drums, 80 bpm, chill, soulful, positive, male vocals, smooth vocals, offbeat rhythm, island vibes",
179
+ "Classical": "classical, orchestral, strings, piano, 60 bpm, elegant, emotive, timeless, instrumental, dynamic range, sophisticated harmony",
180
+ "Jazz": "jazz, saxophone, piano, double bass, 110 bpm, smooth, improvisational, soulful, male vocals, crooning vocals, swing feel, sophisticated",
181
+ "Metal": "metal, electric guitar, double kick drum, bass, 160 bpm, aggressive, intense, heavy, male vocals, screamed vocals, distorted, powerful",
182
+ "R&B": "r&b, synth, bass, drums, 85 bpm, sultry, groovy, romantic, female vocals, silky vocals, smooth production, neo-soul"
183
+ }
184
+
185
  # ν’ˆμ§ˆ 프리셋 μ‹œμŠ€ν…œ μΆ”κ°€
186
  QUALITY_PRESETS = {
187
  "Draft (Fast)": {
 
257
  # μ „μ—­ μΊμ‹œ μΈμŠ€ν„΄μŠ€
258
  generation_cache = MusicGenerationCache()
259
 
260
+ def enhance_prompt_with_genre(base_prompt: str, genre: str) -> str:
261
+ """μž₯λ₯΄μ— λ”°λ₯Έ 슀마트 ν”„λ‘¬ν”„νŠΈ ν™•μž₯"""
262
+ if genre == "Custom" or not genre:
263
+ return base_prompt
264
 
265
+ # μž₯λ₯΄λ³„ μΆ”κ°€ κ°œμ„  νƒœκ·Έ
266
+ genre_enhancements = {
267
+ "Modern Pop": ["polished production", "mainstream appeal", "hook-driven"],
268
+ "Rock": ["guitar-driven", "powerful drums", "energetic performance"],
269
+ "Hip Hop": ["rhythmic flow", "urban atmosphere", "bass-heavy"],
270
+ "Country": ["acoustic warmth", "storytelling melody", "authentic feel"],
271
+ "EDM": ["electronic atmosphere", "build-ups", "dance-friendly"],
272
+ "Reggae": ["laid-back groove", "tropical vibes", "rhythmic guitar"],
273
+ "Classical": ["orchestral depth", "musical sophistication", "timeless beauty"],
274
+ "Jazz": ["musical complexity", "improvisational spirit", "sophisticated harmony"],
275
+ "Metal": ["aggressive energy", "powerful sound", "intense atmosphere"],
276
+ "R&B": ["smooth groove", "soulful expression", "rhythmic sophistication"]
277
+ }
 
 
 
 
 
 
 
278
 
279
+ if genre in genre_enhancements:
280
+ additional_tags = ", ".join(genre_enhancements[genre])
281
+ return f"{base_prompt}, {additional_tags}"
 
282
 
283
+ return base_prompt
284
 
285
  def calculate_quality_score(audio_path: str) -> float:
286
  """κ°„λ‹¨ν•œ ν’ˆμ§ˆ 점수 계산 (μ‹€μ œ κ΅¬ν˜„μ—μ„œλŠ” 더 λ³΅μž‘ν•œ λ©”νŠΈλ¦­ μ‚¬μš©)"""
 
302
  except:
303
  return 50.0 # κΈ°λ³Έκ°’
304
 
305
+ def update_tags_from_preset(preset_name):
306
+ if preset_name == "Custom":
307
+ return ""
308
+ return GENRE_PRESETS.get(preset_name, "")
309
+
310
  def update_quality_preset(preset_name):
311
  """ν’ˆμ§ˆ 프리셋 적용"""
312
  if preset_name not in QUALITY_PRESETS:
 
333
  guidance_scale_text, guidance_scale_lyric,
334
  audio2audio_enable=False, ref_audio_strength=0.5, ref_audio_input=None,
335
  lora_name_or_path="none", multi_seed_mode="Single",
336
+ enable_smart_enhancement=True, genre_preset="Custom", **kwargs
337
  ):
338
+ print(f"🎡 Enhanced generation started")
339
+ print(f"Parameters: duration={audio_duration}, prompt='{prompt[:50]}...', multi_seed={multi_seed_mode}")
340
+
341
  # 슀마트 ν”„λ‘¬ν”„νŠΈ ν™•μž₯
342
+ if enable_smart_enhancement and genre_preset != "Custom":
343
+ enhanced_prompt = enhance_prompt_with_genre(prompt, genre_preset)
344
+ print(f"Enhanced prompt: {enhanced_prompt[:100]}...")
345
+ else:
346
+ enhanced_prompt = prompt
347
 
348
  # μΊμ‹œ 확인
349
  cache_params = {
350
+ 'prompt': enhanced_prompt, 'lyrics': lyrics, 'audio_duration': audio_duration,
351
  'infer_step': infer_step, 'guidance_scale': guidance_scale
352
  }
353
 
354
  cached_result = generation_cache.get_cached_result(cache_params)
355
  if cached_result:
356
+ print("Using cached result")
357
  return cached_result
358
 
359
  # 닀쀑 μ‹œλ“œ 생성
360
  num_candidates = MULTI_SEED_OPTIONS.get(multi_seed_mode, 1)
361
+ print(f"Generating {num_candidates} candidates")
362
 
363
  if num_candidates == 1:
364
  # κΈ°μ‘΄ ν•¨μˆ˜ 호좜
365
  result = original_func(
366
+ audio_duration, enhanced_prompt, lyrics, infer_step, guidance_scale,
367
  scheduler_type, cfg_type, omega_scale, manual_seeds,
368
  guidance_interval, guidance_interval_decay, min_guidance_scale,
369
  use_erg_tag, use_erg_lyric, use_erg_diffusion, oss_steps,
 
371
  ref_audio_strength, ref_audio_input, lora_name_or_path, **kwargs
372
  )
373
  else:
374
+ # 닀쀑 μ‹œλ“œ 생성을 μœ„ν•œ μž„μ‹œ κ΅¬ν˜„
375
+ result = original_func(
376
+ audio_duration, enhanced_prompt, lyrics, infer_step, guidance_scale,
377
+ scheduler_type, cfg_type, omega_scale, manual_seeds,
378
+ guidance_interval, guidance_interval_decay, min_guidance_scale,
379
+ use_erg_tag, use_erg_lyric, use_erg_diffusion, oss_steps,
380
+ guidance_scale_text, guidance_scale_lyric, audio2audio_enable,
381
+ ref_audio_strength, ref_audio_input, lora_name_or_path, **kwargs
382
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383
 
384
  # κ²°κ³Ό μΊμ‹œ
385
  generation_cache.cache_result(cache_params, result)
386
+ print(f"Generation completed")
387
  return result
388
 
389
  return enhanced_func
 
409
  return outputs, input_params_json
410
 
411
  def dump_func(*args):
412
+ """더미 ν•¨μˆ˜ - μ‹€μ œ μŒμ•… 생성 λŒ€μ‹  둜그만 좜λ ₯"""
413
+ print(f"🎡 Dummy function called with {len(args)} arguments")
414
+ if args:
415
+ print(f"Parameters preview: duration={args[0] if len(args) > 0 else 'N/A'}, prompt='{args[1][:50] if len(args) > 1 else 'N/A'}...'")
416
+
417
+ # κ°€μ§œ κ²°κ³Ό λ°˜ν™˜ (μ‹€μ œ κ΅¬ν˜„μ—μ„œλŠ” μ§„μ§œ μŒμ•… 생성 κ²°κ³Ό)
418
+ dummy_result = [
419
+ None, # μ˜€λ””μ˜€ 파일 경둜 (None이면 μ˜€λ””μ˜€ 생성 μ•ˆλ¨)
420
+ {
421
+ "prompt": args[1] if len(args) > 1 else "test",
422
+ "lyrics": args[2] if len(args) > 2 else "test lyrics",
423
+ "audio_duration": args[0] if len(args) > 0 else 30,
424
+ "status": "μ™„λ£Œ (더미 λͺ¨λ“œ - μ‹€μ œ μŒμ•… 생성 μ•ˆλ¨)",
425
+ "infer_step": args[3] if len(args) > 3 else 150,
426
+ "guidance_scale": args[4] if len(args) > 4 else 15.0,
427
+ "scheduler_type": args[5] if len(args) > 5 else "euler",
428
+ "cfg_type": args[6] if len(args) > 6 else "apg",
429
+ "omega_scale": args[7] if len(args) > 7 else 10.0,
430
+ "actual_seeds": [1234],
431
+ "guidance_interval": args[9] if len(args) > 9 else 0.5,
432
+ "guidance_interval_decay": args[10] if len(args) > 10 else 0.0,
433
+ "min_guidance_scale": args[11] if len(args) > 11 else 3.0,
434
+ "use_erg_tag": args[12] if len(args) > 12 else True,
435
+ "use_erg_lyric": args[13] if len(args) > 13 else False,
436
+ "use_erg_diffusion": args[14] if len(args) > 14 else True,
437
+ "oss_steps": [],
438
+ "guidance_scale_text": args[16] if len(args) > 16 else 0.0,
439
+ "guidance_scale_lyric": args[17] if len(args) > 17 else 0.0,
440
+ "audio2audio_enable": args[18] if len(args) > 18 else False,
441
+ "ref_audio_strength": args[19] if len(args) > 19 else 0.5,
442
+ "ref_audio_input": args[20] if len(args) > 20 else None,
443
+ "audio_path": None
444
+ }
445
+ ]
446
+ return dummy_result
447
 
448
  def create_text2music_ui(
449
  gr,
 
453
  ):
454
  # ν–₯μƒλœ ν”„λ‘œμ„ΈμŠ€ ν•¨μˆ˜ 생성
455
  enhanced_process_func = create_enhanced_process_func(text2music_process_func)
 
 
 
456
 
457
  with gr.Row():
458
  with gr.Column():
 
460
  with gr.Group():
461
  gr.Markdown("### ⚑ ν’ˆμ§ˆ & μ„±λŠ₯ μ„€μ •")
462
  with gr.Row():
463
+ quality_preset = gr.Dropdown(
464
  choices=list(QUALITY_PRESETS.keys()),
465
  value="Standard",
466
  label="ν’ˆμ§ˆ 프리셋",
467
+ scale=2
 
468
  )
469
+ multi_seed_mode = gr.Dropdown(
470
  choices=list(MULTI_SEED_OPTIONS.keys()),
471
  value="Single",
472
  label="닀쀑 생성 λͺ¨λ“œ",
473
  scale=2,
474
+ info="μ—¬λŸ¬ 번 μƒμ„±ν•˜μ—¬ 졜고 ν’ˆμ§ˆ 선택"
 
475
  )
476
 
477
+ preset_description = gr.Textbox(
478
  value=QUALITY_PRESETS["Standard"]["description"],
479
  label="μ„€λͺ…",
480
  interactive=False,
 
482
  )
483
 
484
  with gr.Row(equal_height=True):
485
+ # add markdown, tags and lyrics examples are from ai music generation community
486
+ audio_duration = gr.Slider(
487
  -1,
488
  240.0,
489
  step=0.00001,
 
493
  info="-1 means random duration (30 ~ 240).",
494
  scale=7,
495
  )
496
+ sample_bnt = gr.Button("Sample", variant="secondary", scale=1)
497
+ preview_bnt = gr.Button("🎡 Preview", variant="secondary", scale=2)
498
 
499
  # audio2audio
500
  with gr.Row(equal_height=True):
501
+ audio2audio_enable = gr.Checkbox(
502
  label="Enable Audio2Audio",
503
  value=False,
504
  info="Check to enable Audio-to-Audio generation using a reference audio.",
505
  elem_id="audio2audio_checkbox"
506
  )
507
+ lora_name_or_path = gr.Dropdown(
508
  label="Lora Name or Path",
509
  choices=["ACE-Step/ACE-Step-v1-chinese-rap-LoRA", "none"],
510
  value="none",
511
  allow_custom_value=True,
512
  )
513
 
514
+ ref_audio_input = gr.Audio(
515
  type="filepath",
516
  label="Reference Audio (for Audio2Audio)",
517
  visible=False,
518
  elem_id="ref_audio_input",
519
  show_download_button=True
520
  )
521
+ ref_audio_strength = gr.Slider(
522
  label="Refer audio strength",
523
  minimum=0.0,
524
  maximum=1.0,
 
529
  interactive=True,
530
  )
531
 
532
+ def toggle_ref_audio_visibility(is_checked):
533
+ return (
534
+ gr.update(visible=is_checked, elem_id="ref_audio_input"),
535
+ gr.update(visible=is_checked, elem_id="ref_audio_strength"),
536
+ )
537
+
538
+ audio2audio_enable.change(
539
+ fn=toggle_ref_audio_visibility,
540
+ inputs=[audio2audio_enable],
541
+ outputs=[ref_audio_input, ref_audio_strength],
542
+ )
543
+
544
  with gr.Column(scale=2):
545
  with gr.Group():
546
  gr.Markdown("""### 🎼 슀마트 ν”„λ‘¬ν”„νŠΈ μ‹œμŠ€ν…œ
547
+ <center>μž₯λ₯΄ 선택 μ‹œ μžλ™μœΌλ‘œ μ΅œμ ν™”λœ νƒœκ·Έκ°€ μΆ”κ°€λ©λ‹ˆλ‹€. 콀마둜 κ΅¬λΆ„ν•˜μ—¬ νƒœκ·Έλ₯Ό μž…λ ₯ν•˜μ„Έμš”.</center>""")
548
 
549
  with gr.Row():
550
+ genre_preset = gr.Dropdown(
551
  choices=["Custom"] + list(GENRE_PRESETS.keys()),
552
  value="Custom",
553
  label="μž₯λ₯΄ 프리셋",
554
  scale=1,
 
555
  )
556
+ enable_smart_enhancement = gr.Checkbox(
 
 
 
 
 
 
 
557
  label="슀마트 ν–₯상",
558
  value=True,
559
  info="μžλ™ νƒœκ·Έ μ΅œμ ν™”",
560
  scale=1
561
  )
562
 
563
+ prompt = gr.Textbox(
564
  lines=2,
565
  label="Tags",
566
  max_lines=4,
567
  value=TAG_DEFAULT,
568
  placeholder="콀마둜 κ΅¬λΆ„λœ νƒœκ·Έλ“€...",
 
569
  )
570
 
571
+ # [ADDED] AI μž‘μ‚¬ μ‹œμŠ€ν…œ UI
572
  with gr.Group():
573
+ gr.Markdown("""### πŸ€– AI μž‘μ‚¬ μ‹œμŠ€ν…œ
574
  <center>주제λ₯Ό μž…λ ₯ν•˜κ³  'AI μž‘μ‚¬' λ²„νŠΌμ„ ν΄λ¦­ν•˜λ©΄ μžλ™μœΌλ‘œ 가사가 μƒμ„±λ©λ‹ˆλ‹€.</center>""")
575
 
576
  with gr.Row():
577
+ lyric_theme_input = gr.Textbox(
578
  label="μž‘μ‚¬ 주제",
579
+ placeholder="예: μ²«μ‚¬λž‘μ˜ μ„€λ ˜, μ΄λ³„μ˜ μ•„ν””, κ΅°λŒ€κ°€λŠ” λ‚¨μžμ˜ ν•œμˆ¨, 희망찬 내일...",
580
  scale=3,
581
  interactive=True
582
  )
583
+ generate_lyrics_btn = gr.Button("πŸ€– AI μž‘μ‚¬", variant="secondary", scale=1)
584
 
585
+ # API μƒνƒœ ν‘œμ‹œ
586
+ api_status = gr.Textbox(
587
+ value="βœ… AI μž‘μ‚¬ κΈ°λŠ₯ ν™œμ„±ν™”λ¨" if client_available else "❌ API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•ŠμŒ (export LLM_API='your-key')",
588
+ label="API μƒνƒœ",
589
+ interactive=False,
590
+ max_lines=1,
591
+ scale=1
592
+ )
593
+
594
+ with gr.Group():
595
+ gr.Markdown("""### πŸ“ 가사 μž…λ ₯
596
+ <center>ꡬ쑰 νƒœκ·Έ [verse], [chorus], [bridge] μ‚¬μš©μ„ ꢌμž₯ν•©λ‹ˆλ‹€.<br>[instrumental] λ˜λŠ” [inst]λ₯Ό μ‚¬μš©ν•˜λ©΄ 연주곑을 μƒμ„±ν•©λ‹ˆλ‹€.</center>""")
597
+ lyrics = gr.Textbox(
598
  lines=9,
599
  label="Lyrics",
600
  max_lines=13,
601
  value=LYRIC_DEFAULT,
602
+ placeholder="가사λ₯Ό μž…λ ₯ν•˜μ„Έμš”. [verse], [chorus] λ“±μ˜ ꡬ쑰 νƒœκ·Έ μ‚¬μš©μ„ ꢌμž₯ν•©λ‹ˆλ‹€."
 
603
  )
604
 
605
  with gr.Accordion("Basic Settings", open=False):
606
+ infer_step = gr.Slider(
607
  minimum=1,
608
  maximum=300,
609
  step=1,
 
611
  label="Infer Steps",
612
  interactive=True,
613
  )
614
+ guidance_scale = gr.Slider(
615
  minimum=0.0,
616
  maximum=30.0,
617
  step=0.1,
 
620
  interactive=True,
621
  info="When guidance_scale_lyric > 1 and guidance_scale_text > 1, the guidance scale will not be applied.",
622
  )
623
+ guidance_scale_text = gr.Slider(
624
  minimum=0.0,
625
  maximum=10.0,
626
  step=0.1,
 
629
  interactive=True,
630
  info="Guidance scale for text condition. It can only apply to cfg. set guidance_scale_text=5.0, guidance_scale_lyric=1.5 for start",
631
  )
632
+ guidance_scale_lyric = gr.Slider(
633
  minimum=0.0,
634
  maximum=10.0,
635
  step=0.1,
 
638
  interactive=True,
639
  )
640
 
641
+ manual_seeds = gr.Textbox(
642
  label="manual seeds (default None)",
643
  placeholder="1,2,3,4",
644
  value=None,
 
646
  )
647
 
648
  with gr.Accordion("Advanced Settings", open=False):
649
+ scheduler_type = gr.Radio(
650
  ["euler", "heun"],
651
  value="euler",
652
  label="Scheduler Type",
653
  elem_id="scheduler_type",
654
  info="Scheduler type for the generation. euler is recommended. heun will take more time.",
655
  )
656
+ cfg_type = gr.Radio(
657
  ["cfg", "apg", "cfg_star"],
658
  value="apg",
659
  label="CFG Type",
660
  elem_id="cfg_type",
661
  info="CFG type for the generation. apg is recommended. cfg and cfg_star are almost the same.",
662
  )
663
+ use_erg_tag = gr.Checkbox(
664
  label="use ERG for tag",
665
  value=True,
666
  info="Use Entropy Rectifying Guidance for tag. It will multiple a temperature to the attention to make a weaker tag condition and make better diversity.",
667
  )
668
+ use_erg_lyric = gr.Checkbox(
669
  label="use ERG for lyric",
670
  value=False,
671
  info="The same but apply to lyric encoder's attention.",
672
  )
673
+ use_erg_diffusion = gr.Checkbox(
674
  label="use ERG for diffusion",
675
  value=True,
676
  info="The same but apply to diffusion model's attention.",
677
  )
678
 
679
+ omega_scale = gr.Slider(
680
  minimum=-100.0,
681
  maximum=100.0,
682
  step=0.1,
 
686
  info="Granularity scale for the generation. Higher values can reduce artifacts",
687
  )
688
 
689
+ guidance_interval = gr.Slider(
690
  minimum=0.0,
691
  maximum=1.0,
692
  step=0.01,
 
695
  interactive=True,
696
  info="Guidance interval for the generation. 0.5 means only apply guidance in the middle steps (0.25 * infer_steps to 0.75 * infer_steps)",
697
  )
698
+ guidance_interval_decay = gr.Slider(
699
  minimum=0.0,
700
  maximum=1.0,
701
  step=0.01,
 
704
  interactive=True,
705
  info="Guidance interval decay for the generation. Guidance scale will decay from guidance_scale to min_guidance_scale in the interval. 0.0 means no decay.",
706
  )
707
+ min_guidance_scale = gr.Slider(
708
  minimum=0.0,
709
  maximum=200.0,
710
  step=0.1,
 
713
  interactive=True,
714
  info="Min guidance scale for guidance interval decay's end scale",
715
  )
716
+ oss_steps = gr.Textbox(
717
  label="OSS Steps",
718
  placeholder="16, 29, 52, 96, 129, 158, 172, 183, 189, 200",
719
  value=None,
720
  info="Optimal Steps for the generation. But not test well",
721
  )
722
 
723
+ text2music_bnt = gr.Button("🎡 Generate Music", variant="primary", size="lg")
724
+
725
+ # [ADDED] AI μž‘μ‚¬ 이벀트 ν•Έλ“€λŸ¬
726
+ def handle_ai_lyrics_generation(theme, genre):
727
+ """AI μž‘μ‚¬ λ²„νŠΌ 클릭 처리"""
728
+ print(f"πŸ€– AI μž‘μ‚¬ λ²„νŠΌ 클릭: 주제='{theme}', μž₯λ₯΄='{genre}'")
729
+
730
+ if not theme or theme.strip() == "":
731
+ return "⚠️ μž‘μ‚¬ 주제λ₯Ό μž…λ ₯ν•΄μ£Όμ„Έμš”!"
732
+
733
+ try:
734
+ generated_lyrics = generate_lyrics_with_ai(theme, genre)
735
+ return generated_lyrics
736
+ except Exception as e:
737
+ print(f"μž‘μ‚¬ 생성 쀑 였λ₯˜: {e}")
738
+ return f"❌ μž‘μ‚¬ 생성 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"
739
+
740
+ generate_lyrics_btn.click(
741
+ fn=handle_ai_lyrics_generation,
742
+ inputs=[lyric_theme_input, genre_preset],
743
+ outputs=[lyrics]
744
+ )
745
+
746
+ # λͺ¨λ“  UI μš”μ†Œκ°€ μ •μ˜λœ ν›„ 이벀트 ν•Έλ“€λŸ¬ μ„€μ •
747
+ genre_preset.change(
748
+ fn=update_tags_from_preset,
749
+ inputs=[genre_preset],
750
+ outputs=[prompt]
751
+ )
752
+
753
+ quality_preset.change(
754
+ fn=lambda x: QUALITY_PRESETS.get(x, {}).get("description", ""),
755
+ inputs=[quality_preset],
756
+ outputs=[preset_description]
757
+ )
758
+
759
+ quality_preset.change(
760
+ fn=update_quality_preset,
761
+ inputs=[quality_preset],
762
+ outputs=[infer_step, guidance_scale, scheduler_type, omega_scale, use_erg_diffusion, use_erg_tag]
763
+ )
764
 
765
  with gr.Column():
766
  outputs, input_params_json = create_output_ui()
767
+
768
+ # μ‹€μ‹œκ°„ 프리뷰 κΈ°λŠ₯
769
+ def generate_preview(prompt, lyrics, genre_preset):
770
+ """10초 프리뷰 생성"""
771
+ preview_params = {
772
+ "audio_duration": 10,
773
+ "infer_step": 50,
774
+ "guidance_scale": 12.0,
775
+ "scheduler_type": "euler",
776
+ "cfg_type": "apg",
777
+ "omega_scale": 5.0,
778
+ }
779
+
780
+ enhanced_prompt = enhance_prompt_with_genre(prompt, genre_preset) if genre_preset != "Custom" else prompt
781
+
782
+ try:
783
+ # μ‹€μ œ κ΅¬ν˜„μ—μ„œλŠ” λΉ λ₯Έ 생성 λͺ¨λ“œ μ‚¬μš©
784
+ result = enhanced_process_func(
785
+ preview_params["audio_duration"],
786
+ enhanced_prompt,
787
+ lyrics[:200], # 가사 μΌλΆ€λ§Œ μ‚¬μš©
788
+ preview_params["infer_step"],
789
+ preview_params["guidance_scale"],
790
+ preview_params["scheduler_type"],
791
+ preview_params["cfg_type"],
792
+ preview_params["omega_scale"],
793
+ None, # manual_seeds
794
+ 0.5, # guidance_interval
795
+ 0.0, # guidance_interval_decay
796
+ 3.0, # min_guidance_scale
797
+ True, # use_erg_tag
798
+ False, # use_erg_lyric
799
+ True, # use_erg_diffusion
800
+ None, # oss_steps
801
+ 0.0, # guidance_scale_text
802
+ 0.0, # guidance_scale_lyric
803
+ multi_seed_mode="Single"
804
+ )
805
+ return result[0] if result else None
806
+ except Exception as e:
807
+ return f"프리뷰 생성 μ‹€νŒ¨: {str(e)}"
808
 
809
+ preview_bnt.click(
810
+ fn=generate_preview,
811
+ inputs=[prompt, lyrics, genre_preset],
812
+ outputs=[outputs[0]]
813
+ )
814
 
815
+ with gr.Tab("retake"):
816
+ retake_variance = gr.Slider(
817
+ minimum=0.0, maximum=1.0, step=0.01, value=0.2, label="variance"
818
+ )
819
+ retake_seeds = gr.Textbox(
820
+ label="retake seeds (default None)", placeholder="", value=None
821
+ )
822
+ retake_bnt = gr.Button("Retake", variant="primary")
823
+ retake_outputs, retake_input_params_json = create_output_ui("Retake")
824
 
825
def retake_process_func(json_data, retake_variance, retake_seeds):
    """Re-generate ("retake") a track using the parameters of a previous run.

    Reads the saved generation parameters from ``json_data`` (the params-JSON
    produced by a prior text2music run) and re-invokes
    ``enhanced_process_func`` with ``task="retake"`` so that only the seeds
    and variance differ from the original generation.

    Args:
        json_data: dict of the previous run's parameters. May be ``None``
            before any generation has happened; defaults are used then.
        retake_variance: float in [0, 1] controlling how far the retake may
            drift from the original.
        retake_seeds: optional seed string for the retake (None/empty allowed).

    Returns:
        Whatever ``enhanced_process_func`` returns (audio outputs plus the
        params JSON for the new run).
    """
    # Guard: before any generation has run, the params JSON component holds
    # None, and ``None.get(...)`` would raise AttributeError.
    if not isinstance(json_data, dict):
        json_data = {}
    return enhanced_process_func(
        json_data.get("audio_duration", 30),
        json_data.get("prompt", ""),
        json_data.get("lyrics", ""),
        json_data.get("infer_step", 100),
        json_data.get("guidance_scale", 15.0),
        json_data.get("scheduler_type", "euler"),
        json_data.get("cfg_type", "apg"),
        json_data.get("omega_scale", 10.0),
        retake_seeds,
        json_data.get("guidance_interval", 0.5),
        json_data.get("guidance_interval_decay", 0.0),
        json_data.get("min_guidance_scale", 3.0),
        json_data.get("use_erg_tag", True),
        json_data.get("use_erg_lyric", False),
        json_data.get("use_erg_diffusion", True),
        json_data.get("oss_steps", None),
        json_data.get("guidance_scale_text", 0.0),
        json_data.get("guidance_scale_lyric", 0.0),
        audio2audio_enable=json_data.get("audio2audio_enable", False),
        ref_audio_strength=json_data.get("ref_audio_strength", 0.5),
        ref_audio_input=json_data.get("ref_audio_input", None),
        lora_name_or_path=json_data.get("lora_name_or_path", "none"),
        multi_seed_mode="Best of 3",  # retakeλŠ” μžλ™μœΌλ‘œ 닀쀑 생성
        retake_variance=retake_variance,
        task="retake",
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
853
 
854
+ retake_bnt.click(
855
+ fn=retake_process_func,
856
+ inputs=[
857
+ input_params_json,
858
+ retake_variance,
859
+ retake_seeds,
860
+ ],
861
+ outputs=retake_outputs + [retake_input_params_json],
862
+ )
 
 
 
 
863
 
864
+ with gr.Tab("repainting"):
865
+ retake_variance = gr.Slider(
866
+ minimum=0.0, maximum=1.0, step=0.01, value=0.2, label="variance"
867
+ )
868
+ retake_seeds = gr.Textbox(
869
+ label="repaint seeds (default None)", placeholder="", value=None
870
+ )
871
+ repaint_start = gr.Slider(
872
+ minimum=0.0,
873
+ maximum=240.0,
874
+ step=0.01,
875
+ value=0.0,
876
+ label="Repaint Start Time",
877
+ interactive=True,
878
+ )
879
+ repaint_end = gr.Slider(
880
+ minimum=0.0,
881
+ maximum=240.0,
882
+ step=0.01,
883
+ value=30.0,
884
+ label="Repaint End Time",
885
+ interactive=True,
886
+ )
887
+ repaint_source = gr.Radio(
888
+ ["text2music", "last_repaint", "upload"],
889
+ value="text2music",
890
+ label="Repaint Source",
891
+ elem_id="repaint_source",
892
+ )
893
+
894
+ repaint_source_audio_upload = gr.Audio(
895
+ label="Upload Audio",
896
+ type="filepath",
897
+ visible=False,
898
+ elem_id="repaint_source_audio_upload",
899
+ show_download_button=True,
900
+ )
901
+ repaint_source.change(
902
+ fn=lambda x: gr.update(
903
+ visible=x == "upload", elem_id="repaint_source_audio_upload"
904
+ ),
905
+ inputs=[repaint_source],
906
+ outputs=[repaint_source_audio_upload],
907
+ )
908
+
909
+ repaint_bnt = gr.Button("Repaint", variant="primary")
910
+ repaint_outputs, repaint_input_params_json = create_output_ui("Repaint")
911
+
912
def repaint_process_func(
    text2music_json_data,
    repaint_json_data,
    retake_variance,
    retake_seeds,
    repaint_start,
    repaint_end,
    repaint_source,
    repaint_source_audio_upload,
    prompt,
    lyrics,
    infer_step,
    guidance_scale,
    scheduler_type,
    cfg_type,
    omega_scale,
    manual_seeds,
    guidance_interval,
    guidance_interval_decay,
    min_guidance_scale,
    use_erg_tag,
    use_erg_lyric,
    use_erg_diffusion,
    oss_steps,
    guidance_scale_text,
    guidance_scale_lyric,
):
    """Regenerate ("repaint") a time window of an existing track.

    Resolves the source audio according to ``repaint_source`` — a freshly
    uploaded file, the last text2music result, or the last repaint result —
    then calls ``enhanced_process_func`` with ``task="repaint"`` restricted
    to the window [``repaint_start``, ``repaint_end``].

    Raises:
        ValueError: if ``repaint_source`` is not one of the supported
            options. (The original code fell through with unbound locals
            here, producing a confusing UnboundLocalError instead.)
    """
    if repaint_source == "upload":
        src_audio_path = repaint_source_audio_upload
        # NOTE(review): the ``filename=`` kwarg of librosa.get_duration was
        # deprecated in librosa 0.10 in favor of ``path=`` — confirm the
        # pinned librosa version before changing.
        audio_duration = librosa.get_duration(filename=src_audio_path)
        json_data = {"audio_duration": audio_duration}
    elif repaint_source == "text2music":
        json_data = text2music_json_data
        src_audio_path = json_data["audio_path"]
    elif repaint_source == "last_repaint":
        json_data = repaint_json_data
        src_audio_path = json_data["audio_path"]
    else:
        raise ValueError(f"Unknown repaint source: {repaint_source!r}")

    return enhanced_process_func(
        json_data["audio_duration"],
        prompt,
        lyrics,
        infer_step,
        guidance_scale,
        scheduler_type,
        cfg_type,
        omega_scale,
        manual_seeds,
        guidance_interval,
        guidance_interval_decay,
        min_guidance_scale,
        use_erg_tag,
        use_erg_lyric,
        use_erg_diffusion,
        oss_steps,
        guidance_scale_text,
        guidance_scale_lyric,
        retake_seeds=retake_seeds,
        retake_variance=retake_variance,
        task="repaint",
        repaint_start=repaint_start,
        repaint_end=repaint_end,
        src_audio_path=src_audio_path,
        lora_name_or_path="none",
    )
977
+
978
+ repaint_bnt.click(
979
+ fn=repaint_process_func,
980
+ inputs=[
981
+ input_params_json,
982
+ repaint_input_params_json,
983
+ retake_variance,
984
+ retake_seeds,
985
+ repaint_start,
986
+ repaint_end,
987
+ repaint_source,
988
+ repaint_source_audio_upload,
989
+ prompt,
990
+ lyrics,
991
+ infer_step,
992
+ guidance_scale,
993
+ scheduler_type,
994
+ cfg_type,
995
+ omega_scale,
996
+ manual_seeds,
997
+ guidance_interval,
998
+ guidance_interval_decay,
999
+ min_guidance_scale,
1000
+ use_erg_tag,
1001
+ use_erg_lyric,
1002
+ use_erg_diffusion,
1003
+ oss_steps,
1004
+ guidance_scale_text,
1005
+ guidance_scale_lyric,
1006
+ ],
1007
+ outputs=repaint_outputs + [repaint_input_params_json],
1008
+ )
1009
+
1010
+ with gr.Tab("edit"):
1011
+ edit_prompt = gr.Textbox(lines=2, label="Edit Tags", max_lines=4)
1012
+ edit_lyrics = gr.Textbox(lines=9, label="Edit Lyrics", max_lines=13)
1013
+ retake_seeds = gr.Textbox(
1014
+ label="edit seeds (default None)", placeholder="", value=None
1015
+ )
1016
+
1017
+ edit_type = gr.Radio(
1018
+ ["only_lyrics", "remix"],
1019
+ value="only_lyrics",
1020
+ label="Edit Type",
1021
+ elem_id="edit_type",
1022
+ info="`only_lyrics` will keep the whole song the same except lyrics difference. Make your diffrence smaller, e.g. one lyrc line change.\nremix can change the song melody and genre",
1023
+ )
1024
+ edit_n_min = gr.Slider(
1025
+ minimum=0.0,
1026
+ maximum=1.0,
1027
+ step=0.01,
1028
+ value=0.6,
1029
+ label="edit_n_min",
1030
+ interactive=True,
1031
+ )
1032
+ edit_n_max = gr.Slider(
1033
+ minimum=0.0,
1034
+ maximum=1.0,
1035
+ step=0.01,
1036
+ value=1.0,
1037
+ label="edit_n_max",
1038
+ interactive=True,
1039
+ )
1040
+
1041
def edit_type_change_func(edit_type):
    """Return the (n_min, n_max) edit-strength bounds for an edit type.

    ``"only_lyrics"`` keeps the song intact apart from lyric changes, so it
    uses a high minimum strength; ``"remix"`` allows melody/genre drift and
    uses low bounds. Any unexpected value falls back to the conservative
    ``"only_lyrics"`` range instead of raising UnboundLocalError as the
    original did.

    Args:
        edit_type: ``"only_lyrics"`` or ``"remix"``.

    Returns:
        Tuple ``(n_min, n_max)`` of floats in [0, 1].
    """
    if edit_type == "remix":
        return 0.2, 0.4
    # "only_lyrics" and any unrecognized value: conservative defaults.
    return 0.6, 1.0
1049
+
1050
+ edit_type.change(
1051
+ edit_type_change_func,
1052
+ inputs=[edit_type],
1053
+ outputs=[edit_n_min, edit_n_max],
1054
+ )
1055
+
1056
+ edit_source = gr.Radio(
1057
+ ["text2music", "last_edit", "upload"],
1058
+ value="text2music",
1059
+ label="Edit Source",
1060
+ elem_id="edit_source",
1061
+ )
1062
+ edit_source_audio_upload = gr.Audio(
1063
+ label="Upload Audio",
1064
+ type="filepath",
1065
+ visible=False,
1066
+ elem_id="edit_source_audio_upload",
1067
+ show_download_button=True,
1068
+ )
1069
+ edit_source.change(
1070
+ fn=lambda x: gr.update(
1071
+ visible=x == "upload", elem_id="edit_source_audio_upload"
1072
+ ),
1073
+ inputs=[edit_source],
1074
+ outputs=[edit_source_audio_upload],
1075
+ )
1076
+
1077
+ edit_bnt = gr.Button("Edit", variant="primary")
1078
+ edit_outputs, edit_input_params_json = create_output_ui("Edit")
1079
+
1080
def edit_process_func(
    text2music_json_data,
    edit_input_params_json,
    edit_source,
    edit_source_audio_upload,
    prompt,
    lyrics,
    edit_prompt,
    edit_lyrics,
    edit_n_min,
    edit_n_max,
    infer_step,
    guidance_scale,
    scheduler_type,
    cfg_type,
    omega_scale,
    manual_seeds,
    guidance_interval,
    guidance_interval_decay,
    min_guidance_scale,
    use_erg_tag,
    use_erg_lyric,
    use_erg_diffusion,
    oss_steps,
    guidance_scale_text,
    guidance_scale_lyric,
    retake_seeds,
):
    """Edit an existing track's tags and/or lyrics.

    Resolves the source audio according to ``edit_source`` (upload, last
    text2music result, or last edit result), falls back to the original
    prompt/lyrics when the edit fields are left empty, and calls
    ``enhanced_process_func`` with ``task="edit"`` bounded by
    [``edit_n_min``, ``edit_n_max``] edit strength.

    Raises:
        ValueError: if ``edit_source`` is not one of the supported options
            (the original fell through with unbound locals here).
    """
    if edit_source == "upload":
        src_audio_path = edit_source_audio_upload
        # NOTE(review): librosa >= 0.10 renamed ``filename=`` to ``path=``
        # — confirm the pinned librosa version.
        audio_duration = librosa.get_duration(filename=src_audio_path)
        json_data = {"audio_duration": audio_duration}
    elif edit_source == "text2music":
        json_data = text2music_json_data
        src_audio_path = json_data["audio_path"]
    elif edit_source == "last_edit":
        json_data = edit_input_params_json
        src_audio_path = json_data["audio_path"]
    else:
        raise ValueError(f"Unknown edit source: {edit_source!r}")

    # Empty edit fields mean "keep the original" for that field.
    if not edit_prompt:
        edit_prompt = prompt
    if not edit_lyrics:
        edit_lyrics = lyrics

    return enhanced_process_func(
        json_data["audio_duration"],
        prompt,
        lyrics,
        infer_step,
        guidance_scale,
        scheduler_type,
        cfg_type,
        omega_scale,
        manual_seeds,
        guidance_interval,
        guidance_interval_decay,
        min_guidance_scale,
        use_erg_tag,
        use_erg_lyric,
        use_erg_diffusion,
        oss_steps,
        guidance_scale_text,
        guidance_scale_lyric,
        task="edit",
        src_audio_path=src_audio_path,
        edit_target_prompt=edit_prompt,
        edit_target_lyrics=edit_lyrics,
        edit_n_min=edit_n_min,
        edit_n_max=edit_n_max,
        retake_seeds=retake_seeds,
        lora_name_or_path="none",
    )
1152
+
1153
+ edit_bnt.click(
1154
+ fn=edit_process_func,
1155
+ inputs=[
1156
+ input_params_json,
1157
+ edit_input_params_json,
1158
+ edit_source,
1159
+ edit_source_audio_upload,
1160
+ prompt,
1161
+ lyrics,
1162
+ edit_prompt,
1163
+ edit_lyrics,
1164
+ edit_n_min,
1165
+ edit_n_max,
1166
+ infer_step,
1167
+ guidance_scale,
1168
+ scheduler_type,
1169
+ cfg_type,
1170
+ omega_scale,
1171
+ manual_seeds,
1172
+ guidance_interval,
1173
+ guidance_interval_decay,
1174
+ min_guidance_scale,
1175
+ use_erg_tag,
1176
+ use_erg_lyric,
1177
+ use_erg_diffusion,
1178
+ oss_steps,
1179
+ guidance_scale_text,
1180
+ guidance_scale_lyric,
1181
+ retake_seeds,
1182
+ ],
1183
+ outputs=edit_outputs + [edit_input_params_json],
1184
+ )
1185
+
1186
+ with gr.Tab("extend"):
1187
+ extend_seeds = gr.Textbox(
1188
+ label="extend seeds (default None)", placeholder="", value=None
1189
+ )
1190
+ left_extend_length = gr.Slider(
1191
+ minimum=0.0,
1192
+ maximum=240.0,
1193
+ step=0.01,
1194
+ value=0.0,
1195
+ label="Left Extend Length",
1196
+ interactive=True,
1197
+ )
1198
+ right_extend_length = gr.Slider(
1199
+ minimum=0.0,
1200
+ maximum=240.0,
1201
+ step=0.01,
1202
+ value=30.0,
1203
+ label="Right Extend Length",
1204
+ interactive=True,
1205
+ )
1206
+ extend_source = gr.Radio(
1207
+ ["text2music", "last_extend", "upload"],
1208
+ value="text2music",
1209
+ label="Extend Source",
1210
+ elem_id="extend_source",
1211
+ )
1212
+
1213
+ extend_source_audio_upload = gr.Audio(
1214
+ label="Upload Audio",
1215
+ type="filepath",
1216
+ visible=False,
1217
+ elem_id="extend_source_audio_upload",
1218
+ show_download_button=True,
1219
+ )
1220
+ extend_source.change(
1221
+ fn=lambda x: gr.update(
1222
+ visible=x == "upload", elem_id="extend_source_audio_upload"
1223
+ ),
1224
+ inputs=[extend_source],
1225
+ outputs=[extend_source_audio_upload],
1226
+ )
1227
+
1228
+ extend_bnt = gr.Button("Extend", variant="primary")
1229
+ extend_outputs, extend_input_params_json = create_output_ui("Extend")
1230
+
1231
def extend_process_func(
    text2music_json_data,
    extend_input_params_json,
    extend_seeds,
    left_extend_length,
    right_extend_length,
    extend_source,
    extend_source_audio_upload,
    prompt,
    lyrics,
    infer_step,
    guidance_scale,
    scheduler_type,
    cfg_type,
    omega_scale,
    manual_seeds,
    guidance_interval,
    guidance_interval_decay,
    min_guidance_scale,
    use_erg_tag,
    use_erg_lyric,
    use_erg_diffusion,
    oss_steps,
    guidance_scale_text,
    guidance_scale_lyric,
):
    """Extend an existing track on the left and/or right.

    Resolves the source audio according to ``extend_source``, then expresses
    the extension as a repaint window: a negative start extends before the
    track, and an end beyond the duration extends after it. Delegates to
    ``enhanced_process_func`` with ``task="extend"`` and full variance (1.0)
    so the new regions are generated freely.

    Raises:
        ValueError: if ``extend_source`` is not one of the supported options
            (the original fell through with unbound locals here).
    """
    if extend_source == "upload":
        src_audio_path = extend_source_audio_upload
        # Derive the duration from the uploaded file itself.
        # NOTE(review): librosa >= 0.10 renamed ``filename=`` to ``path=``
        # — confirm the pinned librosa version.
        audio_duration = librosa.get_duration(filename=src_audio_path)
        json_data = {"audio_duration": audio_duration}
    elif extend_source == "text2music":
        json_data = text2music_json_data
        src_audio_path = json_data["audio_path"]
    elif extend_source == "last_extend":
        json_data = extend_input_params_json
        src_audio_path = json_data["audio_path"]
    else:
        raise ValueError(f"Unknown extend source: {extend_source!r}")

    # Extension is modeled as repainting outside the original time range.
    repaint_start = -left_extend_length
    repaint_end = json_data["audio_duration"] + right_extend_length
    return enhanced_process_func(
        json_data["audio_duration"],
        prompt,
        lyrics,
        infer_step,
        guidance_scale,
        scheduler_type,
        cfg_type,
        omega_scale,
        manual_seeds,
        guidance_interval,
        guidance_interval_decay,
        min_guidance_scale,
        use_erg_tag,
        use_erg_lyric,
        use_erg_diffusion,
        oss_steps,
        guidance_scale_text,
        guidance_scale_lyric,
        retake_seeds=extend_seeds,
        retake_variance=1.0,
        task="extend",
        repaint_start=repaint_start,
        repaint_end=repaint_end,
        src_audio_path=src_audio_path,
        lora_name_or_path="none",
    )
1298
+
1299
+ extend_bnt.click(
1300
+ fn=extend_process_func,
1301
+ inputs=[
1302
+ input_params_json,
1303
+ extend_input_params_json,
1304
+ extend_seeds,
1305
+ left_extend_length,
1306
+ right_extend_length,
1307
+ extend_source,
1308
+ extend_source_audio_upload,
1309
+ prompt,
1310
+ lyrics,
1311
+ infer_step,
1312
+ guidance_scale,
1313
+ scheduler_type,
1314
+ cfg_type,
1315
+ omega_scale,
1316
+ manual_seeds,
1317
+ guidance_interval,
1318
+ guidance_interval_decay,
1319
+ min_guidance_scale,
1320
+ use_erg_tag,
1321
+ use_erg_lyric,
1322
+ use_erg_diffusion,
1323
+ oss_steps,
1324
+ guidance_scale_text,
1325
+ guidance_scale_lyric,
1326
+ ],
1327
+ outputs=extend_outputs + [extend_input_params_json],
1328
+ )
1329
+
1330
def json2output(json_data):
    """Flatten a saved generation-params dict into the tuple of UI values.

    The tuple order matches the ``outputs=`` list of the sample button
    (audio_duration, prompt, lyrics, ..., ref_audio_input). Required keys
    raise ``KeyError`` when missing, as before; optional keys introduced in
    later versions of the params JSON fall back to their UI defaults via
    ``dict.get`` (replacing the verbose ``x if "k" in d else default``
    chains of the original — identical behavior, clearer code).

    Args:
        json_data: params dict as produced by a previous generation run.

    Returns:
        A 21-tuple of plain values suitable for the Gradio components.
    """
    return (
        json_data["audio_duration"],
        json_data["prompt"],
        json_data["lyrics"],
        json_data["infer_step"],
        json_data["guidance_scale"],
        json_data["scheduler_type"],
        json_data["cfg_type"],
        json_data["omega_scale"],
        # Seed/step lists are rendered back into comma-separated text fields.
        ", ".join(map(str, json_data["actual_seeds"])),
        json_data["guidance_interval"],
        json_data["guidance_interval_decay"],
        json_data["min_guidance_scale"],
        json_data["use_erg_tag"],
        json_data["use_erg_lyric"],
        json_data["use_erg_diffusion"],
        ", ".join(map(str, json_data["oss_steps"])),
        # Optional keys: default to the UI's initial values when absent.
        json_data.get("guidance_scale_text", 0.0),
        json_data.get("guidance_scale_lyric", 0.0),
        json_data.get("audio2audio_enable", False),
        json_data.get("ref_audio_strength", 0.5),
        json_data.get("ref_audio_input", None),
    )
1374
+
1375
def sample_data(lora_name_or_path_):
    """Fetch a sample parameter set for the given LoRA and map it to UI values.

    Delegates to the injected ``sample_data_func`` and converts its params
    dict through ``json2output``. When no sampler was wired in, returns an
    empty dict so the UI components are left untouched.
    """
    if not sample_data_func:
        return {}
    return json2output(sample_data_func(lora_name_or_path_))
1380
+
1381
+ sample_bnt.click(
1382
+ sample_data,
1383
+ inputs=[lora_name_or_path],
1384
+ outputs=[
1385
+ audio_duration,
1386
+ prompt,
1387
+ lyrics,
1388
+ infer_step,
1389
+ guidance_scale,
1390
+ scheduler_type,
1391
+ cfg_type,
1392
+ omega_scale,
1393
+ manual_seeds,
1394
+ guidance_interval,
1395
+ guidance_interval_decay,
1396
+ min_guidance_scale,
1397
+ use_erg_tag,
1398
+ use_erg_lyric,
1399
+ use_erg_diffusion,
1400
+ oss_steps,
1401
+ guidance_scale_text,
1402
+ guidance_scale_lyric,
1403
+ audio2audio_enable,
1404
+ ref_audio_strength,
1405
+ ref_audio_input,
1406
+ ],
1407
+ )
1408
+
1409
+ # 메인 생성 λ²„νŠΌ 이벀트 (ν–₯μƒλœ ν•¨μˆ˜ μ‚¬μš©)
1410
+ text2music_bnt.click(
1411
  fn=enhanced_process_func,
1412
  inputs=[
1413
+ audio_duration,
1414
+ prompt,
1415
+ lyrics,
1416
+ infer_step,
1417
+ guidance_scale,
1418
+ scheduler_type,
1419
+ cfg_type,
1420
+ omega_scale,
1421
+ manual_seeds,
1422
+ guidance_interval,
1423
+ guidance_interval_decay,
1424
+ min_guidance_scale,
1425
+ use_erg_tag,
1426
+ use_erg_lyric,
1427
+ use_erg_diffusion,
1428
+ oss_steps,
1429
+ guidance_scale_text,
1430
+ guidance_scale_lyric,
1431
+ audio2audio_enable,
1432
+ ref_audio_strength,
1433
+ ref_audio_input,
1434
+ lora_name_or_path,
1435
+ multi_seed_mode,
1436
+ enable_smart_enhancement,
1437
+ genre_preset
1438
  ],
1439
+ outputs=outputs + [input_params_json],
1440
  )
1441
+
 
1442
 
1443
  def create_main_demo_ui(
1444
  text2music_process_func=dump_func,
 
1446
  load_data_func=dump_func,
1447
  ):
1448
  with gr.Blocks(
1449
+ title="ACE-Step Model 1.0 DEMO - Enhanced with AI Lyrics",
1450
  theme=gr.themes.Soft(),
1451
  css="""
 
1452
  .gradio-container {
1453
  max-width: 1200px !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1454
  }
 
 
1455
  .quality-info {
1456
+ background: linear-gradient(45deg, #f0f8ff, #e6f3ff);
1457
+ padding: 10px;
1458
+ border-radius: 8px;
1459
+ margin: 5px 0;
1460
+ }
1461
+ .ai-lyrics-section {
1462
+ background: linear-gradient(45deg, #f0fff0, #e6ffe6);
1463
  padding: 15px;
1464
  border-radius: 10px;
1465
  margin: 10px 0;
1466
+ border: 2px solid #90EE90;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1467
  }
1468
  """
1469
  ) as demo:
1470
+ gr.Markdown(
1471
+ """
1472
+ <h1 style="text-align: center;">🎡 ACE-Step PRO with AI Lyrics</h1>
1473
+ <div style="text-align: center; margin: 20px;">
1474
+ <p><strong>πŸš€ μƒˆλ‘œμš΄ κΈ°λŠ₯:</strong> πŸ€– AI μž‘μ‚¬ | ν’ˆμ§ˆ 프리셋 | 닀쀑 생성 | 슀마트 ν”„λ‘¬ν”„νŠΈ | μ‹€μ‹œκ°„ 프리뷰 | ν’ˆμ§ˆ 점수</p>
1475
+ <p>
1476
+ <a href="https://ace-step.github.io/" target='_blank'>Project</a> |
1477
+ <a href="https://huggingface.co/ACE-Step/ACE-Step-v1-3.5B">Checkpoints</a> |
1478
+ <a href="https://discord.gg/rjAZz2xBdG" target='_blank'>Discord</a>
1479
+ </p>
1480
+ </div>
1481
+ """
1482
+ )
1483
+
1484
+ # μ‚¬μš©λ²• κ°€μ΄λ“œ μΆ”κ°€
1485
+ with gr.Accordion("πŸ“– μ‚¬μš©λ²• κ°€μ΄λ“œ", open=False):
1486
+ gr.Markdown("""
1487
+ ### 🎯 λΉ λ₯Έ μ‹œμž‘
1488
+ 1. **πŸ€– AI μž‘μ‚¬**: 주제λ₯Ό μž…λ ₯ν•˜κ³  'AI μž‘μ‚¬' λ²„νŠΌμ„ ν΄λ¦­ν•˜λ©΄ μžλ™μœΌλ‘œ 가사가 μƒμ„±λ©λ‹ˆλ‹€
1489
+ 2. **μž₯λ₯΄ 선택**: μ›ν•˜λŠ” μŒμ•… μž₯λ₯΄λ₯Ό μ„ νƒν•˜λ©΄ μžλ™μœΌλ‘œ μ΅œμ ν™”λœ νƒœκ·Έκ°€ μ μš©λ©λ‹ˆλ‹€
1490
+ 3. **ν’ˆμ§ˆ μ„€μ •**: Draft(빠름) β†’ Standard(ꢌμž₯) β†’ High Quality β†’ Ultra 쀑 선택
1491
+ 4. **닀쀑 생성**: "Best of 3/5/10" μ„ νƒν•˜λ©΄ μ—¬λŸ¬ 번 μƒμ„±ν•˜μ—¬ 졜고 ν’ˆμ§ˆμ„ μžλ™ μ„ νƒν•©λ‹ˆλ‹€
1492
+ 5. **프리뷰**: 전체 생성 μ „ 10초 ν”„λ¦¬λ·°λ‘œ λΉ λ₯΄κ²Œ 확인할 수 μžˆμŠ΅λ‹ˆλ‹€
1493
 
1494
+ ### πŸ€– AI μž‘μ‚¬ κΈ°λŠ₯
1495
+ - **λ‹€κ΅­μ–΄ 지원**: ν•œκ΅­μ–΄, μ˜μ–΄ λ“± μž…λ ₯ 언어와 λ™μΌν•œ μ–Έμ–΄λ‘œ 가사 생성
1496
+ - **주제 μ˜ˆμ‹œ**: "μ²«μ‚¬λž‘μ˜ μ„€λ ˜", "μ΄λ³„μ˜ μ•„ν””", "κ΅°λŒ€κ°€λŠ” λ‚¨μžμ˜ ν•œμˆ¨", "희망찬 내일"
1497
+ - **ꡬ쑰 νƒœκ·Έ**: [verse], [chorus], [bridge] νƒœκ·Έκ°€ μžλ™μœΌλ‘œ ν¬ν•¨λ©λ‹ˆλ‹€
1498
+ - **μž₯λ₯΄ 연동**: μ„ νƒν•œ μž₯λ₯΄μ— λ§žλŠ” μŠ€νƒ€μΌμ˜ 가사가 μƒμ„±λ©λ‹ˆλ‹€
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1499
 
1500
+ ### πŸ’‘ ν’ˆμ§ˆ ν–₯상 팁
1501
+ - **κ³ ν’ˆμ§ˆ 생성**: "High Quality" + "Best of 5" μ‘°ν•© μΆ”μ²œ
1502
+ - **λΉ λ₯Έ ν…ŒμŠ€νŠΈ**: "Draft" + "프리뷰" κΈ°λŠ₯ ν™œμš©
1503
+ - **μž₯λ₯΄ νŠΉν™”**: μž₯λ₯΄ 프리셋 선택 ν›„ "슀마트 ν–₯상" 체크
1504
+ - **가사 ꡬ쑰**: [verse], [chorus], [bridge] νƒœκ·Έ 적극 ν™œμš©
1505
+
1506
+ ### βš™οΈ API μ„€μ •
1507
+ AI μž‘μ‚¬ κΈ°λŠ₯을 μ‚¬μš©ν•˜λ €λ©΄ ν™˜κ²½λ³€μˆ˜μ— OpenAI API ν‚€λ₯Ό μ„€μ •ν•΄μ•Ό ν•©λ‹ˆλ‹€:
1508
+ ```bash
1509
+ export LLM_API="your-openai-api-key"
1510
+ ```
1511
+ """)
1512
+
1513
+ with gr.Tab("🎡 Enhanced Text2Music with AI Lyrics"):
1514
+ create_text2music_ui(
1515
+ gr=gr,
1516
+ text2music_process_func=text2music_process_func,
1517
+ sample_data_func=sample_data_func,
1518
+ load_data_func=load_data_func,
1519
+ )
1520
  return demo
1521
 
1522
 
1523
if __name__ == "__main__":
    # Entry point: report AI-lyrics availability, build the demo, and serve it.
    print("πŸš€ ACE-Step PRO with AI Lyrics μ‹œμž‘ 쀑...")

    # Surface whether the OpenAI key was picked up at import time.
    if client_available:
        print("βœ… OpenAI API μ‚¬μš© κ°€λŠ₯ - AI μž‘μ‚¬ κΈ°λŠ₯ ν™œμ„±ν™”λ¨")
    else:
        print("❌ OpenAI API μ‚¬μš© λΆˆκ°€ - ν™˜κ²½λ³€μˆ˜λ₯Ό ν™•μΈν•˜μ„Έμš”")
        print("μ„€μ • 방법: export LLM_API='your-openai-api-key'")

    demo = create_main_demo_ui()
    # Bind on all interfaces with a public share link, matching the original.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)