ginipick committed
Commit 0141981 · verified · 1 parent: 1b7d348

Delete ui/components-original.py

Files changed (1)
  1. ui/components-original.py +0 -1348
ui/components-original.py DELETED
@@ -1,1348 +0,0 @@
1
- """
2
- ACE-Step: A Step Towards Music Generation Foundation Model
3
-
4
- https://github.com/ace-step/ACE-Step
5
-
6
- Apache 2.0 License
7
- """
8
-
9
- import gradio as gr
10
- import librosa
11
- import os
12
- import random
13
- import hashlib
14
- import numpy as np
15
- import json
16
- from typing import Dict, List, Tuple, Optional
17
-
18
- TAG_DEFAULT = "funk, pop, soul, rock, melodic, guitar, drums, bass, keyboard, percussion, 105 BPM, energetic, upbeat, groovy, vibrant, dynamic"
19
- LYRIC_DEFAULT = """[verse]
20
- Neon lights they flicker bright
21
- City hums in dead of night
22
- Rhythms pulse through concrete veins
23
- Lost in echoes of refrains
24
-
25
- [verse]
26
- Bassline groovin' in my chest
27
- Heartbeats match the city's zest
28
- Electric whispers fill the air
29
- Synthesized dreams everywhere
30
-
31
- [chorus]
32
- Turn it up and let it flow
33
- Feel the fire let it grow
34
- In this rhythm we belong
35
- Hear the night sing out our song
36
-
37
- [verse]
38
- Guitar strings they start to weep
39
- Wake the soul from silent sleep
40
- Every note a story told
41
- In this night we're bold and gold
42
-
43
- [bridge]
44
- Voices blend in harmony
45
- Lost in pure cacophony
46
- Timeless echoes timeless cries
47
- Soulful shouts beneath the skies
48
-
49
- [verse]
50
- Keyboard dances on the keys
51
- Melodies on evening breeze
52
- Catch the tune and hold it tight
53
- In this moment we take flight
54
- """
55
-
56
- # Extended genre presets (original tags + improvements)
57
- GENRE_PRESETS = {
58
- "Modern Pop": "pop, synth, drums, guitar, 120 bpm, upbeat, catchy, vibrant, female vocals, polished vocals, radio-ready, commercial, layered vocals",
59
- "Rock": "rock, electric guitar, drums, bass, 130 bpm, energetic, rebellious, gritty, male vocals, raw vocals, power chords, driving rhythm",
60
- "Hip Hop": "hip hop, 808 bass, hi-hats, synth, 90 bpm, bold, urban, intense, male vocals, rhythmic vocals, trap beats, punchy drums",
61
- "Country": "country, acoustic guitar, steel guitar, fiddle, 100 bpm, heartfelt, rustic, warm, male vocals, twangy vocals, storytelling, americana",
62
- "EDM": "edm, synth, bass, kick drum, 128 bpm, euphoric, pulsating, energetic, instrumental, progressive build, festival anthem, electronic",
63
- "Reggae": "reggae, guitar, bass, drums, 80 bpm, chill, soulful, positive, male vocals, smooth vocals, offbeat rhythm, island vibes",
64
- "Classical": "classical, orchestral, strings, piano, 60 bpm, elegant, emotive, timeless, instrumental, dynamic range, sophisticated harmony",
65
- "Jazz": "jazz, saxophone, piano, double bass, 110 bpm, smooth, improvisational, soulful, male vocals, crooning vocals, swing feel, sophisticated",
66
- "Metal": "metal, electric guitar, double kick drum, bass, 160 bpm, aggressive, intense, heavy, male vocals, screamed vocals, distorted, powerful",
67
- "R&B": "r&b, synth, bass, drums, 85 bpm, sultry, groovy, romantic, female vocals, silky vocals, smooth production, neo-soul"
68
- }
69
-
70
- # Quality preset system
71
- QUALITY_PRESETS = {
72
- "Draft (Fast)": {
73
- "infer_step": 50,
74
- "guidance_scale": 10.0,
75
- "scheduler_type": "euler",
76
- "omega_scale": 5.0,
77
- "use_erg_diffusion": False,
78
- "use_erg_tag": True,
79
- "description": "빠른 초안 생성 (1-2분)"
80
- },
81
- "Standard": {
82
- "infer_step": 150,
83
- "guidance_scale": 15.0,
84
- "scheduler_type": "euler",
85
- "omega_scale": 10.0,
86
- "use_erg_diffusion": True,
87
- "use_erg_tag": True,
88
- "description": "표준 품질 (3-5분)"
89
- },
90
- "High Quality": {
91
- "infer_step": 200,
92
- "guidance_scale": 18.0,
93
- "scheduler_type": "heun",
94
- "omega_scale": 15.0,
95
- "use_erg_diffusion": True,
96
- "use_erg_tag": True,
97
- "description": "고품질 생성 (8-12분)"
98
- },
99
- "Ultra (Best)": {
100
- "infer_step": 299,
101
- "guidance_scale": 20.0,
102
- "scheduler_type": "heun",
103
- "omega_scale": 20.0,
104
- "use_erg_diffusion": True,
105
- "use_erg_tag": True,
106
- "description": "최고 품질 (15-20분)"
107
- }
108
- }
109
-
110
- # Multi-seed generation options
111
- MULTI_SEED_OPTIONS = {
112
- "Single": 1,
113
- "Best of 3": 3,
114
- "Best of 5": 5,
115
- "Best of 10": 10
116
- }
117
-
118
- class MusicGenerationCache:
119
- """생성 결과 캐싱 시스템"""
120
- def __init__(self):
121
- self.cache = {}
122
- self.max_cache_size = 50
123
-
124
- def get_cache_key(self, params):
125
- # Build the hash from the important parameters only
126
- key_params = {k: v for k, v in params.items()
127
- if k in ['prompt', 'lyrics', 'infer_step', 'guidance_scale', 'audio_duration']}
128
- return hashlib.md5(str(sorted(key_params.items())).encode()).hexdigest()[:16]
129
-
130
- def get_cached_result(self, params):
131
- key = self.get_cache_key(params)
132
- return self.cache.get(key)
133
-
134
- def cache_result(self, params, result):
135
- if len(self.cache) >= self.max_cache_size:
136
- oldest_key = next(iter(self.cache))
137
- del self.cache[oldest_key]
138
-
139
- key = self.get_cache_key(params)
140
- self.cache[key] = result
141
-
142
- # Global cache instance
143
- generation_cache = MusicGenerationCache()
144
-
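# A minimal usage sketch for MusicGenerationCache above. `_fake_generate` is a
# hypothetical stand-in for the real generation call, used only for illustration.
def _fake_generate(params):
    # pretend to return [audio_path, params_json] like the real pipeline does
    return ["/tmp/preview.wav", dict(params)]

_demo_cache = MusicGenerationCache()
_demo_params = {
    "prompt": "funk, pop, 105 BPM",
    "lyrics": "[verse]\nNeon lights they flicker bright",
    "audio_duration": 60,
    "infer_step": 150,
    "guidance_scale": 15.0,
}
if _demo_cache.get_cached_result(_demo_params) is None:                  # cache miss
    _demo_cache.cache_result(_demo_params, _fake_generate(_demo_params))  # store it
assert _demo_cache.get_cached_result(_demo_params) is not None            # now a cache hit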
145
- def enhance_prompt_with_genre(base_prompt: str, genre: str) -> str:
146
- """장르에 따른 스마트 프롬프트 확장"""
147
- if genre == "Custom" or not genre:
148
- return base_prompt
149
-
150
- # Additional enhancement tags per genre
151
- genre_enhancements = {
152
- "Modern Pop": ["polished production", "mainstream appeal", "hook-driven"],
153
- "Rock": ["guitar-driven", "powerful drums", "energetic performance"],
154
- "Hip Hop": ["rhythmic flow", "urban atmosphere", "bass-heavy"],
155
- "Country": ["acoustic warmth", "storytelling melody", "authentic feel"],
156
- "EDM": ["electronic atmosphere", "build-ups", "dance-friendly"],
157
- "Reggae": ["laid-back groove", "tropical vibes", "rhythmic guitar"],
158
- "Classical": ["orchestral depth", "musical sophistication", "timeless beauty"],
159
- "Jazz": ["musical complexity", "improvisational spirit", "sophisticated harmony"],
160
- "Metal": ["aggressive energy", "powerful sound", "intense atmosphere"],
161
- "R&B": ["smooth groove", "soulful expression", "rhythmic sophistication"]
162
- }
163
-
164
- if genre in genre_enhancements:
165
- additional_tags = ", ".join(genre_enhancements[genre])
166
- return f"{base_prompt}, {additional_tags}"
167
-
168
- return base_prompt
169
-
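# A short usage sketch of the genre expansion above; the output shown is for the "Rock" preset.
_base = "electric guitar, 130 bpm, energetic"
print(enhance_prompt_with_genre(_base, "Rock"))
# -> "electric guitar, 130 bpm, energetic, guitar-driven, powerful drums, energetic performance"
print(enhance_prompt_with_genre(_base, "Custom"))  # returned unchanged for "Custom" or an empty genre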
170
- def calculate_quality_score(audio_path: str) -> float:
171
- """간단한 품질 점수 계산 (실제 구현에서는 더 복잡한 메트릭 사용)"""
172
- try:
173
- y, sr = librosa.load(audio_path)
174
-
175
- # Basic quality metrics
176
- rms_energy = np.sqrt(np.mean(y**2))
177
- spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
178
- zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y))
179
-
180
- # Normalized score (0-100)
181
- energy_score = min(rms_energy * 1000, 40) # 0-40 points
182
- spectral_score = min(spectral_centroid / 100, 40) # 0-40 points
183
- clarity_score = min((1 - zero_crossing_rate) * 20, 20) # 0-20 points
184
-
185
- total_score = energy_score + spectral_score + clarity_score
186
- return round(total_score, 1)
187
- except Exception:
188
- return 50.0 # default value
189
-
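# A quick sanity-check sketch for the scoring heuristic above: write a two-second
# 440 Hz sine wave to a temporary WAV file and score it. `soundfile` is assumed to
# be available (it ships as a librosa dependency); the exact number is not
# meaningful, only that it falls in the 0-100 range.
import tempfile
import soundfile as sf

_sr = 22050
_t = np.linspace(0, 2.0, int(2.0 * _sr), endpoint=False)
_tone = 0.5 * np.sin(2 * np.pi * 440.0 * _t)
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as _f:
    sf.write(_f.name, _tone, _sr)
print(calculate_quality_score(_f.name))  # somewhere between 0 and 100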
190
- def update_tags_from_preset(preset_name):
191
- if preset_name == "Custom":
192
- return ""
193
- return GENRE_PRESETS.get(preset_name, "")
194
-
195
- def update_quality_preset(preset_name):
196
- """품질 프리셋 적용"""
197
- if preset_name not in QUALITY_PRESETS:
198
- return (100, 15.0, "euler", 10.0, True, True)
199
-
200
- preset = QUALITY_PRESETS[preset_name]
201
- return (
202
- preset.get("infer_step", 100),
203
- preset.get("guidance_scale", 15.0),
204
- preset.get("scheduler_type", "euler"),
205
- preset.get("omega_scale", 10.0),
206
- preset.get("use_erg_diffusion", True),
207
- preset.get("use_erg_tag", True)
208
- )
209
-
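# Usage sketch: the tuple returned above maps one-to-one onto the Gradio controls it is
# wired to further down (infer_step, guidance_scale, scheduler_type, omega_scale,
# use_erg_diffusion, use_erg_tag).
_steps, _cfg, _sched, _omega, _erg_diff, _erg_tag = update_quality_preset("High Quality")
print(_steps, _cfg, _sched, _omega)             # 200 18.0 heun 15.0
print(update_quality_preset("unknown preset"))  # falls back to (100, 15.0, "euler", 10.0, True, True)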
210
- def create_enhanced_process_func(original_func):
211
- """기존 함수를 향상된 기능으로 래핑"""
212
-
213
- def enhanced_func(
214
- audio_duration, prompt, lyrics, infer_step, guidance_scale,
215
- scheduler_type, cfg_type, omega_scale, manual_seeds,
216
- guidance_interval, guidance_interval_decay, min_guidance_scale,
217
- use_erg_tag, use_erg_lyric, use_erg_diffusion, oss_steps,
218
- guidance_scale_text, guidance_scale_lyric,
219
- audio2audio_enable=False, ref_audio_strength=0.5, ref_audio_input=None,
220
- lora_name_or_path="none", multi_seed_mode="Single",
221
- enable_smart_enhancement=True, genre_preset="Custom", **kwargs
222
- ):
223
- # Smart prompt expansion
224
- if enable_smart_enhancement and genre_preset != "Custom":
225
- prompt = enhance_prompt_with_genre(prompt, genre_preset)
226
-
227
- # Check the cache
228
- cache_params = {
229
- 'prompt': prompt, 'lyrics': lyrics, 'audio_duration': audio_duration,
230
- 'infer_step': infer_step, 'guidance_scale': guidance_scale
231
- }
232
-
233
- cached_result = generation_cache.get_cached_result(cache_params)
234
- if cached_result:
235
- return cached_result
236
-
237
- # Multi-seed generation
238
- num_candidates = MULTI_SEED_OPTIONS.get(multi_seed_mode, 1)
239
-
240
- if num_candidates == 1:
241
- # Call the original function
242
- result = original_func(
243
- audio_duration, prompt, lyrics, infer_step, guidance_scale,
244
- scheduler_type, cfg_type, omega_scale, manual_seeds,
245
- guidance_interval, guidance_interval_decay, min_guidance_scale,
246
- use_erg_tag, use_erg_lyric, use_erg_diffusion, oss_steps,
247
- guidance_scale_text, guidance_scale_lyric, audio2audio_enable,
248
- ref_audio_strength, ref_audio_input, lora_name_or_path, **kwargs
249
- )
250
- else:
251
- # Generate with multiple seeds and select the best result
252
- candidates = []
253
-
254
- for i in range(num_candidates):
255
- seed = random.randint(1, 10000)
256
-
257
- try:
258
- result = original_func(
259
- audio_duration, prompt, lyrics, infer_step, guidance_scale,
260
- scheduler_type, cfg_type, omega_scale, str(seed),
261
- guidance_interval, guidance_interval_decay, min_guidance_scale,
262
- use_erg_tag, use_erg_lyric, use_erg_diffusion, oss_steps,
263
- guidance_scale_text, guidance_scale_lyric, audio2audio_enable,
264
- ref_audio_strength, ref_audio_input, lora_name_or_path, **kwargs
265
- )
266
-
267
- if result and len(result) > 0:
268
- audio_path = result[0] # the first result entry is the audio file path
269
- if audio_path and os.path.exists(audio_path):
270
- quality_score = calculate_quality_score(audio_path)
271
- candidates.append({
272
- "result": result,
273
- "quality_score": quality_score,
274
- "seed": seed
275
- })
276
- except Exception as e:
277
- print(f"Generation {i+1} failed: {e}")
278
- continue
279
-
280
- if candidates:
281
- # Pick the highest-quality candidate
282
- best_candidate = max(candidates, key=lambda x: x["quality_score"])
283
- result = best_candidate["result"]
284
-
285
- # Attach quality info
286
- if len(result) > 1 and isinstance(result[1], dict):
287
- result[1]["quality_score"] = best_candidate["quality_score"]
288
- result[1]["selected_seed"] = best_candidate["seed"]
289
- result[1]["candidates_count"] = len(candidates)
290
- else:
291
- # Fall back to a single default generation if every attempt failed
292
- result = original_func(
293
- audio_duration, prompt, lyrics, infer_step, guidance_scale,
294
- scheduler_type, cfg_type, omega_scale, manual_seeds,
295
- guidance_interval, guidance_interval_decay, min_guidance_scale,
296
- use_erg_tag, use_erg_lyric, use_erg_diffusion, oss_steps,
297
- guidance_scale_text, guidance_scale_lyric, audio2audio_enable,
298
- ref_audio_strength, ref_audio_input, lora_name_or_path, **kwargs
299
- )
300
-
301
- # Cache the result
302
- generation_cache.cache_result(cache_params, result)
303
- return result
304
-
305
- return enhanced_func
306
-
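# A standalone sketch of how the wrapper above is meant to be used. `_dummy_process`
# is a hypothetical stand-in that mimics the real text2music callable only closely
# enough to exercise the wrapper: it returns [audio_path, params_dict] and ignores
# most of its arguments.
def _dummy_process(audio_duration, prompt, lyrics, infer_step, guidance_scale,
                   scheduler_type, cfg_type, omega_scale, manual_seeds,
                   guidance_interval, guidance_interval_decay, min_guidance_scale,
                   use_erg_tag, use_erg_lyric, use_erg_diffusion, oss_steps,
                   guidance_scale_text, guidance_scale_lyric,
                   audio2audio_enable=False, ref_audio_strength=0.5,
                   ref_audio_input=None, lora_name_or_path="none", **kwargs):
    return ["/tmp/dummy.wav", {"prompt": prompt, "infer_step": infer_step}]

_wrapped = create_enhanced_process_func(_dummy_process)
_out = _wrapped(
    30, "rock, guitar", "[verse] hello", 150, 15.0,
    "euler", "apg", 10.0, None,
    0.5, 0.0, 3.0,
    True, False, True, None,
    0.0, 0.0,
    genre_preset="Rock", multi_seed_mode="Single",
)
print(_out[1])  # the genre tags were appended to the prompt before generation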
307
- def create_output_ui(task_name="Text2Music"):
308
- # For many consumer-grade GPU devices, only one batch can be run
309
- output_audio1 = gr.Audio(type="filepath", label=f"{task_name} Generated Audio 1")
310
-
311
- with gr.Accordion(f"{task_name} Parameters & Quality Info", open=False):
312
- input_params_json = gr.JSON(label=f"{task_name} Parameters")
313
-
314
- # Quality info display
315
- with gr.Row():
316
- quality_score = gr.Number(label="Quality Score (0-100)", value=0, interactive=False)
317
- generation_info = gr.Textbox(
318
- label="Generation Info",
319
- value="",
320
- interactive=False,
321
- max_lines=2
322
- )
323
-
324
- outputs = [output_audio1]
325
- return outputs, input_params_json
326
-
327
- def dump_func(*args):
328
- print(args)
329
- return []
330
-
331
- def create_text2music_ui(
332
- gr,
333
- text2music_process_func,
334
- sample_data_func=None,
335
- load_data_func=None,
336
- ):
337
- # Build the enhanced process function
338
- enhanced_process_func = create_enhanced_process_func(text2music_process_func)
339
-
340
- with gr.Row():
341
- with gr.Column():
342
- # Quality & performance settings section
343
- with gr.Group():
344
- gr.Markdown("### ⚡ 품질 & 성능 설정")
345
- with gr.Row():
346
- quality_preset = gr.Dropdown(
347
- choices=list(QUALITY_PRESETS.keys()),
348
- value="Standard",
349
- label="품질 프리셋",
350
- scale=2
351
- )
352
- multi_seed_mode = gr.Dropdown(
353
- choices=list(MULTI_SEED_OPTIONS.keys()),
354
- value="Single",
355
- label="다중 생성 모드",
356
- scale=2,
357
- info="여러 번 생성하여 최고 품질 선택"
358
- )
359
-
360
- preset_description = gr.Textbox(
361
- value=QUALITY_PRESETS["Standard"]["description"],
362
- label="설명",
363
- interactive=False,
364
- max_lines=1
365
- )
366
-
367
- with gr.Row(equal_height=True):
368
- # Tag and lyric examples come from the AI music generation community
369
- audio_duration = gr.Slider(
370
- -1,
371
- 240.0,
372
- step=0.00001,
373
- value=-1,
374
- label="Audio Duration",
375
- interactive=True,
376
- info="-1 means random duration (30 ~ 240).",
377
- scale=7,
378
- )
379
- sample_bnt = gr.Button("Sample", variant="secondary", scale=1)
380
- preview_bnt = gr.Button("🎵 Preview", variant="secondary", scale=2)
381
-
382
- # audio2audio
383
- with gr.Row(equal_height=True):
384
- audio2audio_enable = gr.Checkbox(
385
- label="Enable Audio2Audio",
386
- value=False,
387
- info="Check to enable Audio-to-Audio generation using a reference audio.",
388
- elem_id="audio2audio_checkbox"
389
- )
390
- lora_name_or_path = gr.Dropdown(
391
- label="Lora Name or Path",
392
- choices=["ACE-Step/ACE-Step-v1-chinese-rap-LoRA", "none"],
393
- value="none",
394
- allow_custom_value=True,
395
- )
396
-
397
- ref_audio_input = gr.Audio(
398
- type="filepath",
399
- label="Reference Audio (for Audio2Audio)",
400
- visible=False,
401
- elem_id="ref_audio_input",
402
- show_download_button=True
403
- )
404
- ref_audio_strength = gr.Slider(
405
- label="Refer audio strength",
406
- minimum=0.0,
407
- maximum=1.0,
408
- step=0.01,
409
- value=0.5,
410
- elem_id="ref_audio_strength",
411
- visible=False,
412
- interactive=True,
413
- )
414
-
415
- def toggle_ref_audio_visibility(is_checked):
416
- return (
417
- gr.update(visible=is_checked, elem_id="ref_audio_input"),
418
- gr.update(visible=is_checked, elem_id="ref_audio_strength"),
419
- )
420
-
421
- audio2audio_enable.change(
422
- fn=toggle_ref_audio_visibility,
423
- inputs=[audio2audio_enable],
424
- outputs=[ref_audio_input, ref_audio_strength],
425
- )
426
-
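# A minimal standalone sketch of the visibility-toggle pattern used just above
# (checkbox -> gr.update(visible=...)), independent of the surrounding layout and
# assuming a recent Gradio release. Component names here are illustrative only.
with gr.Blocks() as _toggle_demo:
    _enable = gr.Checkbox(label="Enable reference audio", value=False)
    _ref = gr.Audio(type="filepath", label="Reference Audio", visible=False)
    _strength = gr.Slider(0.0, 1.0, value=0.5, label="Reference strength", visible=False)

    def _toggle(checked):
        # one gr.update per output component, in the same order as `outputs`
        return gr.update(visible=checked), gr.update(visible=checked)

    _enable.change(fn=_toggle, inputs=[_enable], outputs=[_ref, _strength])
# _toggle_demo.launch()  # uncomment to try the sketch on its own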
427
- with gr.Column(scale=2):
428
- with gr.Group():
429
- gr.Markdown("""### 🎼 스마트 프롬프트 시스템
430
- <center>장르 선택 시 자동으로 최적화된 태그가 추가됩니다. 콤마로 구분하여 태그를 입력하세요.</center>""")
431
-
432
- with gr.Row():
433
- genre_preset = gr.Dropdown(
434
- choices=["Custom"] + list(GENRE_PRESETS.keys()),
435
- value="Custom",
436
- label="장르 프리셋",
437
- scale=1,
438
- )
439
- enable_smart_enhancement = gr.Checkbox(
440
- label="스마트 향상",
441
- value=True,
442
- info="자동 태그 최적화",
443
- scale=1
444
- )
445
-
446
- prompt = gr.Textbox(
447
- lines=2,
448
- label="Tags",
449
- max_lines=4,
450
- value=TAG_DEFAULT,
451
- placeholder="콤마로 구분된 태그들...",
452
- )
453
-
454
- with gr.Group():
455
- gr.Markdown("""### 📝 가사 입력
456
- <center>구조 태그 [verse], [chorus], [bridge] 사용을 권장합니다.<br>[instrumental] 또는 [inst]를 사용하면 연주곡을 생성합니다.</center>""")
457
- lyrics = gr.Textbox(
458
- lines=9,
459
- label="Lyrics",
460
- max_lines=13,
461
- value=LYRIC_DEFAULT,
462
- placeholder="가사를 입력하세요. [verse], [chorus] 등의 구조 태그 사용을 권장합니다."
463
- )
464
-
465
- with gr.Accordion("Basic Settings", open=False):
466
- infer_step = gr.Slider(
467
- minimum=1,
468
- maximum=300,
469
- step=1,
470
- value=150,
471
- label="Infer Steps",
472
- interactive=True,
473
- )
474
- guidance_scale = gr.Slider(
475
- minimum=0.0,
476
- maximum=30.0,
477
- step=0.1,
478
- value=15.0,
479
- label="Guidance Scale",
480
- interactive=True,
481
- info="When guidance_scale_lyric > 1 and guidance_scale_text > 1, the guidance scale will not be applied.",
482
- )
483
- guidance_scale_text = gr.Slider(
484
- minimum=0.0,
485
- maximum=10.0,
486
- step=0.1,
487
- value=0.0,
488
- label="Guidance Scale Text",
489
- interactive=True,
490
- info="Guidance scale for text condition. It can only apply to cfg. set guidance_scale_text=5.0, guidance_scale_lyric=1.5 for start",
491
- )
492
- guidance_scale_lyric = gr.Slider(
493
- minimum=0.0,
494
- maximum=10.0,
495
- step=0.1,
496
- value=0.0,
497
- label="Guidance Scale Lyric",
498
- interactive=True,
499
- )
500
-
501
- manual_seeds = gr.Textbox(
502
- label="manual seeds (default None)",
503
- placeholder="1,2,3,4",
504
- value=None,
505
- info="Seed for the generation",
506
- )
507
-
508
- with gr.Accordion("Advanced Settings", open=False):
509
- scheduler_type = gr.Radio(
510
- ["euler", "heun"],
511
- value="euler",
512
- label="Scheduler Type",
513
- elem_id="scheduler_type",
514
- info="Scheduler type for the generation. euler is recommended. heun will take more time.",
515
- )
516
- cfg_type = gr.Radio(
517
- ["cfg", "apg", "cfg_star"],
518
- value="apg",
519
- label="CFG Type",
520
- elem_id="cfg_type",
521
- info="CFG type for the generation. apg is recommended. cfg and cfg_star are almost the same.",
522
- )
523
- use_erg_tag = gr.Checkbox(
524
- label="use ERG for tag",
525
- value=True,
526
- info="Use Entropy Rectifying Guidance for tag. It will multiple a temperature to the attention to make a weaker tag condition and make better diversity.",
527
- )
528
- use_erg_lyric = gr.Checkbox(
529
- label="use ERG for lyric",
530
- value=False,
531
- info="The same but apply to lyric encoder's attention.",
532
- )
533
- use_erg_diffusion = gr.Checkbox(
534
- label="use ERG for diffusion",
535
- value=True,
536
- info="The same but apply to diffusion model's attention.",
537
- )
538
-
539
- omega_scale = gr.Slider(
540
- minimum=-100.0,
541
- maximum=100.0,
542
- step=0.1,
543
- value=10.0,
544
- label="Granularity Scale",
545
- interactive=True,
546
- info="Granularity scale for the generation. Higher values can reduce artifacts",
547
- )
548
-
549
- guidance_interval = gr.Slider(
550
- minimum=0.0,
551
- maximum=1.0,
552
- step=0.01,
553
- value=0.5,
554
- label="Guidance Interval",
555
- interactive=True,
556
- info="Guidance interval for the generation. 0.5 means only apply guidance in the middle steps (0.25 * infer_steps to 0.75 * infer_steps)",
557
- )
558
- guidance_interval_decay = gr.Slider(
559
- minimum=0.0,
560
- maximum=1.0,
561
- step=0.01,
562
- value=0.0,
563
- label="Guidance Interval Decay",
564
- interactive=True,
565
- info="Guidance interval decay for the generation. Guidance scale will decay from guidance_scale to min_guidance_scale in the interval. 0.0 means no decay.",
566
- )
567
- min_guidance_scale = gr.Slider(
568
- minimum=0.0,
569
- maximum=200.0,
570
- step=0.1,
571
- value=3.0,
572
- label="Min Guidance Scale",
573
- interactive=True,
574
- info="Min guidance scale for guidance interval decay's end scale",
575
- )
576
- oss_steps = gr.Textbox(
577
- label="OSS Steps",
578
- placeholder="16, 29, 52, 96, 129, 158, 172, 183, 189, 200",
579
- value=None,
580
- info="Optimal Steps for the generation. But not test well",
581
- )
582
-
583
- text2music_bnt = gr.Button("🎵 Generate Music", variant="primary", size="lg")
584
-
585
- # Wire up event handlers after all UI elements are defined
586
- genre_preset.change(
587
- fn=update_tags_from_preset,
588
- inputs=[genre_preset],
589
- outputs=[prompt]
590
- )
591
-
592
- quality_preset.change(
593
- fn=lambda x: QUALITY_PRESETS.get(x, {}).get("description", ""),
594
- inputs=[quality_preset],
595
- outputs=[preset_description]
596
- )
597
-
598
- quality_preset.change(
599
- fn=update_quality_preset,
600
- inputs=[quality_preset],
601
- outputs=[infer_step, guidance_scale, scheduler_type, omega_scale, use_erg_diffusion, use_erg_tag]
602
- )
603
-
604
- with gr.Column():
605
- outputs, input_params_json = create_output_ui()
606
-
607
- # Real-time preview feature
608
- def generate_preview(prompt, lyrics, genre_preset):
609
- """10초 프리뷰 생성"""
610
- preview_params = {
611
- "audio_duration": 10,
612
- "infer_step": 50,
613
- "guidance_scale": 12.0,
614
- "scheduler_type": "euler",
615
- "cfg_type": "apg",
616
- "omega_scale": 5.0,
617
- }
618
-
619
- enhanced_prompt = enhance_prompt_with_genre(prompt, genre_preset) if genre_preset != "Custom" else prompt
620
-
621
- try:
622
- # A real implementation would use a fast generation mode
623
- result = enhanced_process_func(
624
- preview_params["audio_duration"],
625
- enhanced_prompt,
626
- lyrics[:200], # use only part of the lyrics
627
- preview_params["infer_step"],
628
- preview_params["guidance_scale"],
629
- preview_params["scheduler_type"],
630
- preview_params["cfg_type"],
631
- preview_params["omega_scale"],
632
- None, # manual_seeds
633
- 0.5, # guidance_interval
634
- 0.0, # guidance_interval_decay
635
- 3.0, # min_guidance_scale
636
- True, # use_erg_tag
637
- False, # use_erg_lyric
638
- True, # use_erg_diffusion
639
- None, # oss_steps
640
- 0.0, # guidance_scale_text
641
- 0.0, # guidance_scale_lyric
642
- multi_seed_mode="Single"
643
- )
644
- return result[0] if result else None
645
- except Exception as e:
646
- return f"프리뷰 생성 실패: {str(e)}"
647
-
648
- preview_bnt.click(
649
- fn=generate_preview,
650
- inputs=[prompt, lyrics, genre_preset],
651
- outputs=[outputs[0]]
652
- )
653
-
654
- with gr.Tab("retake"):
655
- retake_variance = gr.Slider(
656
- minimum=0.0, maximum=1.0, step=0.01, value=0.2, label="variance"
657
- )
658
- retake_seeds = gr.Textbox(
659
- label="retake seeds (default None)", placeholder="", value=None
660
- )
661
- retake_bnt = gr.Button("Retake", variant="primary")
662
- retake_outputs, retake_input_params_json = create_output_ui("Retake")
663
-
664
- def retake_process_func(json_data, retake_variance, retake_seeds):
665
- return enhanced_process_func(
666
- json_data.get("audio_duration", 30),
667
- json_data.get("prompt", ""),
668
- json_data.get("lyrics", ""),
669
- json_data.get("infer_step", 100),
670
- json_data.get("guidance_scale", 15.0),
671
- json_data.get("scheduler_type", "euler"),
672
- json_data.get("cfg_type", "apg"),
673
- json_data.get("omega_scale", 10.0),
674
- retake_seeds,
675
- json_data.get("guidance_interval", 0.5),
676
- json_data.get("guidance_interval_decay", 0.0),
677
- json_data.get("min_guidance_scale", 3.0),
678
- json_data.get("use_erg_tag", True),
679
- json_data.get("use_erg_lyric", False),
680
- json_data.get("use_erg_diffusion", True),
681
- json_data.get("oss_steps", None),
682
- json_data.get("guidance_scale_text", 0.0),
683
- json_data.get("guidance_scale_lyric", 0.0),
684
- audio2audio_enable=json_data.get("audio2audio_enable", False),
685
- ref_audio_strength=json_data.get("ref_audio_strength", 0.5),
686
- ref_audio_input=json_data.get("ref_audio_input", None),
687
- lora_name_or_path=json_data.get("lora_name_or_path", "none"),
688
- multi_seed_mode="Best of 3", # retake는 자동으로 다중 생성
689
- retake_variance=retake_variance,
690
- task="retake"
691
- )
692
-
693
- retake_bnt.click(
694
- fn=retake_process_func,
695
- inputs=[
696
- input_params_json,
697
- retake_variance,
698
- retake_seeds,
699
- ],
700
- outputs=retake_outputs + [retake_input_params_json],
701
- )
702
-
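# A sketch of what the retake handler above receives: the JSON panel from the last
# text2music run. Every field falls back to a default via .get(), so a minimal dict
# is enough to illustrate the call (the values here are made up).
_last_run = {
    "prompt": "funk, pop, 105 BPM",
    "lyrics": "[verse]\nNeon lights they flicker bright",
    "audio_duration": 60,
    "infer_step": 150,
    "guidance_scale": 15.0,
}
# retake_process_func(_last_run, retake_variance=0.2, retake_seeds="1234")
# would re-generate with the same settings, forcing the "Best of 3" multi-seed mode.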
703
- with gr.Tab("repainting"):
704
- retake_variance = gr.Slider(
705
- minimum=0.0, maximum=1.0, step=0.01, value=0.2, label="variance"
706
- )
707
- retake_seeds = gr.Textbox(
708
- label="repaint seeds (default None)", placeholder="", value=None
709
- )
710
- repaint_start = gr.Slider(
711
- minimum=0.0,
712
- maximum=240.0,
713
- step=0.01,
714
- value=0.0,
715
- label="Repaint Start Time",
716
- interactive=True,
717
- )
718
- repaint_end = gr.Slider(
719
- minimum=0.0,
720
- maximum=240.0,
721
- step=0.01,
722
- value=30.0,
723
- label="Repaint End Time",
724
- interactive=True,
725
- )
726
- repaint_source = gr.Radio(
727
- ["text2music", "last_repaint", "upload"],
728
- value="text2music",
729
- label="Repaint Source",
730
- elem_id="repaint_source",
731
- )
732
-
733
- repaint_source_audio_upload = gr.Audio(
734
- label="Upload Audio",
735
- type="filepath",
736
- visible=False,
737
- elem_id="repaint_source_audio_upload",
738
- show_download_button=True,
739
- )
740
- repaint_source.change(
741
- fn=lambda x: gr.update(
742
- visible=x == "upload", elem_id="repaint_source_audio_upload"
743
- ),
744
- inputs=[repaint_source],
745
- outputs=[repaint_source_audio_upload],
746
- )
747
-
748
- repaint_bnt = gr.Button("Repaint", variant="primary")
749
- repaint_outputs, repaint_input_params_json = create_output_ui("Repaint")
750
-
751
- def repaint_process_func(
752
- text2music_json_data,
753
- repaint_json_data,
754
- retake_variance,
755
- retake_seeds,
756
- repaint_start,
757
- repaint_end,
758
- repaint_source,
759
- repaint_source_audio_upload,
760
- prompt,
761
- lyrics,
762
- infer_step,
763
- guidance_scale,
764
- scheduler_type,
765
- cfg_type,
766
- omega_scale,
767
- manual_seeds,
768
- guidance_interval,
769
- guidance_interval_decay,
770
- min_guidance_scale,
771
- use_erg_tag,
772
- use_erg_lyric,
773
- use_erg_diffusion,
774
- oss_steps,
775
- guidance_scale_text,
776
- guidance_scale_lyric,
777
- ):
778
- if repaint_source == "upload":
779
- src_audio_path = repaint_source_audio_upload
780
- audio_duration = librosa.get_duration(filename=src_audio_path)
781
- json_data = {"audio_duration": audio_duration}
782
- elif repaint_source == "text2music":
783
- json_data = text2music_json_data
784
- src_audio_path = json_data["audio_path"]
785
- elif repaint_source == "last_repaint":
786
- json_data = repaint_json_data
787
- src_audio_path = json_data["audio_path"]
788
-
789
- return enhanced_process_func(
790
- json_data["audio_duration"],
791
- prompt,
792
- lyrics,
793
- infer_step,
794
- guidance_scale,
795
- scheduler_type,
796
- cfg_type,
797
- omega_scale,
798
- manual_seeds,
799
- guidance_interval,
800
- guidance_interval_decay,
801
- min_guidance_scale,
802
- use_erg_tag,
803
- use_erg_lyric,
804
- use_erg_diffusion,
805
- oss_steps,
806
- guidance_scale_text,
807
- guidance_scale_lyric,
808
- retake_seeds=retake_seeds,
809
- retake_variance=retake_variance,
810
- task="repaint",
811
- repaint_start=repaint_start,
812
- repaint_end=repaint_end,
813
- src_audio_path=src_audio_path,
814
- lora_name_or_path="none"
815
- )
816
-
817
- repaint_bnt.click(
818
- fn=repaint_process_func,
819
- inputs=[
820
- input_params_json,
821
- repaint_input_params_json,
822
- retake_variance,
823
- retake_seeds,
824
- repaint_start,
825
- repaint_end,
826
- repaint_source,
827
- repaint_source_audio_upload,
828
- prompt,
829
- lyrics,
830
- infer_step,
831
- guidance_scale,
832
- scheduler_type,
833
- cfg_type,
834
- omega_scale,
835
- manual_seeds,
836
- guidance_interval,
837
- guidance_interval_decay,
838
- min_guidance_scale,
839
- use_erg_tag,
840
- use_erg_lyric,
841
- use_erg_diffusion,
842
- oss_steps,
843
- guidance_scale_text,
844
- guidance_scale_lyric,
845
- ],
846
- outputs=repaint_outputs + [repaint_input_params_json],
847
- )
848
-
849
- with gr.Tab("edit"):
850
- edit_prompt = gr.Textbox(lines=2, label="Edit Tags", max_lines=4)
851
- edit_lyrics = gr.Textbox(lines=9, label="Edit Lyrics", max_lines=13)
852
- retake_seeds = gr.Textbox(
853
- label="edit seeds (default None)", placeholder="", value=None
854
- )
855
-
856
- edit_type = gr.Radio(
857
- ["only_lyrics", "remix"],
858
- value="only_lyrics",
859
- label="Edit Type",
860
- elem_id="edit_type",
861
- info="`only_lyrics` will keep the whole song the same except lyrics difference. Make your diffrence smaller, e.g. one lyrc line change.\nremix can change the song melody and genre",
862
- )
863
- edit_n_min = gr.Slider(
864
- minimum=0.0,
865
- maximum=1.0,
866
- step=0.01,
867
- value=0.6,
868
- label="edit_n_min",
869
- interactive=True,
870
- )
871
- edit_n_max = gr.Slider(
872
- minimum=0.0,
873
- maximum=1.0,
874
- step=0.01,
875
- value=1.0,
876
- label="edit_n_max",
877
- interactive=True,
878
- )
879
-
880
- def edit_type_change_func(edit_type):
881
- if edit_type == "only_lyrics":
882
- n_min = 0.6
883
- n_max = 1.0
884
- elif edit_type == "remix":
885
- n_min = 0.2
886
- n_max = 0.4
887
- return n_min, n_max
888
-
889
- edit_type.change(
890
- edit_type_change_func,
891
- inputs=[edit_type],
892
- outputs=[edit_n_min, edit_n_max],
893
- )
894
-
895
- edit_source = gr.Radio(
896
- ["text2music", "last_edit", "upload"],
897
- value="text2music",
898
- label="Edit Source",
899
- elem_id="edit_source",
900
- )
901
- edit_source_audio_upload = gr.Audio(
902
- label="Upload Audio",
903
- type="filepath",
904
- visible=False,
905
- elem_id="edit_source_audio_upload",
906
- show_download_button=True,
907
- )
908
- edit_source.change(
909
- fn=lambda x: gr.update(
910
- visible=x == "upload", elem_id="edit_source_audio_upload"
911
- ),
912
- inputs=[edit_source],
913
- outputs=[edit_source_audio_upload],
914
- )
915
-
916
- edit_bnt = gr.Button("Edit", variant="primary")
917
- edit_outputs, edit_input_params_json = create_output_ui("Edit")
918
-
919
- def edit_process_func(
920
- text2music_json_data,
921
- edit_input_params_json,
922
- edit_source,
923
- edit_source_audio_upload,
924
- prompt,
925
- lyrics,
926
- edit_prompt,
927
- edit_lyrics,
928
- edit_n_min,
929
- edit_n_max,
930
- infer_step,
931
- guidance_scale,
932
- scheduler_type,
933
- cfg_type,
934
- omega_scale,
935
- manual_seeds,
936
- guidance_interval,
937
- guidance_interval_decay,
938
- min_guidance_scale,
939
- use_erg_tag,
940
- use_erg_lyric,
941
- use_erg_diffusion,
942
- oss_steps,
943
- guidance_scale_text,
944
- guidance_scale_lyric,
945
- retake_seeds,
946
- ):
947
- if edit_source == "upload":
948
- src_audio_path = edit_source_audio_upload
949
- audio_duration = librosa.get_duration(filename=src_audio_path)
950
- json_data = {"audio_duration": audio_duration}
951
- elif edit_source == "text2music":
952
- json_data = text2music_json_data
953
- src_audio_path = json_data["audio_path"]
954
- elif edit_source == "last_edit":
955
- json_data = edit_input_params_json
956
- src_audio_path = json_data["audio_path"]
957
-
958
- if not edit_prompt:
959
- edit_prompt = prompt
960
- if not edit_lyrics:
961
- edit_lyrics = lyrics
962
-
963
- return enhanced_process_func(
964
- json_data["audio_duration"],
965
- prompt,
966
- lyrics,
967
- infer_step,
968
- guidance_scale,
969
- scheduler_type,
970
- cfg_type,
971
- omega_scale,
972
- manual_seeds,
973
- guidance_interval,
974
- guidance_interval_decay,
975
- min_guidance_scale,
976
- use_erg_tag,
977
- use_erg_lyric,
978
- use_erg_diffusion,
979
- oss_steps,
980
- guidance_scale_text,
981
- guidance_scale_lyric,
982
- task="edit",
983
- src_audio_path=src_audio_path,
984
- edit_target_prompt=edit_prompt,
985
- edit_target_lyrics=edit_lyrics,
986
- edit_n_min=edit_n_min,
987
- edit_n_max=edit_n_max,
988
- retake_seeds=retake_seeds,
989
- lora_name_or_path="none"
990
- )
991
-
992
- edit_bnt.click(
993
- fn=edit_process_func,
994
- inputs=[
995
- input_params_json,
996
- edit_input_params_json,
997
- edit_source,
998
- edit_source_audio_upload,
999
- prompt,
1000
- lyrics,
1001
- edit_prompt,
1002
- edit_lyrics,
1003
- edit_n_min,
1004
- edit_n_max,
1005
- infer_step,
1006
- guidance_scale,
1007
- scheduler_type,
1008
- cfg_type,
1009
- omega_scale,
1010
- manual_seeds,
1011
- guidance_interval,
1012
- guidance_interval_decay,
1013
- min_guidance_scale,
1014
- use_erg_tag,
1015
- use_erg_lyric,
1016
- use_erg_diffusion,
1017
- oss_steps,
1018
- guidance_scale_text,
1019
- guidance_scale_lyric,
1020
- retake_seeds,
1021
- ],
1022
- outputs=edit_outputs + [edit_input_params_json],
1023
- )
1024
-
1025
- with gr.Tab("extend"):
1026
- extend_seeds = gr.Textbox(
1027
- label="extend seeds (default None)", placeholder="", value=None
1028
- )
1029
- left_extend_length = gr.Slider(
1030
- minimum=0.0,
1031
- maximum=240.0,
1032
- step=0.01,
1033
- value=0.0,
1034
- label="Left Extend Length",
1035
- interactive=True,
1036
- )
1037
- right_extend_length = gr.Slider(
1038
- minimum=0.0,
1039
- maximum=240.0,
1040
- step=0.01,
1041
- value=30.0,
1042
- label="Right Extend Length",
1043
- interactive=True,
1044
- )
1045
- extend_source = gr.Radio(
1046
- ["text2music", "last_extend", "upload"],
1047
- value="text2music",
1048
- label="Extend Source",
1049
- elem_id="extend_source",
1050
- )
1051
-
1052
- extend_source_audio_upload = gr.Audio(
1053
- label="Upload Audio",
1054
- type="filepath",
1055
- visible=False,
1056
- elem_id="extend_source_audio_upload",
1057
- show_download_button=True,
1058
- )
1059
- extend_source.change(
1060
- fn=lambda x: gr.update(
1061
- visible=x == "upload", elem_id="extend_source_audio_upload"
1062
- ),
1063
- inputs=[extend_source],
1064
- outputs=[extend_source_audio_upload],
1065
- )
1066
-
1067
- extend_bnt = gr.Button("Extend", variant="primary")
1068
- extend_outputs, extend_input_params_json = create_output_ui("Extend")
1069
-
1070
- def extend_process_func(
1071
- text2music_json_data,
1072
- extend_input_params_json,
1073
- extend_seeds,
1074
- left_extend_length,
1075
- right_extend_length,
1076
- extend_source,
1077
- extend_source_audio_upload,
1078
- prompt,
1079
- lyrics,
1080
- infer_step,
1081
- guidance_scale,
1082
- scheduler_type,
1083
- cfg_type,
1084
- omega_scale,
1085
- manual_seeds,
1086
- guidance_interval,
1087
- guidance_interval_decay,
1088
- min_guidance_scale,
1089
- use_erg_tag,
1090
- use_erg_lyric,
1091
- use_erg_diffusion,
1092
- oss_steps,
1093
- guidance_scale_text,
1094
- guidance_scale_lyric,
1095
- ):
1096
- if extend_source == "upload":
1097
- src_audio_path = extend_source_audio_upload
1098
- # get audio duration
1099
- audio_duration = librosa.get_duration(filename=src_audio_path)
1100
- json_data = {"audio_duration": audio_duration}
1101
- elif extend_source == "text2music":
1102
- json_data = text2music_json_data
1103
- src_audio_path = json_data["audio_path"]
1104
- elif extend_source == "last_extend":
1105
- json_data = extend_input_params_json
1106
- src_audio_path = json_data["audio_path"]
1107
-
1108
- repaint_start = -left_extend_length
1109
- repaint_end = json_data["audio_duration"] + right_extend_length
1110
- return enhanced_process_func(
1111
- json_data["audio_duration"],
1112
- prompt,
1113
- lyrics,
1114
- infer_step,
1115
- guidance_scale,
1116
- scheduler_type,
1117
- cfg_type,
1118
- omega_scale,
1119
- manual_seeds,
1120
- guidance_interval,
1121
- guidance_interval_decay,
1122
- min_guidance_scale,
1123
- use_erg_tag,
1124
- use_erg_lyric,
1125
- use_erg_diffusion,
1126
- oss_steps,
1127
- guidance_scale_text,
1128
- guidance_scale_lyric,
1129
- retake_seeds=extend_seeds,
1130
- retake_variance=1.0,
1131
- task="extend",
1132
- repaint_start=repaint_start,
1133
- repaint_end=repaint_end,
1134
- src_audio_path=src_audio_path,
1135
- lora_name_or_path="none"
1136
- )
1137
-
1138
- extend_bnt.click(
1139
- fn=extend_process_func,
1140
- inputs=[
1141
- input_params_json,
1142
- extend_input_params_json,
1143
- extend_seeds,
1144
- left_extend_length,
1145
- right_extend_length,
1146
- extend_source,
1147
- extend_source_audio_upload,
1148
- prompt,
1149
- lyrics,
1150
- infer_step,
1151
- guidance_scale,
1152
- scheduler_type,
1153
- cfg_type,
1154
- omega_scale,
1155
- manual_seeds,
1156
- guidance_interval,
1157
- guidance_interval_decay,
1158
- min_guidance_scale,
1159
- use_erg_tag,
1160
- use_erg_lyric,
1161
- use_erg_diffusion,
1162
- oss_steps,
1163
- guidance_scale_text,
1164
- guidance_scale_lyric,
1165
- ],
1166
- outputs=extend_outputs + [extend_input_params_json],
1167
- )
1168
-
1169
- def json2output(json_data):
1170
- return (
1171
- json_data["audio_duration"],
1172
- json_data["prompt"],
1173
- json_data["lyrics"],
1174
- json_data["infer_step"],
1175
- json_data["guidance_scale"],
1176
- json_data["scheduler_type"],
1177
- json_data["cfg_type"],
1178
- json_data["omega_scale"],
1179
- ", ".join(map(str, json_data["actual_seeds"])),
1180
- json_data["guidance_interval"],
1181
- json_data["guidance_interval_decay"],
1182
- json_data["min_guidance_scale"],
1183
- json_data["use_erg_tag"],
1184
- json_data["use_erg_lyric"],
1185
- json_data["use_erg_diffusion"],
1186
- ", ".join(map(str, json_data["oss_steps"])),
1187
- (
1188
- json_data["guidance_scale_text"]
1189
- if "guidance_scale_text" in json_data
1190
- else 0.0
1191
- ),
1192
- (
1193
- json_data["guidance_scale_lyric"]
1194
- if "guidance_scale_lyric" in json_data
1195
- else 0.0
1196
- ),
1197
- (
1198
- json_data["audio2audio_enable"]
1199
- if "audio2audio_enable" in json_data
1200
- else False
1201
- ),
1202
- (
1203
- json_data["ref_audio_strength"]
1204
- if "ref_audio_strength" in json_data
1205
- else 0.5
1206
- ),
1207
- (
1208
- json_data["ref_audio_input"]
1209
- if "ref_audio_input" in json_data
1210
- else None
1211
- ),
1212
- )
1213
-
1214
- def sample_data(lora_name_or_path_):
1215
- if sample_data_func:
1216
- json_data = sample_data_func(lora_name_or_path_)
1217
- return json2output(json_data)
1218
- return {}
1219
-
1220
- sample_bnt.click(
1221
- sample_data,
1222
- inputs=[lora_name_or_path],
1223
- outputs=[
1224
- audio_duration,
1225
- prompt,
1226
- lyrics,
1227
- infer_step,
1228
- guidance_scale,
1229
- scheduler_type,
1230
- cfg_type,
1231
- omega_scale,
1232
- manual_seeds,
1233
- guidance_interval,
1234
- guidance_interval_decay,
1235
- min_guidance_scale,
1236
- use_erg_tag,
1237
- use_erg_lyric,
1238
- use_erg_diffusion,
1239
- oss_steps,
1240
- guidance_scale_text,
1241
- guidance_scale_lyric,
1242
- audio2audio_enable,
1243
- ref_audio_strength,
1244
- ref_audio_input,
1245
- ],
1246
- )
1247
-
1248
- # Main generate button event (uses the enhanced function)
1249
- text2music_bnt.click(
1250
- fn=enhanced_process_func,
1251
- inputs=[
1252
- audio_duration,
1253
- prompt,
1254
- lyrics,
1255
- infer_step,
1256
- guidance_scale,
1257
- scheduler_type,
1258
- cfg_type,
1259
- omega_scale,
1260
- manual_seeds,
1261
- guidance_interval,
1262
- guidance_interval_decay,
1263
- min_guidance_scale,
1264
- use_erg_tag,
1265
- use_erg_lyric,
1266
- use_erg_diffusion,
1267
- oss_steps,
1268
- guidance_scale_text,
1269
- guidance_scale_lyric,
1270
- audio2audio_enable,
1271
- ref_audio_strength,
1272
- ref_audio_input,
1273
- lora_name_or_path,
1274
- multi_seed_mode,
1275
- enable_smart_enhancement,
1276
- genre_preset
1277
- ],
1278
- outputs=outputs + [input_params_json],
1279
- )
1280
-
1281
-
1282
- def create_main_demo_ui(
1283
- text2music_process_func=dump_func,
1284
- sample_data_func=dump_func,
1285
- load_data_func=dump_func,
1286
- ):
1287
- with gr.Blocks(
1288
- title="ACE-Step Model 1.0 DEMO - Enhanced",
1289
- theme=gr.themes.Soft(),
1290
- css="""
1291
- .gradio-container {
1292
- max-width: 1200px !important;
1293
- }
1294
- .quality-info {
1295
- background: linear-gradient(45deg, #f0f8ff, #e6f3ff);
1296
- padding: 10px;
1297
- border-radius: 8px;
1298
- margin: 5px 0;
1299
- }
1300
- """
1301
- ) as demo:
1302
- gr.Markdown(
1303
- """
1304
- <h1 style="text-align: center;">🎵 ACE-Step PRO</h1>
1305
- <div style="text-align: center; margin: 20px;">
1306
- <p><strong>🚀 New features:</strong> Quality presets | Multi-generation | Smart prompts | Real-time preview | Quality score</p>
1307
- <p>
1308
- <a href="https://ace-step.github.io/" target='_blank'>Project</a> |
1309
- <a href="https://huggingface.co/ACE-Step/ACE-Step-v1-3.5B">Checkpoints</a> |
1310
- <a href="https://discord.gg/rjAZz2xBdG" target='_blank'>Discord</a>
1311
- </p>
1312
- </div>
1313
- """
1314
- )
1315
-
1316
- # Usage guide
1317
- with gr.Accordion("📖 사용법 가이드", open=False):
1318
- gr.Markdown("""
1319
- ### 🎯 Quick Start
- 1. **Choose a genre**: selecting a music genre automatically applies optimized tags
- 2. **Set the quality**: choose among Draft (fast) → Standard (recommended) → High Quality → Ultra
- 3. **Multi-generation**: "Best of 3/5/10" generates several times and automatically keeps the best result
- 4. **Preview**: check a quick 10-second preview before running a full generation
-
- ### 💡 Tips for Better Quality
- - **High quality**: the "High Quality" + "Best of 5" combination is recommended
- - **Quick tests**: use "Draft" together with the preview feature
- - **Genre focus**: pick a genre preset and check "Smart Enhancement"
- - **Lyric structure**: make full use of the [verse], [chorus], and [bridge] tags
1330
- """)
1331
-
1332
- with gr.Tab("🎵 Enhanced Text2Music"):
1333
- create_text2music_ui(
1334
- gr=gr,
1335
- text2music_process_func=text2music_process_func,
1336
- sample_data_func=sample_data_func,
1337
- load_data_func=load_data_func,
1338
- )
1339
- return demo
1340
-
1341
-
1342
- if __name__ == "__main__":
1343
- demo = create_main_demo_ui()
1344
- demo.launch(
1345
- server_name="0.0.0.0",
1346
- server_port=7860,
1347
- share=True # create a public share link
1348
- )
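# A sketch of wiring a real generation callable in place of the dump_func defaults;
# `my_text2music_process` is hypothetical and must accept the positional arguments
# listed in create_enhanced_process_func above. Queueing is optional but helps with
# long-running generations.
#
# demo = create_main_demo_ui(text2music_process_func=my_text2music_process)
# demo.queue().launch(server_name="0.0.0.0", server_port=7860)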