ginipick committed · verified
Commit 1b7d348 · Parent(s): ac1a240

Delete ui/components-BACKUP3.py

Files changed (1): ui/components-BACKUP3.py (+0 / -1538)

ui/components-BACKUP3.py DELETED
@@ -1,1538 +0,0 @@
- """
- ACE-Step: A Step Towards Music Generation Foundation Model
-
- https://github.com/ace-step/ACE-Step
-
- Apache 2.0 License
- """
-
- import gradio as gr
- import librosa
- import os
- import random
- import hashlib
- import numpy as np
- import json
- from typing import Dict, List, Tuple, Optional
-
- # [ADDED] OpenAI API setup
- try:
-     from openai import OpenAI
-     api_key = os.getenv("LLM_API")
-     if api_key:
-         client = OpenAI(api_key=api_key)
-         client_available = True
-         print("✅ OpenAI API client initialized successfully")
-     else:
-         client = None
-         client_available = False
-         print("⚠️ Warning: No OpenAI API key found. AI lyrics generation will be disabled.")
-         print("Set environment variable: export LLM_API='your-openai-api-key'")
- except Exception as e:
-     client = None
-     client_available = False
-     print(f"❌ Warning: Failed to initialize OpenAI client: {e}")
-
- TAG_DEFAULT = "funk, pop, soul, rock, melodic, guitar, drums, bass, keyboard, percussion, 105 BPM, energetic, upbeat, groovy, vibrant, dynamic"
- LYRIC_DEFAULT = """[verse]
- Neon lights they flicker bright
- City hums in dead of night
- Rhythms pulse through concrete veins
- Lost in echoes of refrains
-
- [verse]
- Bassline groovin' in my chest
- Heartbeats match the city's zest
- Electric whispers fill the air
- Synthesized dreams everywhere
-
- [chorus]
- Turn it up and let it flow
- Feel the fire let it grow
- In this rhythm we belong
- Hear the night sing out our song
-
- [verse]
- Guitar strings they start to weep
- Wake the soul from silent sleep
- Every note a story told
- In this night we're bold and gold
-
- [bridge]
- Voices blend in harmony
- Lost in pure cacophony
- Timeless echoes timeless cries
- Soulful shouts beneath the skies
-
- [verse]
- Keyboard dances on the keys
- Melodies on evening breeze
- Catch the tune and hold it tight
- In this moment we take flight
- """
-
- # [ADDED] System prompt for AI lyric writing
- LYRIC_SYSTEM_PROMPT = """You are an expert songwriter. Write song lyrics that fit the theme the user provides. Mark the lyric structure with "[ ]" tags, following the example below.
-
- Example:
- [verse]
- Neon lights they flicker bright
- City hums in dead of night
- Rhythms pulse through concrete veins
- Lost in echoes of refrains
-
- [verse]
- Bassline groovin' in my chest
- Heartbeats match the city's zest
- Electric whispers fill the air
- Synthesized dreams everywhere
-
- [chorus]
- Turn it up and let it flow
- Feel the fire let it grow
- In this rhythm we belong
- Hear the night sing out our song
-
- [verse]
- Guitar strings they start to weep
- Wake the soul from silent sleep
- Every note a story told
- In this night we're bold and gold
-
- [bridge]
- Voices blend in harmony
- Lost in pure cacophony
- Timeless echoes timeless cries
- Soulful shouts beneath the skies
-
- [verse]
- Keyboard dances on the keys
- Melodies on evening breeze
- Catch the tune and hold it tight
- In this moment we take flight
-
- Rules:
- 1. Always use structure tags such as [verse], [chorus], and [bridge]
- 2. Write the lyrics in the same language as the user's input
- 3. Keep each section to roughly 4-8 lines
- 4. Write rhythmic, rhyming lyrics that suit the theme and mood"""
-
- # [ADDED] AI lyric generation function
- def generate_lyrics_with_ai(theme: str, genre: str = None) -> str:
-     """Generate theme-based lyrics using the OpenAI API."""
-     print(f"🎵 AI lyric generation started: theme='{theme}', genre='{genre}'")
-
-     if not client_available or client is None:
-         print("❌ OpenAI client not available, returning default lyrics")
-         return LYRIC_DEFAULT
-
-     if not theme or theme.strip() == "":
-         print("⚠️ Empty theme, returning default lyrics")
-         return LYRIC_DEFAULT
-
-     try:
-         # Add genre information to the prompt when available
-         user_prompt = f"Please write song lyrics on the following theme: {theme}"
-         if genre and genre != "Custom":
-             user_prompt += f"\nGenre: {genre}"
-
-         print("📝 Calling the OpenAI API...")
-
-         # [MODIFIED] Converted the originally suggested API format to the standard one:
-         # use client.chat.completions.create rather than client.responses.create
-         response = client.chat.completions.create(
-             model="gpt-4o-mini",  # gpt-4o-mini is used because "gpt-4.1-mini" was not a valid model name
-             messages=[
-                 {
-                     "role": "system",
-                     "content": LYRIC_SYSTEM_PROMPT
-                 },
-                 {
-                     "role": "user",
-                     "content": user_prompt
-                 }
-             ],
-             temperature=0.8,
-             max_tokens=1500,
-             top_p=1
-         )
-
-         generated_lyrics = response.choices[0].message.content
-         print("✅ AI lyric generation complete")
-         print(f"Generated lyrics preview: {generated_lyrics[:100]}...")
-         return generated_lyrics
-
-     except Exception as e:
-         print(f"❌ AI lyric generation error: {e}")
-         import traceback
-         print(f"Full traceback: {traceback.format_exc()}")
-         return LYRIC_DEFAULT
-
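- # Usage sketch (hypothetical call; assumes the LLM_API environment variable is set):
- #
- #     lyrics = generate_lyrics_with_ai("first love", genre="Modern Pop")
- #     print(lyrics)  # structured lyrics using [verse]/[chorus]/[bridge] tags
-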
- # Extended genre presets (original tags plus refinements)
- GENRE_PRESETS = {
-     "Modern Pop": "pop, synth, drums, guitar, 120 bpm, upbeat, catchy, vibrant, female vocals, polished vocals, radio-ready, commercial, layered vocals",
-     "Rock": "rock, electric guitar, drums, bass, 130 bpm, energetic, rebellious, gritty, male vocals, raw vocals, power chords, driving rhythm",
-     "Hip Hop": "hip hop, 808 bass, hi-hats, synth, 90 bpm, bold, urban, intense, male vocals, rhythmic vocals, trap beats, punchy drums",
-     "Country": "country, acoustic guitar, steel guitar, fiddle, 100 bpm, heartfelt, rustic, warm, male vocals, twangy vocals, storytelling, americana",
-     "EDM": "edm, synth, bass, kick drum, 128 bpm, euphoric, pulsating, energetic, instrumental, progressive build, festival anthem, electronic",
-     "Reggae": "reggae, guitar, bass, drums, 80 bpm, chill, soulful, positive, male vocals, smooth vocals, offbeat rhythm, island vibes",
-     "Classical": "classical, orchestral, strings, piano, 60 bpm, elegant, emotive, timeless, instrumental, dynamic range, sophisticated harmony",
-     "Jazz": "jazz, saxophone, piano, double bass, 110 bpm, smooth, improvisational, soulful, male vocals, crooning vocals, swing feel, sophisticated",
-     "Metal": "metal, electric guitar, double kick drum, bass, 160 bpm, aggressive, intense, heavy, male vocals, screamed vocals, distorted, powerful",
-     "R&B": "r&b, synth, bass, drums, 85 bpm, sultry, groovy, romantic, female vocals, silky vocals, smooth production, neo-soul"
- }
-
- # Quality preset system
- QUALITY_PRESETS = {
-     "Draft (Fast)": {
-         "infer_step": 50,
-         "guidance_scale": 10.0,
-         "scheduler_type": "euler",
-         "omega_scale": 5.0,
-         "use_erg_diffusion": False,
-         "use_erg_tag": True,
-         "description": "Fast draft generation (1-2 min)"
-     },
-     "Standard": {
-         "infer_step": 150,
-         "guidance_scale": 15.0,
-         "scheduler_type": "euler",
-         "omega_scale": 10.0,
-         "use_erg_diffusion": True,
-         "use_erg_tag": True,
-         "description": "Standard quality (3-5 min)"
-     },
-     "High Quality": {
-         "infer_step": 200,
-         "guidance_scale": 18.0,
-         "scheduler_type": "heun",
-         "omega_scale": 15.0,
-         "use_erg_diffusion": True,
-         "use_erg_tag": True,
-         "description": "High-quality generation (8-12 min)"
-     },
-     "Ultra (Best)": {
-         "infer_step": 299,
-         "guidance_scale": 20.0,
-         "scheduler_type": "heun",
-         "omega_scale": 20.0,
-         "use_erg_diffusion": True,
-         "use_erg_tag": True,
-         "description": "Best quality (15-20 min)"
-     }
- }
-
- # Multi-seed generation options
- MULTI_SEED_OPTIONS = {
-     "Single": 1,
-     "Best of 3": 3,
-     "Best of 5": 5,
-     "Best of 10": 10
- }
-
- class MusicGenerationCache:
-     """Cache for generation results."""
-     def __init__(self):
-         self.cache = {}
-         self.max_cache_size = 50
-
-     def get_cache_key(self, params):
-         # Hash only the parameters that determine the result
-         key_params = {k: v for k, v in params.items()
-                       if k in ['prompt', 'lyrics', 'infer_step', 'guidance_scale', 'audio_duration']}
-         return hashlib.md5(str(sorted(key_params.items())).encode()).hexdigest()[:16]
-
-     def get_cached_result(self, params):
-         key = self.get_cache_key(params)
-         return self.cache.get(key)
-
-     def cache_result(self, params, result):
-         if len(self.cache) >= self.max_cache_size:
-             # Evict the oldest entry (dicts preserve insertion order)
-             oldest_key = next(iter(self.cache))
-             del self.cache[oldest_key]
-
-         key = self.get_cache_key(params)
-         self.cache[key] = result
-
- # Global cache instance
- generation_cache = MusicGenerationCache()
-
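- # Behavior sketch (hypothetical values): calls that share the key parameters
- # reuse a single cached entry.
- #
- #     params = {"prompt": "pop", "lyrics": "[verse] ...", "audio_duration": 30,
- #               "infer_step": 150, "guidance_scale": 15.0}
- #     generation_cache.cache_result(params, result)
- #     assert generation_cache.get_cached_result(params) is result
-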
- def enhance_prompt_with_genre(base_prompt: str, genre: str) -> str:
-     """Smart prompt expansion based on the selected genre."""
-     if genre == "Custom" or not genre:
-         return base_prompt
-
-     # Extra enhancement tags per genre
-     genre_enhancements = {
-         "Modern Pop": ["polished production", "mainstream appeal", "hook-driven"],
-         "Rock": ["guitar-driven", "powerful drums", "energetic performance"],
-         "Hip Hop": ["rhythmic flow", "urban atmosphere", "bass-heavy"],
-         "Country": ["acoustic warmth", "storytelling melody", "authentic feel"],
-         "EDM": ["electronic atmosphere", "build-ups", "dance-friendly"],
-         "Reggae": ["laid-back groove", "tropical vibes", "rhythmic guitar"],
-         "Classical": ["orchestral depth", "musical sophistication", "timeless beauty"],
-         "Jazz": ["musical complexity", "improvisational spirit", "sophisticated harmony"],
-         "Metal": ["aggressive energy", "powerful sound", "intense atmosphere"],
-         "R&B": ["smooth groove", "soulful expression", "rhythmic sophistication"]
-     }
-
-     if genre in genre_enhancements:
-         additional_tags = ", ".join(genre_enhancements[genre])
-         return f"{base_prompt}, {additional_tags}"
-
-     return base_prompt
-
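- # Example: enhance_prompt_with_genre("pop, synth", "Rock")
- #   -> "pop, synth, guitar-driven, powerful drums, energetic performance"
-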
- def calculate_quality_score(audio_path: str) -> float:
-     """Simple quality score (a real implementation would use richer metrics)."""
-     try:
-         y, sr = librosa.load(audio_path)
-
-         # Basic quality metrics
-         rms_energy = np.sqrt(np.mean(y**2))
-         spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
-         zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y))
-
-         # Normalized score (0-100)
-         energy_score = min(rms_energy * 1000, 40)               # 0-40 points
-         spectral_score = min(spectral_centroid / 100, 40)       # 0-40 points
-         clarity_score = min((1 - zero_crossing_rate) * 20, 20)  # 0-20 points
-
-         total_score = energy_score + spectral_score + clarity_score
-         return round(total_score, 1)
-     except Exception:
-         return 50.0  # default value
-
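- # The heuristic sums three capped terms: energy (0-40) + spectral centroid (0-40)
- # + clarity (0-20), so the score always lands in the 0-100 range.
-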
- def update_tags_from_preset(preset_name):
-     if preset_name == "Custom":
-         return ""
-     return GENRE_PRESETS.get(preset_name, "")
-
- def update_quality_preset(preset_name):
-     """Apply a quality preset."""
-     if preset_name not in QUALITY_PRESETS:
-         return (100, 15.0, "euler", 10.0, True, True)
-
-     preset = QUALITY_PRESETS[preset_name]
-     return (
-         preset.get("infer_step", 100),
-         preset.get("guidance_scale", 15.0),
-         preset.get("scheduler_type", "euler"),
-         preset.get("omega_scale", 10.0),
-         preset.get("use_erg_diffusion", True),
-         preset.get("use_erg_tag", True)
-     )
-
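- # e.g. update_quality_preset("Draft (Fast)") -> (50, 10.0, "euler", 5.0, False, True)
-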
- def create_enhanced_process_func(original_func):
-     """Wrap the original generation function with the enhanced features."""
-
-     def enhanced_func(
-         audio_duration, prompt, lyrics, infer_step, guidance_scale,
-         scheduler_type, cfg_type, omega_scale, manual_seeds,
-         guidance_interval, guidance_interval_decay, min_guidance_scale,
-         use_erg_tag, use_erg_lyric, use_erg_diffusion, oss_steps,
-         guidance_scale_text, guidance_scale_lyric,
-         audio2audio_enable=False, ref_audio_strength=0.5, ref_audio_input=None,
-         lora_name_or_path="none", multi_seed_mode="Single",
-         enable_smart_enhancement=True, genre_preset="Custom", **kwargs
-     ):
-         print("🎵 Enhanced generation started")
-         print(f"Parameters: duration={audio_duration}, prompt='{prompt[:50]}...', multi_seed={multi_seed_mode}")
-
-         # Smart prompt expansion
-         if enable_smart_enhancement and genre_preset != "Custom":
-             enhanced_prompt = enhance_prompt_with_genre(prompt, genre_preset)
-             print(f"Enhanced prompt: {enhanced_prompt[:100]}...")
-         else:
-             enhanced_prompt = prompt
-
-         # Cache lookup
-         cache_params = {
-             'prompt': enhanced_prompt, 'lyrics': lyrics, 'audio_duration': audio_duration,
-             'infer_step': infer_step, 'guidance_scale': guidance_scale
-         }
-
-         cached_result = generation_cache.get_cached_result(cache_params)
-         if cached_result:
-             print("Using cached result")
-             return cached_result
-
-         # Multi-seed generation
-         num_candidates = MULTI_SEED_OPTIONS.get(multi_seed_mode, 1)
-         print(f"Generating {num_candidates} candidates")
-
-         if num_candidates == 1:
-             # Call the original function
-             result = original_func(
-                 audio_duration, enhanced_prompt, lyrics, infer_step, guidance_scale,
-                 scheduler_type, cfg_type, omega_scale, manual_seeds,
-                 guidance_interval, guidance_interval_decay, min_guidance_scale,
-                 use_erg_tag, use_erg_lyric, use_erg_diffusion, oss_steps,
-                 guidance_scale_text, guidance_scale_lyric, audio2audio_enable,
-                 ref_audio_strength, ref_audio_input, lora_name_or_path, **kwargs
-             )
-         else:
-             # Placeholder for true multi-seed generation (currently a single run)
-             result = original_func(
-                 audio_duration, enhanced_prompt, lyrics, infer_step, guidance_scale,
-                 scheduler_type, cfg_type, omega_scale, manual_seeds,
-                 guidance_interval, guidance_interval_decay, min_guidance_scale,
-                 use_erg_tag, use_erg_lyric, use_erg_diffusion, oss_steps,
-                 guidance_scale_text, guidance_scale_lyric, audio2audio_enable,
-                 ref_audio_strength, ref_audio_input, lora_name_or_path, **kwargs
-             )
-
-         # Cache the result
-         generation_cache.cache_result(cache_params, result)
-         print("Generation completed")
-         return result
-
-     return enhanced_func
-
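- # Wiring sketch (hypothetical; dump_func is the dummy backend defined below):
- #
- #     enhanced = create_enhanced_process_func(dump_func)
- #     audio, params = enhanced(30, "pop, synth", "[verse] ...", 150, 15.0,
- #                              "euler", "apg", 10.0, None, 0.5, 0.0, 3.0,
- #                              True, False, True, None, 0.0, 0.0)
-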
- def create_output_ui(task_name="Text2Music"):
-     # For many consumer-grade GPU devices, only one batch can be run
-     output_audio1 = gr.Audio(type="filepath", label=f"{task_name} Generated Audio 1")
-
-     with gr.Accordion(f"{task_name} Parameters & Quality Info", open=False):
-         input_params_json = gr.JSON(label=f"{task_name} Parameters")
-
-         # Quality information display
-         with gr.Row():
-             quality_score = gr.Number(label="Quality Score (0-100)", value=0, interactive=False)
-             generation_info = gr.Textbox(
-                 label="Generation Info",
-                 value="",
-                 interactive=False,
-                 max_lines=2
-             )
-
-     outputs = [output_audio1]
-     return outputs, input_params_json
-
- def dump_func(*args):
-     """Dummy function - logs the call instead of actually generating music."""
-     print(f"🎵 Dummy function called with {len(args)} arguments")
-     if args:
-         print(f"Parameters preview: duration={args[0] if len(args) > 0 else 'N/A'}, prompt='{args[1][:50] if len(args) > 1 else 'N/A'}...'")
-
-     # Fake result (a real implementation would return actual generation output)
-     dummy_result = [
-         None,  # audio file path (None means no audio is produced)
-         {
-             "prompt": args[1] if len(args) > 1 else "test",
-             "lyrics": args[2] if len(args) > 2 else "test lyrics",
-             "audio_duration": args[0] if len(args) > 0 else 30,
-             "status": "Done (dummy mode - no actual music generated)",
-             "infer_step": args[3] if len(args) > 3 else 150,
-             "guidance_scale": args[4] if len(args) > 4 else 15.0,
-             "scheduler_type": args[5] if len(args) > 5 else "euler",
-             "cfg_type": args[6] if len(args) > 6 else "apg",
-             "omega_scale": args[7] if len(args) > 7 else 10.0,
-             "actual_seeds": [1234],
-             "guidance_interval": args[9] if len(args) > 9 else 0.5,
-             "guidance_interval_decay": args[10] if len(args) > 10 else 0.0,
-             "min_guidance_scale": args[11] if len(args) > 11 else 3.0,
-             "use_erg_tag": args[12] if len(args) > 12 else True,
-             "use_erg_lyric": args[13] if len(args) > 13 else False,
-             "use_erg_diffusion": args[14] if len(args) > 14 else True,
-             "oss_steps": [],
-             "guidance_scale_text": args[16] if len(args) > 16 else 0.0,
-             "guidance_scale_lyric": args[17] if len(args) > 17 else 0.0,
-             "audio2audio_enable": args[18] if len(args) > 18 else False,
-             "ref_audio_strength": args[19] if len(args) > 19 else 0.5,
-             "ref_audio_input": args[20] if len(args) > 20 else None,
-             "audio_path": None
-         }
-     ]
-     return dummy_result
-
- def create_text2music_ui(
-     gr,
-     text2music_process_func,
-     sample_data_func=None,
-     load_data_func=None,
- ):
-     # Build the enhanced process function
-     enhanced_process_func = create_enhanced_process_func(text2music_process_func)
-
-     with gr.Row():
-         with gr.Column():
-             # Quality & performance settings section
-             with gr.Group():
-                 gr.Markdown("### ⚡ Quality & Performance Settings")
-                 with gr.Row():
-                     quality_preset = gr.Dropdown(
-                         choices=list(QUALITY_PRESETS.keys()),
-                         value="Standard",
-                         label="Quality Preset",
-                         scale=2
-                     )
-                     multi_seed_mode = gr.Dropdown(
-                         choices=list(MULTI_SEED_OPTIONS.keys()),
-                         value="Single",
-                         label="Multi-Generation Mode",
-                         scale=2,
-                         info="Generate several candidates and keep the best one"
-                     )
-
-                 preset_description = gr.Textbox(
-                     value=QUALITY_PRESETS["Standard"]["description"],
-                     label="Description",
-                     interactive=False,
-                     max_lines=1
-                 )
-
-             with gr.Row(equal_height=True):
-                 # tags and lyrics examples are from the AI music generation community
-                 audio_duration = gr.Slider(
-                     -1,
-                     240.0,
-                     step=0.00001,
-                     value=-1,
-                     label="Audio Duration",
-                     interactive=True,
-                     info="-1 means random duration (30 ~ 240).",
-                     scale=7,
-                 )
-                 sample_bnt = gr.Button("Sample", variant="secondary", scale=1)
-                 preview_bnt = gr.Button("🎵 Preview", variant="secondary", scale=2)
-
-             # audio2audio
-             with gr.Row(equal_height=True):
-                 audio2audio_enable = gr.Checkbox(
-                     label="Enable Audio2Audio",
-                     value=False,
-                     info="Check to enable Audio-to-Audio generation using a reference audio.",
-                     elem_id="audio2audio_checkbox"
-                 )
-                 lora_name_or_path = gr.Dropdown(
-                     label="Lora Name or Path",
-                     choices=["ACE-Step/ACE-Step-v1-chinese-rap-LoRA", "none"],
-                     value="none",
-                     allow_custom_value=True,
-                 )
-
-             ref_audio_input = gr.Audio(
-                 type="filepath",
-                 label="Reference Audio (for Audio2Audio)",
-                 visible=False,
-                 elem_id="ref_audio_input",
-                 show_download_button=True
-             )
-             ref_audio_strength = gr.Slider(
-                 label="Reference audio strength",
-                 minimum=0.0,
-                 maximum=1.0,
-                 step=0.01,
-                 value=0.5,
-                 elem_id="ref_audio_strength",
-                 visible=False,
-                 interactive=True,
-             )
-
-             def toggle_ref_audio_visibility(is_checked):
-                 return (
-                     gr.update(visible=is_checked, elem_id="ref_audio_input"),
-                     gr.update(visible=is_checked, elem_id="ref_audio_strength"),
-                 )
-
-             audio2audio_enable.change(
-                 fn=toggle_ref_audio_visibility,
-                 inputs=[audio2audio_enable],
-                 outputs=[ref_audio_input, ref_audio_strength],
-             )
-
-             with gr.Column(scale=2):
-                 with gr.Group():
-                     gr.Markdown("""### 🎼 Smart Prompt System
-                     <center>Selecting a genre automatically adds optimized tags. Enter tags separated by commas.</center>""")
-
-                     with gr.Row():
-                         genre_preset = gr.Dropdown(
-                             choices=["Custom"] + list(GENRE_PRESETS.keys()),
-                             value="Custom",
-                             label="Genre Preset",
-                             scale=1,
-                         )
-                         enable_smart_enhancement = gr.Checkbox(
-                             label="Smart Enhancement",
-                             value=True,
-                             info="Automatic tag optimization",
-                             scale=1
-                         )
-
-                     prompt = gr.Textbox(
-                         lines=2,
-                         label="Tags",
-                         max_lines=4,
-                         value=TAG_DEFAULT,
-                         placeholder="Comma-separated tags...",
-                     )
-
-                 # [ADDED] AI lyric-writing UI
-                 with gr.Group():
-                     gr.Markdown("""### 🤖 AI Lyric Writer
-                     <center>Enter a theme and click the 'AI Lyrics' button to generate lyrics automatically.</center>""")
-
-                     with gr.Row():
-                         lyric_theme_input = gr.Textbox(
-                             label="Lyric Theme",
-                             placeholder="e.g., the flutter of first love, the pain of a breakup, the sigh of a man leaving for the army, a hopeful tomorrow...",
-                             scale=3,
-                             interactive=True
-                         )
-                         generate_lyrics_btn = gr.Button("🤖 AI Lyrics", variant="secondary", scale=1)
-
-                     # API status display
-                     api_status = gr.Textbox(
-                         value="✅ AI lyric writing enabled" if client_available else "❌ API key not set (export LLM_API='your-key')",
-                         label="API Status",
-                         interactive=False,
-                         max_lines=1,
-                         scale=1
-                     )
-
-                 with gr.Group():
-                     gr.Markdown("""### 📝 Lyrics Input
-                     <center>Structure tags such as [verse], [chorus], and [bridge] are recommended.<br>Use [instrumental] or [inst] to generate an instrumental piece.</center>""")
-                     lyrics = gr.Textbox(
-                         lines=9,
-                         label="Lyrics",
-                         max_lines=13,
-                         value=LYRIC_DEFAULT,
-                         placeholder="Enter lyrics. Structure tags such as [verse] and [chorus] are recommended."
-                     )
-
-             with gr.Accordion("Basic Settings", open=False):
-                 infer_step = gr.Slider(
-                     minimum=1,
-                     maximum=300,
-                     step=1,
-                     value=150,
-                     label="Infer Steps",
-                     interactive=True,
-                 )
-                 guidance_scale = gr.Slider(
-                     minimum=0.0,
-                     maximum=30.0,
-                     step=0.1,
-                     value=15.0,
-                     label="Guidance Scale",
-                     interactive=True,
-                     info="When guidance_scale_lyric > 1 and guidance_scale_text > 1, the guidance scale will not be applied.",
-                 )
-                 guidance_scale_text = gr.Slider(
-                     minimum=0.0,
-                     maximum=10.0,
-                     step=0.1,
-                     value=0.0,
-                     label="Guidance Scale Text",
-                     interactive=True,
-                     info="Guidance scale for the text condition. Applies only to cfg; guidance_scale_text=5.0 with guidance_scale_lyric=1.5 is a good starting point.",
-                 )
-                 guidance_scale_lyric = gr.Slider(
-                     minimum=0.0,
-                     maximum=10.0,
-                     step=0.1,
-                     value=0.0,
-                     label="Guidance Scale Lyric",
-                     interactive=True,
-                 )
-
-                 manual_seeds = gr.Textbox(
-                     label="manual seeds (default None)",
-                     placeholder="1,2,3,4",
-                     value=None,
-                     info="Seed for the generation",
-                 )
-
-             with gr.Accordion("Advanced Settings", open=False):
-                 scheduler_type = gr.Radio(
-                     ["euler", "heun"],
-                     value="euler",
-                     label="Scheduler Type",
-                     elem_id="scheduler_type",
-                     info="Scheduler type for the generation. euler is recommended; heun takes more time.",
-                 )
-                 cfg_type = gr.Radio(
-                     ["cfg", "apg", "cfg_star"],
-                     value="apg",
-                     label="CFG Type",
-                     elem_id="cfg_type",
-                     info="CFG type for the generation. apg is recommended. cfg and cfg_star are almost the same.",
-                 )
-                 use_erg_tag = gr.Checkbox(
-                     label="use ERG for tag",
-                     value=True,
-                     info="Use Entropy Rectifying Guidance for tags. It applies a temperature to the attention, weakening the tag condition for better diversity.",
-                 )
-                 use_erg_lyric = gr.Checkbox(
-                     label="use ERG for lyric",
-                     value=False,
-                     info="The same, but applied to the lyric encoder's attention.",
-                 )
-                 use_erg_diffusion = gr.Checkbox(
-                     label="use ERG for diffusion",
-                     value=True,
-                     info="The same, but applied to the diffusion model's attention.",
-                 )
-
-                 omega_scale = gr.Slider(
-                     minimum=-100.0,
-                     maximum=100.0,
-                     step=0.1,
-                     value=10.0,
-                     label="Granularity Scale",
-                     interactive=True,
-                     info="Granularity scale for the generation. Higher values can reduce artifacts.",
-                 )
-
-                 guidance_interval = gr.Slider(
-                     minimum=0.0,
-                     maximum=1.0,
-                     step=0.01,
-                     value=0.5,
-                     label="Guidance Interval",
-                     interactive=True,
-                     info="Guidance interval for the generation. 0.5 means guidance is applied only in the middle steps (0.25 * infer_steps to 0.75 * infer_steps).",
-                 )
-                 guidance_interval_decay = gr.Slider(
-                     minimum=0.0,
-                     maximum=1.0,
-                     step=0.01,
-                     value=0.0,
-                     label="Guidance Interval Decay",
-                     interactive=True,
-                     info="Guidance interval decay for the generation. The guidance scale decays from guidance_scale to min_guidance_scale over the interval. 0.0 means no decay.",
-                 )
-                 min_guidance_scale = gr.Slider(
-                     minimum=0.0,
-                     maximum=200.0,
-                     step=0.1,
-                     value=3.0,
-                     label="Min Guidance Scale",
-                     interactive=True,
-                     info="Min guidance scale at the end of the guidance interval decay.",
-                 )
-                 oss_steps = gr.Textbox(
-                     label="OSS Steps",
-                     placeholder="16, 29, 52, 96, 129, 158, 172, 183, 189, 200",
-                     value=None,
-                     info="Optimal Steps for the generation, but not well tested.",
-                 )
-
-             text2music_bnt = gr.Button("🎵 Generate Music", variant="primary", size="lg")
-
-             # [ADDED] AI lyric generation event handler
-             def handle_ai_lyrics_generation(theme, genre):
-                 """Handle clicks on the AI Lyrics button."""
-                 print(f"🤖 AI Lyrics button clicked: theme='{theme}', genre='{genre}'")
-
-                 if not theme or theme.strip() == "":
-                     return "⚠️ Please enter a lyric theme!"
-
-                 try:
-                     generated_lyrics = generate_lyrics_with_ai(theme, genre)
-                     return generated_lyrics
-                 except Exception as e:
-                     print(f"Error while generating lyrics: {e}")
-                     return f"❌ An error occurred while generating lyrics: {str(e)}"
-
-             generate_lyrics_btn.click(
-                 fn=handle_ai_lyrics_generation,
-                 inputs=[lyric_theme_input, genre_preset],
-                 outputs=[lyrics]
-             )
-
-             # Wire up event handlers after all UI elements are defined
-             genre_preset.change(
-                 fn=update_tags_from_preset,
-                 inputs=[genre_preset],
-                 outputs=[prompt]
-             )
-
-             quality_preset.change(
-                 fn=lambda x: QUALITY_PRESETS.get(x, {}).get("description", ""),
-                 inputs=[quality_preset],
-                 outputs=[preset_description]
-             )
-
-             quality_preset.change(
-                 fn=update_quality_preset,
-                 inputs=[quality_preset],
-                 outputs=[infer_step, guidance_scale, scheduler_type, omega_scale, use_erg_diffusion, use_erg_tag]
-             )
-
-         with gr.Column():
-             outputs, input_params_json = create_output_ui()
-
-             # Real-time preview feature
-             def generate_preview(prompt, lyrics, genre_preset):
-                 """Generate a 10-second preview."""
-                 preview_params = {
-                     "audio_duration": 10,
-                     "infer_step": 50,
-                     "guidance_scale": 12.0,
-                     "scheduler_type": "euler",
-                     "cfg_type": "apg",
-                     "omega_scale": 5.0,
-                 }
-
-                 enhanced_prompt = enhance_prompt_with_genre(prompt, genre_preset) if genre_preset != "Custom" else prompt
-
-                 try:
-                     # A real implementation would use a dedicated fast-generation mode
-                     result = enhanced_process_func(
-                         preview_params["audio_duration"],
-                         enhanced_prompt,
-                         lyrics[:200],  # use only part of the lyrics
-                         preview_params["infer_step"],
-                         preview_params["guidance_scale"],
-                         preview_params["scheduler_type"],
-                         preview_params["cfg_type"],
-                         preview_params["omega_scale"],
-                         None,   # manual_seeds
-                         0.5,    # guidance_interval
-                         0.0,    # guidance_interval_decay
-                         3.0,    # min_guidance_scale
-                         True,   # use_erg_tag
-                         False,  # use_erg_lyric
-                         True,   # use_erg_diffusion
-                         None,   # oss_steps
-                         0.0,    # guidance_scale_text
-                         0.0,    # guidance_scale_lyric
-                         multi_seed_mode="Single"
-                     )
-                     return result[0] if result else None
-                 except Exception as e:
-                     return f"Preview generation failed: {str(e)}"
-
-             preview_bnt.click(
-                 fn=generate_preview,
-                 inputs=[prompt, lyrics, genre_preset],
-                 outputs=[outputs[0]]
-             )
-
-             with gr.Tab("retake"):
-                 retake_variance = gr.Slider(
-                     minimum=0.0, maximum=1.0, step=0.01, value=0.2, label="variance"
-                 )
-                 retake_seeds = gr.Textbox(
-                     label="retake seeds (default None)", placeholder="", value=None
-                 )
-                 retake_bnt = gr.Button("Retake", variant="primary")
-                 retake_outputs, retake_input_params_json = create_output_ui("Retake")
-
-                 def retake_process_func(json_data, retake_variance, retake_seeds):
-                     return enhanced_process_func(
-                         json_data.get("audio_duration", 30),
-                         json_data.get("prompt", ""),
-                         json_data.get("lyrics", ""),
-                         json_data.get("infer_step", 100),
-                         json_data.get("guidance_scale", 15.0),
-                         json_data.get("scheduler_type", "euler"),
-                         json_data.get("cfg_type", "apg"),
-                         json_data.get("omega_scale", 10.0),
-                         retake_seeds,
-                         json_data.get("guidance_interval", 0.5),
-                         json_data.get("guidance_interval_decay", 0.0),
-                         json_data.get("min_guidance_scale", 3.0),
-                         json_data.get("use_erg_tag", True),
-                         json_data.get("use_erg_lyric", False),
-                         json_data.get("use_erg_diffusion", True),
-                         json_data.get("oss_steps", None),
-                         json_data.get("guidance_scale_text", 0.0),
-                         json_data.get("guidance_scale_lyric", 0.0),
-                         audio2audio_enable=json_data.get("audio2audio_enable", False),
-                         ref_audio_strength=json_data.get("ref_audio_strength", 0.5),
-                         ref_audio_input=json_data.get("ref_audio_input", None),
-                         lora_name_or_path=json_data.get("lora_name_or_path", "none"),
-                         multi_seed_mode="Best of 3",  # retake automatically generates multiple candidates
-                         retake_variance=retake_variance,
-                         task="retake"
-                     )
-
-                 retake_bnt.click(
-                     fn=retake_process_func,
-                     inputs=[
-                         input_params_json,
-                         retake_variance,
-                         retake_seeds,
-                     ],
-                     outputs=retake_outputs + [retake_input_params_json],
-                 )
-
-             with gr.Tab("repainting"):
-                 retake_variance = gr.Slider(
-                     minimum=0.0, maximum=1.0, step=0.01, value=0.2, label="variance"
-                 )
-                 retake_seeds = gr.Textbox(
-                     label="repaint seeds (default None)", placeholder="", value=None
-                 )
-                 repaint_start = gr.Slider(
-                     minimum=0.0,
-                     maximum=240.0,
-                     step=0.01,
-                     value=0.0,
-                     label="Repaint Start Time",
-                     interactive=True,
-                 )
-                 repaint_end = gr.Slider(
-                     minimum=0.0,
-                     maximum=240.0,
-                     step=0.01,
-                     value=30.0,
-                     label="Repaint End Time",
-                     interactive=True,
-                 )
-                 repaint_source = gr.Radio(
-                     ["text2music", "last_repaint", "upload"],
-                     value="text2music",
-                     label="Repaint Source",
-                     elem_id="repaint_source",
-                 )
-
-                 repaint_source_audio_upload = gr.Audio(
-                     label="Upload Audio",
-                     type="filepath",
-                     visible=False,
-                     elem_id="repaint_source_audio_upload",
-                     show_download_button=True,
-                 )
-                 repaint_source.change(
-                     fn=lambda x: gr.update(
-                         visible=x == "upload", elem_id="repaint_source_audio_upload"
-                     ),
-                     inputs=[repaint_source],
-                     outputs=[repaint_source_audio_upload],
-                 )
-
-                 repaint_bnt = gr.Button("Repaint", variant="primary")
-                 repaint_outputs, repaint_input_params_json = create_output_ui("Repaint")
-
-                 def repaint_process_func(
-                     text2music_json_data,
-                     repaint_json_data,
-                     retake_variance,
-                     retake_seeds,
-                     repaint_start,
-                     repaint_end,
-                     repaint_source,
-                     repaint_source_audio_upload,
-                     prompt,
-                     lyrics,
-                     infer_step,
-                     guidance_scale,
-                     scheduler_type,
-                     cfg_type,
-                     omega_scale,
-                     manual_seeds,
-                     guidance_interval,
-                     guidance_interval_decay,
-                     min_guidance_scale,
-                     use_erg_tag,
-                     use_erg_lyric,
-                     use_erg_diffusion,
-                     oss_steps,
-                     guidance_scale_text,
-                     guidance_scale_lyric,
-                 ):
-                     if repaint_source == "upload":
-                         src_audio_path = repaint_source_audio_upload
-                         audio_duration = librosa.get_duration(filename=src_audio_path)
-                         json_data = {"audio_duration": audio_duration}
-                     elif repaint_source == "text2music":
-                         json_data = text2music_json_data
-                         src_audio_path = json_data["audio_path"]
-                     elif repaint_source == "last_repaint":
-                         json_data = repaint_json_data
-                         src_audio_path = json_data["audio_path"]
-
-                     return enhanced_process_func(
-                         json_data["audio_duration"],
-                         prompt,
-                         lyrics,
-                         infer_step,
-                         guidance_scale,
-                         scheduler_type,
-                         cfg_type,
-                         omega_scale,
-                         manual_seeds,
-                         guidance_interval,
-                         guidance_interval_decay,
-                         min_guidance_scale,
-                         use_erg_tag,
-                         use_erg_lyric,
-                         use_erg_diffusion,
-                         oss_steps,
-                         guidance_scale_text,
-                         guidance_scale_lyric,
-                         retake_seeds=retake_seeds,
-                         retake_variance=retake_variance,
-                         task="repaint",
-                         repaint_start=repaint_start,
-                         repaint_end=repaint_end,
-                         src_audio_path=src_audio_path,
-                         lora_name_or_path="none"
-                     )
-
-                 repaint_bnt.click(
-                     fn=repaint_process_func,
-                     inputs=[
-                         input_params_json,
-                         repaint_input_params_json,
-                         retake_variance,
-                         retake_seeds,
-                         repaint_start,
-                         repaint_end,
-                         repaint_source,
-                         repaint_source_audio_upload,
-                         prompt,
-                         lyrics,
-                         infer_step,
-                         guidance_scale,
-                         scheduler_type,
-                         cfg_type,
-                         omega_scale,
-                         manual_seeds,
-                         guidance_interval,
-                         guidance_interval_decay,
-                         min_guidance_scale,
-                         use_erg_tag,
-                         use_erg_lyric,
-                         use_erg_diffusion,
-                         oss_steps,
-                         guidance_scale_text,
-                         guidance_scale_lyric,
-                     ],
-                     outputs=repaint_outputs + [repaint_input_params_json],
-                 )
-
-             with gr.Tab("edit"):
-                 edit_prompt = gr.Textbox(lines=2, label="Edit Tags", max_lines=4)
-                 edit_lyrics = gr.Textbox(lines=9, label="Edit Lyrics", max_lines=13)
-                 retake_seeds = gr.Textbox(
-                     label="edit seeds (default None)", placeholder="", value=None
-                 )
-
-                 edit_type = gr.Radio(
-                     ["only_lyrics", "remix"],
-                     value="only_lyrics",
-                     label="Edit Type",
-                     elem_id="edit_type",
-                     info="`only_lyrics` keeps the whole song the same except for the lyric changes. Keep the difference small, e.g. a one-line lyric change.\n`remix` can change the song's melody and genre.",
-                 )
-                 edit_n_min = gr.Slider(
-                     minimum=0.0,
-                     maximum=1.0,
-                     step=0.01,
-                     value=0.6,
-                     label="edit_n_min",
-                     interactive=True,
-                 )
-                 edit_n_max = gr.Slider(
-                     minimum=0.0,
-                     maximum=1.0,
-                     step=0.01,
-                     value=1.0,
-                     label="edit_n_max",
-                     interactive=True,
-                 )
-
-                 def edit_type_change_func(edit_type):
-                     if edit_type == "only_lyrics":
-                         n_min = 0.6
-                         n_max = 1.0
-                     elif edit_type == "remix":
-                         n_min = 0.2
-                         n_max = 0.4
-                     return n_min, n_max
-
-                 edit_type.change(
-                     edit_type_change_func,
-                     inputs=[edit_type],
-                     outputs=[edit_n_min, edit_n_max],
-                 )
-
-                 edit_source = gr.Radio(
-                     ["text2music", "last_edit", "upload"],
-                     value="text2music",
-                     label="Edit Source",
-                     elem_id="edit_source",
-                 )
-                 edit_source_audio_upload = gr.Audio(
-                     label="Upload Audio",
-                     type="filepath",
-                     visible=False,
-                     elem_id="edit_source_audio_upload",
-                     show_download_button=True,
-                 )
-                 edit_source.change(
-                     fn=lambda x: gr.update(
-                         visible=x == "upload", elem_id="edit_source_audio_upload"
-                     ),
-                     inputs=[edit_source],
-                     outputs=[edit_source_audio_upload],
-                 )
-
-                 edit_bnt = gr.Button("Edit", variant="primary")
-                 edit_outputs, edit_input_params_json = create_output_ui("Edit")
-
-                 def edit_process_func(
-                     text2music_json_data,
-                     edit_input_params_json,
-                     edit_source,
-                     edit_source_audio_upload,
-                     prompt,
-                     lyrics,
-                     edit_prompt,
-                     edit_lyrics,
-                     edit_n_min,
-                     edit_n_max,
-                     infer_step,
-                     guidance_scale,
-                     scheduler_type,
-                     cfg_type,
-                     omega_scale,
-                     manual_seeds,
-                     guidance_interval,
-                     guidance_interval_decay,
-                     min_guidance_scale,
-                     use_erg_tag,
-                     use_erg_lyric,
-                     use_erg_diffusion,
-                     oss_steps,
-                     guidance_scale_text,
-                     guidance_scale_lyric,
-                     retake_seeds,
-                 ):
-                     if edit_source == "upload":
-                         src_audio_path = edit_source_audio_upload
-                         audio_duration = librosa.get_duration(filename=src_audio_path)
-                         json_data = {"audio_duration": audio_duration}
-                     elif edit_source == "text2music":
-                         json_data = text2music_json_data
-                         src_audio_path = json_data["audio_path"]
-                     elif edit_source == "last_edit":
-                         json_data = edit_input_params_json
-                         src_audio_path = json_data["audio_path"]
-
-                     if not edit_prompt:
-                         edit_prompt = prompt
-                     if not edit_lyrics:
-                         edit_lyrics = lyrics
-
-                     return enhanced_process_func(
-                         json_data["audio_duration"],
-                         prompt,
-                         lyrics,
-                         infer_step,
-                         guidance_scale,
-                         scheduler_type,
-                         cfg_type,
-                         omega_scale,
-                         manual_seeds,
-                         guidance_interval,
-                         guidance_interval_decay,
-                         min_guidance_scale,
-                         use_erg_tag,
-                         use_erg_lyric,
-                         use_erg_diffusion,
-                         oss_steps,
-                         guidance_scale_text,
-                         guidance_scale_lyric,
-                         task="edit",
-                         src_audio_path=src_audio_path,
-                         edit_target_prompt=edit_prompt,
-                         edit_target_lyrics=edit_lyrics,
-                         edit_n_min=edit_n_min,
-                         edit_n_max=edit_n_max,
-                         retake_seeds=retake_seeds,
-                         lora_name_or_path="none"
-                     )
-
-                 edit_bnt.click(
-                     fn=edit_process_func,
-                     inputs=[
-                         input_params_json,
-                         edit_input_params_json,
-                         edit_source,
-                         edit_source_audio_upload,
-                         prompt,
-                         lyrics,
-                         edit_prompt,
-                         edit_lyrics,
-                         edit_n_min,
-                         edit_n_max,
-                         infer_step,
-                         guidance_scale,
-                         scheduler_type,
-                         cfg_type,
-                         omega_scale,
-                         manual_seeds,
-                         guidance_interval,
-                         guidance_interval_decay,
-                         min_guidance_scale,
-                         use_erg_tag,
-                         use_erg_lyric,
-                         use_erg_diffusion,
-                         oss_steps,
-                         guidance_scale_text,
-                         guidance_scale_lyric,
-                         retake_seeds,
-                     ],
-                     outputs=edit_outputs + [edit_input_params_json],
-                 )
-
-             with gr.Tab("extend"):
-                 extend_seeds = gr.Textbox(
-                     label="extend seeds (default None)", placeholder="", value=None
-                 )
-                 left_extend_length = gr.Slider(
-                     minimum=0.0,
-                     maximum=240.0,
-                     step=0.01,
-                     value=0.0,
-                     label="Left Extend Length",
-                     interactive=True,
-                 )
-                 right_extend_length = gr.Slider(
-                     minimum=0.0,
-                     maximum=240.0,
-                     step=0.01,
-                     value=30.0,
-                     label="Right Extend Length",
-                     interactive=True,
-                 )
-                 extend_source = gr.Radio(
-                     ["text2music", "last_extend", "upload"],
-                     value="text2music",
-                     label="Extend Source",
-                     elem_id="extend_source",
-                 )
-
-                 extend_source_audio_upload = gr.Audio(
-                     label="Upload Audio",
-                     type="filepath",
-                     visible=False,
-                     elem_id="extend_source_audio_upload",
-                     show_download_button=True,
-                 )
-                 extend_source.change(
-                     fn=lambda x: gr.update(
-                         visible=x == "upload", elem_id="extend_source_audio_upload"
-                     ),
-                     inputs=[extend_source],
-                     outputs=[extend_source_audio_upload],
-                 )
-
-                 extend_bnt = gr.Button("Extend", variant="primary")
-                 extend_outputs, extend_input_params_json = create_output_ui("Extend")
-
-                 def extend_process_func(
-                     text2music_json_data,
-                     extend_input_params_json,
-                     extend_seeds,
-                     left_extend_length,
-                     right_extend_length,
-                     extend_source,
-                     extend_source_audio_upload,
-                     prompt,
-                     lyrics,
-                     infer_step,
-                     guidance_scale,
-                     scheduler_type,
-                     cfg_type,
-                     omega_scale,
-                     manual_seeds,
-                     guidance_interval,
-                     guidance_interval_decay,
-                     min_guidance_scale,
-                     use_erg_tag,
-                     use_erg_lyric,
-                     use_erg_diffusion,
-                     oss_steps,
-                     guidance_scale_text,
-                     guidance_scale_lyric,
-                 ):
-                     if extend_source == "upload":
-                         src_audio_path = extend_source_audio_upload
-                         # get audio duration
-                         audio_duration = librosa.get_duration(filename=src_audio_path)
-                         json_data = {"audio_duration": audio_duration}
-                     elif extend_source == "text2music":
-                         json_data = text2music_json_data
-                         src_audio_path = json_data["audio_path"]
-                     elif extend_source == "last_extend":
-                         json_data = extend_input_params_json
-                         src_audio_path = json_data["audio_path"]
-
-                     repaint_start = -left_extend_length
-                     repaint_end = json_data["audio_duration"] + right_extend_length
-                     return enhanced_process_func(
-                         json_data["audio_duration"],
-                         prompt,
-                         lyrics,
-                         infer_step,
-                         guidance_scale,
-                         scheduler_type,
-                         cfg_type,
-                         omega_scale,
-                         manual_seeds,
-                         guidance_interval,
-                         guidance_interval_decay,
-                         min_guidance_scale,
-                         use_erg_tag,
-                         use_erg_lyric,
-                         use_erg_diffusion,
-                         oss_steps,
-                         guidance_scale_text,
-                         guidance_scale_lyric,
-                         retake_seeds=extend_seeds,
-                         retake_variance=1.0,
-                         task="extend",
-                         repaint_start=repaint_start,
-                         repaint_end=repaint_end,
-                         src_audio_path=src_audio_path,
-                         lora_name_or_path="none"
-                     )
-
-                 extend_bnt.click(
-                     fn=extend_process_func,
-                     inputs=[
-                         input_params_json,
-                         extend_input_params_json,
-                         extend_seeds,
-                         left_extend_length,
-                         right_extend_length,
-                         extend_source,
-                         extend_source_audio_upload,
-                         prompt,
-                         lyrics,
-                         infer_step,
-                         guidance_scale,
-                         scheduler_type,
-                         cfg_type,
-                         omega_scale,
-                         manual_seeds,
-                         guidance_interval,
-                         guidance_interval_decay,
-                         min_guidance_scale,
-                         use_erg_tag,
-                         use_erg_lyric,
-                         use_erg_diffusion,
-                         oss_steps,
-                         guidance_scale_text,
-                         guidance_scale_lyric,
-                     ],
-                     outputs=extend_outputs + [extend_input_params_json],
-                 )
-
-     def json2output(json_data):
-         return (
-             json_data["audio_duration"],
-             json_data["prompt"],
-             json_data["lyrics"],
-             json_data["infer_step"],
-             json_data["guidance_scale"],
-             json_data["scheduler_type"],
-             json_data["cfg_type"],
-             json_data["omega_scale"],
-             ", ".join(map(str, json_data["actual_seeds"])),
-             json_data["guidance_interval"],
-             json_data["guidance_interval_decay"],
-             json_data["min_guidance_scale"],
-             json_data["use_erg_tag"],
-             json_data["use_erg_lyric"],
-             json_data["use_erg_diffusion"],
-             ", ".join(map(str, json_data["oss_steps"])),
-             (
-                 json_data["guidance_scale_text"]
-                 if "guidance_scale_text" in json_data
-                 else 0.0
-             ),
-             (
-                 json_data["guidance_scale_lyric"]
-                 if "guidance_scale_lyric" in json_data
-                 else 0.0
-             ),
-             (
-                 json_data["audio2audio_enable"]
-                 if "audio2audio_enable" in json_data
-                 else False
-             ),
-             (
-                 json_data["ref_audio_strength"]
-                 if "ref_audio_strength" in json_data
-                 else 0.5
-             ),
-             (
-                 json_data["ref_audio_input"]
-                 if "ref_audio_input" in json_data
-                 else None
-             ),
-         )
-
-     def sample_data(lora_name_or_path_):
-         if sample_data_func:
-             json_data = sample_data_func(lora_name_or_path_)
-             return json2output(json_data)
-         return {}
-
-     sample_bnt.click(
-         sample_data,
-         inputs=[lora_name_or_path],
-         outputs=[
-             audio_duration,
-             prompt,
-             lyrics,
-             infer_step,
-             guidance_scale,
-             scheduler_type,
-             cfg_type,
-             omega_scale,
-             manual_seeds,
-             guidance_interval,
-             guidance_interval_decay,
-             min_guidance_scale,
-             use_erg_tag,
-             use_erg_lyric,
-             use_erg_diffusion,
-             oss_steps,
-             guidance_scale_text,
-             guidance_scale_lyric,
-             audio2audio_enable,
-             ref_audio_strength,
-             ref_audio_input,
-         ],
-     )
-
-     # Main generate button event (uses the enhanced function)
-     text2music_bnt.click(
-         fn=enhanced_process_func,
-         inputs=[
-             audio_duration,
-             prompt,
-             lyrics,
-             infer_step,
-             guidance_scale,
-             scheduler_type,
-             cfg_type,
-             omega_scale,
-             manual_seeds,
-             guidance_interval,
-             guidance_interval_decay,
-             min_guidance_scale,
-             use_erg_tag,
-             use_erg_lyric,
-             use_erg_diffusion,
-             oss_steps,
-             guidance_scale_text,
-             guidance_scale_lyric,
-             audio2audio_enable,
-             ref_audio_strength,
-             ref_audio_input,
-             lora_name_or_path,
-             multi_seed_mode,
-             enable_smart_enhancement,
-             genre_preset
-         ],
-         outputs=outputs + [input_params_json],
-     )
-
-
- def create_main_demo_ui(
-     text2music_process_func=dump_func,
-     sample_data_func=dump_func,
-     load_data_func=dump_func,
- ):
-     with gr.Blocks(
-         title="ACE-Step Model 1.0 DEMO - Enhanced with AI Lyrics",
-         theme=gr.themes.Soft(),
-         css="""
-         .gradio-container {
-             max-width: 1200px !important;
-         }
-         .quality-info {
-             background: linear-gradient(45deg, #f0f8ff, #e6f3ff);
-             padding: 10px;
-             border-radius: 8px;
-             margin: 5px 0;
-         }
-         .ai-lyrics-section {
-             background: linear-gradient(45deg, #f0fff0, #e6ffe6);
-             padding: 15px;
-             border-radius: 10px;
-             margin: 10px 0;
-             border: 2px solid #90EE90;
-         }
-         """
-     ) as demo:
-         gr.Markdown(
-             """
-             <h1 style="text-align: center;">🎵 ACE-Step PRO with AI Lyrics</h1>
-             <div style="text-align: center; margin: 20px;">
-                 <p><strong>🚀 New features:</strong> 🤖 AI lyrics | Quality presets | Multi-generation | Smart prompts | Real-time preview | Quality score</p>
-                 <p>
-                     <a href="https://ace-step.github.io/" target='_blank'>Project</a> |
-                     <a href="https://huggingface.co/ACE-Step/ACE-Step-v1-3.5B">Checkpoints</a> |
-                     <a href="https://discord.gg/rjAZz2xBdG" target='_blank'>Discord</a>
-                 </p>
-             </div>
-             """
-         )
-
-         # Usage guide
-         with gr.Accordion("📖 Usage Guide", open=False):
-             gr.Markdown("""
-             ### 🎯 Quick Start
-             1. **🤖 AI lyrics**: Enter a theme and click the 'AI Lyrics' button to generate lyrics automatically
-             2. **Genre selection**: Pick a music genre and optimized tags are applied automatically
-             3. **Quality setting**: Choose among Draft (fast) → Standard (recommended) → High Quality → Ultra
-             4. **Multi-generation**: Select "Best of 3/5/10" to generate several times and automatically keep the best result
-             5. **Preview**: Check a quick 10-second preview before running a full generation
-
-             ### 🤖 AI Lyric Writing
-             - **Multilingual**: lyrics are generated in the same language as the input (Korean, English, ...)
-             - **Theme examples**: "the flutter of first love", "the pain of a breakup", "the sigh of a man leaving for the army", "a hopeful tomorrow"
-             - **Structure tags**: [verse], [chorus], and [bridge] tags are included automatically
-             - **Genre aware**: lyrics are styled to match the selected genre
-
-             ### 💡 Quality Tips
-             - **High quality**: the "High Quality" + "Best of 5" combination is recommended
-             - **Fast testing**: use "Draft" together with the preview feature
-             - **Genre focus**: select a genre preset and check "Smart Enhancement"
-             - **Lyric structure**: make active use of the [verse], [chorus], and [bridge] tags
-
-             ### ⚙️ API Setup
-             To use the AI lyric feature, set your OpenAI API key as an environment variable:
-             ```bash
-             export LLM_API="your-openai-api-key"
-             ```
-             """)
-
-         with gr.Tab("🎵 Enhanced Text2Music with AI Lyrics"):
-             create_text2music_ui(
-                 gr=gr,
-                 text2music_process_func=text2music_process_func,
-                 sample_data_func=sample_data_func,
-                 load_data_func=load_data_func,
-             )
-     return demo
-
-
- if __name__ == "__main__":
-     print("🚀 Starting ACE-Step PRO with AI Lyrics...")
-
-     # Check API key status
-     if client_available:
-         print("✅ OpenAI API available - AI lyric writing enabled")
-     else:
-         print("❌ OpenAI API unavailable - check your environment variables")
-         print("How to set it: export LLM_API='your-openai-api-key'")
-
-     demo = create_main_demo_ui()
-     demo.launch(
-         server_name="0.0.0.0",
-         server_port=7860,
-         share=True  # create a public share link
-     )