quazim committed
Commit 346c69d · 1 Parent(s): 836dde3
Files changed (1):
  1. app.py +42 -17
app.py CHANGED
@@ -3,9 +3,10 @@ import torch
 import gc
 import numpy as np
 import random
-from transformers import AutoProcessor, pipeline
+import tempfile
 import os
 os.environ['ELASTIC_LOG_LEVEL'] = 'DEBUG'
+from transformers import AutoProcessor, pipeline
 from elastic_models.transformers import MusicgenForConditionalGeneration
 
 def set_seed(seed: int = 42):
@@ -24,6 +25,7 @@ def cleanup_gpu():
     torch.cuda.synchronize()
     gc.collect()
 
+# Global variables for model caching with thread lock
 _generator = None
 _processor = None
 
@@ -40,8 +42,7 @@ def load_model():
 
     print("[MODEL] Loading processor...")
     _processor = AutoProcessor.from_pretrained(
-        "facebook/musicgen-large",
-        cache_dir="/mnt/fs/huggingface_cache/"
+        "facebook/musicgen-large"
     )
 
     print("[MODEL] Loading model...")
@@ -64,7 +65,7 @@ def load_model():
     )
 
     print("[MODEL] Model initialization completed successfully")
-
+
     return _generator, _processor
 
 def calculate_max_tokens(duration_seconds):
@@ -74,7 +75,6 @@ def calculate_max_tokens(duration_seconds):
     return max_new_tokens
 
 def generate_music(text_prompt, duration=10, guidance_scale=3.0):
-    """Generate music based on text prompt using pipeline"""
     try:
         generator, processor = load_model()
 
@@ -84,7 +84,10 @@ def generate_music(text_prompt, duration=10, guidance_scale=3.0):
         print(f"[GENERATION] Guidance scale: {guidance_scale}")
 
         cleanup_gpu()
+
+        import time
         set_seed(42)
+        print(f"[GENERATION] Using seed: {42}")
 
         max_new_tokens = calculate_max_tokens(duration)
 
@@ -112,8 +115,25 @@ def generate_music(text_prompt, duration=10, guidance_scale=3.0):
         print(f"[GENERATION] Audio shape: {audio_data.shape}")
         print(f"[GENERATION] Sample rate: {sample_rate}")
 
+        # Fix audio format for Gradio display
+        if len(audio_data.shape) > 1:
+            # If stereo or multi-channel, take first channel
+            audio_data = audio_data[0] if audio_data.shape[0] < audio_data.shape[1] else audio_data[:, 0]
+
+        # Ensure it's 1D
+        audio_data = audio_data.flatten()
+
+        # Normalize audio to prevent clipping
+        max_val = np.max(np.abs(audio_data))
+        if max_val > 0:
+            audio_data = audio_data / max_val * 0.95  # Scale to 95% to avoid clipping
+
+        # Convert to float32 for Gradio
         audio_data = audio_data.astype(np.float32)
 
+        print(f"[GENERATION] Final audio shape: {audio_data.shape}")
+        print(f"[GENERATION] Audio range: [{np.min(audio_data):.3f}, {np.max(audio_data):.3f}]")
+
         return sample_rate, audio_data
 
     except Exception as e:
@@ -121,7 +141,8 @@ def generate_music(text_prompt, duration=10, guidance_scale=3.0):
         cleanup_gpu()
         return None, None
 
-with gr.Blocks(title="MusicGen Large - Music Generation", theme=gr.themes.Soft()) as demo:
+
+with gr.Blocks(title="MusicGen Large - Music Generation") as demo:
     gr.Markdown("# 🎵 MusicGen Large Music Generator")
     gr.Markdown("Generate music from text descriptions using Facebook's MusicGen Large model with elastic compression.")
 
@@ -156,7 +177,9 @@ with gr.Blocks(title="MusicGen Large - Music Generation", theme=gr.themes.Soft()) as demo:
         with gr.Column():
             audio_output = gr.Audio(
                 label="Generated Music",
-                type="numpy"
+                type="numpy",
+                format="wav",
+                interactive=False
             )
 
     with gr.Accordion("Tips", open=False):
@@ -170,21 +193,23 @@ with gr.Blocks(title="MusicGen Large - Music Generation", theme=gr.themes.Soft()) as demo:
     generate_btn.click(
         fn=generate_music,
         inputs=[text_input, duration, guidance_scale],
-        outputs=audio_output
+        outputs=audio_output,
+        show_progress=True
     )
 
+    # Example prompts - only text prompts now
     gr.Examples(
        examples=[
-            ["A groovy funk bassline with a tight drum beat", 10, 3.0],
-            ["Relaxing acoustic guitar melody", 15, 3.0],
-            ["Electronic dance music with heavy bass", 10, 4.0],
-            ["Classical violin concerto", 20, 3.5],
-            ["Reggae with steel drums and bass", 12, 3.0],
-            ["Rock ballad with electric guitar solo", 15, 3.5],
-            ["Jazz piano improvisation with brushed drums", 18, 3.0],
-            ["Ambient synthwave with retro vibes", 25, 2.5],
+            "A groovy funk bassline with a tight drum beat",
+            "Relaxing acoustic guitar melody",
+            "Electronic dance music with heavy bass",
+            "Classical violin concerto",
+            "Reggae with steel drums and bass",
+            "Rock ballad with electric guitar solo",
+            "Jazz piano improvisation with brushed drums",
+            "Ambient synthwave with retro vibes",
        ],
-        inputs=[text_input, duration, guidance_scale],
+        inputs=text_input,
        label="Example Prompts"
     )
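The hunks above call calculate_max_tokens(duration), but its body falls outside the diff context. As a rough sketch only, not the code in this commit: MusicGen's decoder emits roughly 50 audio tokens per second of generated audio, so a helper like the one below would map the requested duration to max_new_tokens. The 50-tokens-per-second rate is an assumption about this app, not something shown in the diff.

def calculate_max_tokens(duration_seconds):
    # Hypothetical sketch: the real implementation is not visible in this diff.
    # Assumes MusicGen's roughly 50 audio tokens per second of generated audio.
    tokens_per_second = 50
    max_new_tokens = int(duration_seconds * tokens_per_second)
    return max_new_tokens

With that rate, the default 10-second duration would translate to about 500 new tokens.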
 
 
215