Spaces:
Running
on
L40S
Running
on
L40S
updated
Browse files
app.py
CHANGED
@@ -132,20 +132,20 @@ def generate_music(text_prompt, duration=10, guidance_scale=3.0):
|
|
132 |
output = outputs[0]
|
133 |
audio_data = output['audio']
|
134 |
sample_rate = output['sampling_rate']
|
135 |
-
|
136 |
print(f"[GENERATION] Audio shape: {audio_data.shape}")
|
137 |
print(f"[GENERATION] Sample rate: {sample_rate}")
|
138 |
print(f"[GENERATION] Audio dtype: {audio_data.dtype}")
|
139 |
print(f"[GENERATION] Audio is numpy: {type(audio_data)}")
|
140 |
-
|
141 |
if hasattr(audio_data, 'cpu'):
|
142 |
audio_data = audio_data.cpu().numpy()
|
143 |
-
|
144 |
print(f"[GENERATION] Audio shape after tensor conversion: {audio_data.shape}")
|
145 |
-
|
146 |
if len(audio_data.shape) == 3:
|
147 |
audio_data = audio_data[0]
|
148 |
-
|
149 |
if len(audio_data.shape) == 2:
|
150 |
if audio_data.shape[0] < audio_data.shape[1]:
|
151 |
audio_data = audio_data.T
|
@@ -153,22 +153,36 @@ def generate_music(text_prompt, duration=10, guidance_scale=3.0):
|
|
153 |
audio_data = audio_data[:, 0]
|
154 |
else:
|
155 |
audio_data = audio_data.flatten()
|
156 |
-
|
157 |
audio_data = audio_data.flatten()
|
158 |
-
|
159 |
print(f"[GENERATION] Audio shape after flattening: {audio_data.shape}")
|
160 |
-
|
161 |
max_val = np.max(np.abs(audio_data))
|
162 |
if max_val > 0:
|
163 |
audio_data = audio_data / max_val * 0.95 # Scale to 95% to avoid clipping
|
164 |
-
|
165 |
audio_data = audio_data.astype(np.float32)
|
166 |
-
|
167 |
print(f"[GENERATION] Final audio shape: {audio_data.shape}")
|
168 |
print(f"[GENERATION] Audio range: [{np.min(audio_data):.3f}, {np.max(audio_data):.3f}]")
|
169 |
print(f"[GENERATION] Sample rate: {sample_rate}")
|
170 |
-
|
171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
172 |
|
173 |
except Exception as e:
|
174 |
print(f"[ERROR] Generation failed: {str(e)}")
|
@@ -211,8 +225,14 @@ with gr.Blocks(title="MusicGen Large - Music Generation") as demo:
|
|
211 |
with gr.Column():
|
212 |
audio_output = gr.Audio(
|
213 |
label="Generated Music",
|
214 |
-
type="
|
|
|
215 |
interactive=False,
|
|
|
|
|
|
|
|
|
|
|
216 |
)
|
217 |
|
218 |
with gr.Accordion("Tips", open=False):
|
|
|
132 |
output = outputs[0]
|
133 |
audio_data = output['audio']
|
134 |
sample_rate = output['sampling_rate']
|
135 |
+
|
136 |
print(f"[GENERATION] Audio shape: {audio_data.shape}")
|
137 |
print(f"[GENERATION] Sample rate: {sample_rate}")
|
138 |
print(f"[GENERATION] Audio dtype: {audio_data.dtype}")
|
139 |
print(f"[GENERATION] Audio is numpy: {type(audio_data)}")
|
140 |
+
|
141 |
if hasattr(audio_data, 'cpu'):
|
142 |
audio_data = audio_data.cpu().numpy()
|
143 |
+
|
144 |
print(f"[GENERATION] Audio shape after tensor conversion: {audio_data.shape}")
|
145 |
+
|
146 |
if len(audio_data.shape) == 3:
|
147 |
audio_data = audio_data[0]
|
148 |
+
|
149 |
if len(audio_data.shape) == 2:
|
150 |
if audio_data.shape[0] < audio_data.shape[1]:
|
151 |
audio_data = audio_data.T
|
|
|
153 |
audio_data = audio_data[:, 0]
|
154 |
else:
|
155 |
audio_data = audio_data.flatten()
|
156 |
+
|
157 |
audio_data = audio_data.flatten()
|
158 |
+
|
159 |
print(f"[GENERATION] Audio shape after flattening: {audio_data.shape}")
|
160 |
+
|
161 |
max_val = np.max(np.abs(audio_data))
|
162 |
if max_val > 0:
|
163 |
audio_data = audio_data / max_val * 0.95 # Scale to 95% to avoid clipping
|
164 |
+
|
165 |
audio_data = audio_data.astype(np.float32)
|
166 |
+
|
167 |
print(f"[GENERATION] Final audio shape: {audio_data.shape}")
|
168 |
print(f"[GENERATION] Audio range: [{np.min(audio_data):.3f}, {np.max(audio_data):.3f}]")
|
169 |
print(f"[GENERATION] Sample rate: {sample_rate}")
|
170 |
+
|
171 |
+
timestamp = int(time.time() * 1000)
|
172 |
+
temp_filename = f"generated_music_{timestamp}.wav"
|
173 |
+
temp_path = os.path.join(tempfile.gettempdir(), temp_filename)
|
174 |
+
|
175 |
+
sf.write(temp_path, audio_data, sample_rate)
|
176 |
+
|
177 |
+
if os.path.exists(temp_path):
|
178 |
+
file_size = os.path.getsize(temp_path)
|
179 |
+
print(f"[GENERATION] Audio saved to: {temp_path}")
|
180 |
+
print(f"[GENERATION] File size: {file_size} bytes")
|
181 |
+
else:
|
182 |
+
print(f"[ERROR] Failed to create audio file: {temp_path}")
|
183 |
+
return None
|
184 |
+
|
185 |
+
return temp_path
|
186 |
|
187 |
except Exception as e:
|
188 |
print(f"[ERROR] Generation failed: {str(e)}")
|
|
|
225 |
with gr.Column():
|
226 |
audio_output = gr.Audio(
|
227 |
label="Generated Music",
|
228 |
+
type="filepath",
|
229 |
+
format="wav",
|
230 |
interactive=False,
|
231 |
+
autoplay=True,
|
232 |
+
show_download_button=True,
|
233 |
+
waveform_options=gr.WaveformOptions(
|
234 |
+
show_recording_waveform=True
|
235 |
+
)
|
236 |
)
|
237 |
|
238 |
with gr.Accordion("Tips", open=False):
|