quazim committed on
Commit
b2d3523
·
1 Parent(s): bb422a5
Files changed (1) hide show
  1. app.py +33 -13
app.py CHANGED
@@ -132,20 +132,20 @@ def generate_music(text_prompt, duration=10, guidance_scale=3.0):
132
  output = outputs[0]
133
  audio_data = output['audio']
134
  sample_rate = output['sampling_rate']
135
-
136
  print(f"[GENERATION] Audio shape: {audio_data.shape}")
137
  print(f"[GENERATION] Sample rate: {sample_rate}")
138
  print(f"[GENERATION] Audio dtype: {audio_data.dtype}")
139
  print(f"[GENERATION] Audio is numpy: {type(audio_data)}")
140
-
141
  if hasattr(audio_data, 'cpu'):
142
  audio_data = audio_data.cpu().numpy()
143
-
144
  print(f"[GENERATION] Audio shape after tensor conversion: {audio_data.shape}")
145
-
146
  if len(audio_data.shape) == 3:
147
  audio_data = audio_data[0]
148
-
149
  if len(audio_data.shape) == 2:
150
  if audio_data.shape[0] < audio_data.shape[1]:
151
  audio_data = audio_data.T
@@ -153,22 +153,36 @@ def generate_music(text_prompt, duration=10, guidance_scale=3.0):
153
  audio_data = audio_data[:, 0]
154
  else:
155
  audio_data = audio_data.flatten()
156
-
157
  audio_data = audio_data.flatten()
158
-
159
  print(f"[GENERATION] Audio shape after flattening: {audio_data.shape}")
160
-
161
  max_val = np.max(np.abs(audio_data))
162
  if max_val > 0:
163
  audio_data = audio_data / max_val * 0.95 # Scale to 95% to avoid clipping
164
-
165
  audio_data = audio_data.astype(np.float32)
166
-
167
  print(f"[GENERATION] Final audio shape: {audio_data.shape}")
168
  print(f"[GENERATION] Audio range: [{np.min(audio_data):.3f}, {np.max(audio_data):.3f}]")
169
  print(f"[GENERATION] Sample rate: {sample_rate}")
170
-
171
- return (sample_rate, audio_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
 
173
  except Exception as e:
174
  print(f"[ERROR] Generation failed: {str(e)}")
@@ -211,8 +225,14 @@ with gr.Blocks(title="MusicGen Large - Music Generation") as demo:
211
  with gr.Column():
212
  audio_output = gr.Audio(
213
  label="Generated Music",
214
- type="numpy",
 
215
  interactive=False,
 
 
 
 
 
216
  )
217
 
218
  with gr.Accordion("Tips", open=False):
 
132
  output = outputs[0]
133
  audio_data = output['audio']
134
  sample_rate = output['sampling_rate']
135
+
136
  print(f"[GENERATION] Audio shape: {audio_data.shape}")
137
  print(f"[GENERATION] Sample rate: {sample_rate}")
138
  print(f"[GENERATION] Audio dtype: {audio_data.dtype}")
139
  print(f"[GENERATION] Audio is numpy: {type(audio_data)}")
140
+
141
  if hasattr(audio_data, 'cpu'):
142
  audio_data = audio_data.cpu().numpy()
143
+
144
  print(f"[GENERATION] Audio shape after tensor conversion: {audio_data.shape}")
145
+
146
  if len(audio_data.shape) == 3:
147
  audio_data = audio_data[0]
148
+
149
  if len(audio_data.shape) == 2:
150
  if audio_data.shape[0] < audio_data.shape[1]:
151
  audio_data = audio_data.T
 
153
  audio_data = audio_data[:, 0]
154
  else:
155
  audio_data = audio_data.flatten()
156
+
157
  audio_data = audio_data.flatten()
158
+
159
  print(f"[GENERATION] Audio shape after flattening: {audio_data.shape}")
160
+
161
  max_val = np.max(np.abs(audio_data))
162
  if max_val > 0:
163
  audio_data = audio_data / max_val * 0.95 # Scale to 95% to avoid clipping
164
+
165
  audio_data = audio_data.astype(np.float32)
166
+
167
  print(f"[GENERATION] Final audio shape: {audio_data.shape}")
168
  print(f"[GENERATION] Audio range: [{np.min(audio_data):.3f}, {np.max(audio_data):.3f}]")
169
  print(f"[GENERATION] Sample rate: {sample_rate}")
170
+
171
+ timestamp = int(time.time() * 1000)
172
+ temp_filename = f"generated_music_{timestamp}.wav"
173
+ temp_path = os.path.join(tempfile.gettempdir(), temp_filename)
174
+
175
+ sf.write(temp_path, audio_data, sample_rate)
176
+
177
+ if os.path.exists(temp_path):
178
+ file_size = os.path.getsize(temp_path)
179
+ print(f"[GENERATION] Audio saved to: {temp_path}")
180
+ print(f"[GENERATION] File size: {file_size} bytes")
181
+ else:
182
+ print(f"[ERROR] Failed to create audio file: {temp_path}")
183
+ return None
184
+
185
+ return temp_path
186
 
187
  except Exception as e:
188
  print(f"[ERROR] Generation failed: {str(e)}")
 
225
  with gr.Column():
226
  audio_output = gr.Audio(
227
  label="Generated Music",
228
+ type="filepath",
229
+ format="wav",
230
  interactive=False,
231
+ autoplay=True,
232
+ show_download_button=True,
233
+ waveform_options=gr.WaveformOptions(
234
+ show_recording_waveform=True
235
+ )
236
  )
237
 
238
  with gr.Accordion("Tips", open=False):