Spaces:

TheStageAI
/

Elastic-musicgen-large

Running on L40S

App Files Community

quazim committed on Jun 27

Commit

f503040

1 Parent(s): b2d3523

updated

Browse files

Files changed (1) hide show

app.py +9 -14

app.py CHANGED Viewed

@@ -161,11 +161,12 @@ def generate_music(text_prompt, duration=10, guidance_scale=3.0):
         max_val = np.max(np.abs(audio_data))
         if max_val > 0:
             audio_data = audio_data / max_val * 0.95  # Scale to 95% to avoid clipping
-        audio_data = audio_data.astype(np.float32)
         print(f"[GENERATION] Final audio shape: {audio_data.shape}")
-        print(f"[GENERATION] Audio range: [{np.min(audio_data):.3f}, {np.max(audio_data):.3f}]")
         print(f"[GENERATION] Sample rate: {sample_rate}")
         timestamp = int(time.time() * 1000)
@@ -178,12 +179,13 @@ def generate_music(text_prompt, duration=10, guidance_scale=3.0):
             file_size = os.path.getsize(temp_path)
             print(f"[GENERATION] Audio saved to: {temp_path}")
             print(f"[GENERATION] File size: {file_size} bytes")
         else:
             print(f"[ERROR] Failed to create audio file: {temp_path}")
             return None
-        return temp_path
     except Exception as e:
         print(f"[ERROR] Generation failed: {str(e)}")
         cleanup_gpu()
@@ -225,14 +227,7 @@ with gr.Blocks(title="MusicGen Large - Music Generation") as demo:
         with gr.Column():
             audio_output = gr.Audio(
                 label="Generated Music",
-                type="filepath",
-                format="wav",
-                interactive=False,
-                autoplay=True,
-                show_download_button=True,
-                waveform_options=gr.WaveformOptions(
-                    show_recording_waveform=True
-                )
             )
             with gr.Accordion("Tips", open=False):

         max_val = np.max(np.abs(audio_data))
         if max_val > 0:
             audio_data = audio_data / max_val * 0.95  # Scale to 95% to avoid clipping
+        audio_data = (audio_data * 32767).astype(np.int16). ###
         print(f"[GENERATION] Final audio shape: {audio_data.shape}")
+        print(f"[GENERATION] Audio range: [{np.min(audio_data)}, {np.max(audio_data)}]")
+        print(f"[GENERATION] Audio dtype: {audio_data.dtype}")
         print(f"[GENERATION] Sample rate: {sample_rate}")
         timestamp = int(time.time() * 1000)
             file_size = os.path.getsize(temp_path)
             print(f"[GENERATION] Audio saved to: {temp_path}")
             print(f"[GENERATION] File size: {file_size} bytes")
+            print(f"[GENERATION] Returning numpy tuple: ({sample_rate}, audio_array)")
+            return (sample_rate, audio_data)
         else:
             print(f"[ERROR] Failed to create audio file: {temp_path}")
             return None
     except Exception as e:
         print(f"[ERROR] Generation failed: {str(e)}")
         cleanup_gpu()
         with gr.Column():
             audio_output = gr.Audio(
                 label="Generated Music",
+                type="numpy"
             )
             with gr.Accordion("Tips", open=False):