Spaces: Running on L40S
updated
Browse files
app.py
CHANGED
@@ -3,7 +3,6 @@ import torch
|
|
3 |
import gc
|
4 |
import numpy as np
|
5 |
import random
|
6 |
-
import tempfile
|
7 |
import os
|
8 |
os.environ['ELASTIC_LOG_LEVEL'] = 'DEBUG'
|
9 |
from transformers import AutoProcessor, pipeline
|
@@ -25,7 +24,6 @@ def cleanup_gpu():
|
|
25 |
torch.cuda.synchronize()
|
26 |
gc.collect()
|
27 |
|
28 |
-
# Global variables for model caching with thread lock
|
29 |
_generator = None
|
30 |
_processor = None
|
31 |
|
@@ -115,20 +113,16 @@ def generate_music(text_prompt, duration=10, guidance_scale=3.0):
|
|
115 |
print(f"[GENERATION] Audio shape: {audio_data.shape}")
|
116 |
print(f"[GENERATION] Sample rate: {sample_rate}")
|
117 |
|
118 |
-
# Fix audio format for Gradio display
|
119 |
if len(audio_data.shape) > 1:
|
120 |
# If stereo or multi-channel, take first channel
|
121 |
audio_data = audio_data[0] if audio_data.shape[0] < audio_data.shape[1] else audio_data[:, 0]
|
122 |
|
123 |
-
# Ensure it's 1D
|
124 |
audio_data = audio_data.flatten()
|
125 |
|
126 |
-
# Normalize audio to prevent clipping
|
127 |
max_val = np.max(np.abs(audio_data))
|
128 |
if max_val > 0:
|
129 |
audio_data = audio_data / max_val * 0.95 # Scale to 95% to avoid clipping
|
130 |
|
131 |
-
# Convert to float32 for Gradio
|
132 |
audio_data = audio_data.astype(np.float32)
|
133 |
|
134 |
print(f"[GENERATION] Final audio shape: {audio_data.shape}")
|
@@ -139,7 +133,7 @@ def generate_music(text_prompt, duration=10, guidance_scale=3.0):
|
|
139 |
except Exception as e:
|
140 |
print(f"[ERROR] Generation failed: {str(e)}")
|
141 |
cleanup_gpu()
|
142 |
-
return None
|
143 |
|
144 |
|
145 |
with gr.Blocks(title="MusicGen Large - Music Generation") as demo:
|
@@ -177,7 +171,7 @@ with gr.Blocks(title="MusicGen Large - Music Generation") as demo:
|
|
177 |
with gr.Column():
|
178 |
audio_output = gr.Audio(
|
179 |
label="Generated Music",
|
180 |
-
type="
|
181 |
format="wav",
|
182 |
interactive=False
|
183 |
)
|
@@ -197,7 +191,6 @@ with gr.Blocks(title="MusicGen Large - Music Generation") as demo:
|
|
197 |
show_progress=True
|
198 |
)
|
199 |
|
200 |
-
# Example prompts - only text prompts now
|
201 |
gr.Examples(
|
202 |
examples=[
|
203 |
"A groovy funk bassline with a tight drum beat",
|
|
|
3 |
import gc
|
4 |
import numpy as np
|
5 |
import random
|
|
|
6 |
import os
|
7 |
os.environ['ELASTIC_LOG_LEVEL'] = 'DEBUG'
|
8 |
from transformers import AutoProcessor, pipeline
|
|
|
24 |
torch.cuda.synchronize()
|
25 |
gc.collect()
|
26 |
|
|
|
27 |
_generator = None
|
28 |
_processor = None
|
29 |
|
|
|
113 |
print(f"[GENERATION] Audio shape: {audio_data.shape}")
|
114 |
print(f"[GENERATION] Sample rate: {sample_rate}")
|
115 |
|
|
|
116 |
if len(audio_data.shape) > 1:
|
117 |
# If stereo or multi-channel, take first channel
|
118 |
audio_data = audio_data[0] if audio_data.shape[0] < audio_data.shape[1] else audio_data[:, 0]
|
119 |
|
|
|
120 |
audio_data = audio_data.flatten()
|
121 |
|
|
|
122 |
max_val = np.max(np.abs(audio_data))
|
123 |
if max_val > 0:
|
124 |
audio_data = audio_data / max_val * 0.95 # Scale to 95% to avoid clipping
|
125 |
|
|
|
126 |
audio_data = audio_data.astype(np.float32)
|
127 |
|
128 |
print(f"[GENERATION] Final audio shape: {audio_data.shape}")
|
|
|
133 |
except Exception as e:
|
134 |
print(f"[ERROR] Generation failed: {str(e)}")
|
135 |
cleanup_gpu()
|
136 |
+
return None
|
137 |
|
138 |
|
139 |
with gr.Blocks(title="MusicGen Large - Music Generation") as demo:
|
|
|
171 |
with gr.Column():
|
172 |
audio_output = gr.Audio(
|
173 |
label="Generated Music",
|
174 |
+
type="filepath",
|
175 |
format="wav",
|
176 |
interactive=False
|
177 |
)
|
|
|
191 |
show_progress=True
|
192 |
)
|
193 |
|
|
|
194 |
gr.Examples(
|
195 |
examples=[
|
196 |
"A groovy funk bassline with a tight drum beat",
|