Spaces:
Running
on
Zero
Running
on
Zero
Fixed GPU access problem
Browse files
app.py
CHANGED
|
@@ -53,23 +53,28 @@ def debug_profile(func):
|
|
| 53 |
return pp.profile(sort_by='cumulative', out_lines=10)(func)
|
| 54 |
return func
|
| 55 |
|
| 56 |
-
|
| 57 |
def extract_pitch(audio, unvoice=True, sr=16000, frame_shift_ms=10, log=True):
|
| 58 |
if not isinstance(audio, torch.Tensor):
|
| 59 |
audio = torch.Tensor(audio).to(device)
|
| 60 |
if len(audio.shape) == 1:
|
| 61 |
audio = audio.unsqueeze(0)
|
| 62 |
hop_length = int(sr * frame_shift_ms / 1000)
|
| 63 |
-
f0 = torchcrepe.predict(audio,
|
| 64 |
sr,
|
| 65 |
hop_length=hop_length,
|
| 66 |
model='tiny',
|
| 67 |
device=device,
|
| 68 |
fmin=80,
|
| 69 |
-
fmax=
|
|
|
|
|
|
|
| 70 |
)
|
| 71 |
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
def generate_pitch_reinterp(pitch, pitch_model, invert_pitch_fn, num_samples, num_steps, noise_std=0.4, t0=0.5):
|
| 75 |
'''Generate pitch values for the melodic reinterpretation task'''
|
|
@@ -100,7 +105,7 @@ def generate_audio(audio_model, f0s, invert_audio_fn, singers=[3], num_steps=100
|
|
| 100 |
|
| 101 |
return audio
|
| 102 |
|
| 103 |
-
@spaces.GPU(duration=
|
| 104 |
def generate(pitch, num_samples=1, num_steps=100, singers=[3], outfolder='temp', audio_seq_len=750, pitch_qt=None, type='response', invert_pitch_fn=None, t0=0.5, model_type='diffusion'):
|
| 105 |
global pitch_model, audio_model
|
| 106 |
# move the models to device
|
|
@@ -193,8 +198,6 @@ def container_generate(model_selection, task_selection, audio, singer_id, t0):
|
|
| 193 |
audio = audio[-12*16000:] # consider only last 12 s
|
| 194 |
f0 = extract_pitch(audio)
|
| 195 |
# move f0 to cpu
|
| 196 |
-
if f0.device != 'cpu': #TODO:
|
| 197 |
-
f0 = f0.cpu()
|
| 198 |
mic_f0 = f0.clone() # save the user input pitch values
|
| 199 |
logging.log(logging.INFO, 'Pitch extracted')
|
| 200 |
f0 = pitch_task_fn(**{
|
|
|
|
| 53 |
return pp.profile(sort_by='cumulative', out_lines=10)(func)
|
| 54 |
return func
|
| 55 |
|
| 56 |
+
@spaces.GPU(duration=10)
def extract_pitch(audio, unvoice=True, sr=16000, frame_shift_ms=10, log=True):
    """Extract an f0 (pitch) contour from ``audio`` using torchcrepe.

    Runs the 'tiny' CREPE model with Viterbi decoding over 80–600 Hz,
    zeroes the periodicity of near-silent frames (below -80 dB), and
    masks frames whose periodicity is below 0.4 as unvoiced.

    Args:
        audio: 1-D waveform or (1, T) tensor; plain arrays/lists are
            converted to a tensor on ``device``.
        unvoice: unused in this body — NOTE(review): confirm whether the
            0.4-periodicity unvoiced masking should depend on this flag.
        sr: sample rate of ``audio`` in Hz.
        frame_shift_ms: hop between analysis frames, in milliseconds.
        log: unused in this body — NOTE(review): confirm whether a log
            transform of f0 was intended here.

    Returns:
        1-D CPU tensor of f0 values in Hz (NaN where unvoiced).
    """
    if not isinstance(audio, torch.Tensor):
        audio = torch.Tensor(audio).to(device)
    if len(audio.shape) == 1:
        # torchcrepe expects a batch dimension: (batch, samples)
        audio = audio.unsqueeze(0)
    hop_length = int(sr * frame_shift_ms / 1000)
    f0, periodicity = torchcrepe.predict(
        audio,
        sr,
        hop_length=hop_length,
        model='tiny',
        device=device,
        fmin=80,
        fmax=600,
        decoder=torchcrepe.decode.viterbi,
        return_periodicity=True,
    )
    # Suppress periodicity in near-silent regions so they read as unvoiced.
    periodicity = torchcrepe.threshold.Silence(-80)(
        periodicity=periodicity, audio=audio, sample_rate=sr,
        hop_length=hop_length)
    # Replace low-confidence frames (periodicity < 0.4) with NaN.
    f0 = torchcrepe.threshold.At(0.4)(f0, periodicity=periodicity)
    return f0.squeeze(0).cpu() # temporary hack to allow processing on cpu
|
| 78 |
|
| 79 |
def generate_pitch_reinterp(pitch, pitch_model, invert_pitch_fn, num_samples, num_steps, noise_std=0.4, t0=0.5):
|
| 80 |
'''Generate pitch values for the melodic reinterpretation task'''
|
|
|
|
| 105 |
|
| 106 |
return audio
|
| 107 |
|
| 108 |
+
@spaces.GPU(duration=10)
|
| 109 |
def generate(pitch, num_samples=1, num_steps=100, singers=[3], outfolder='temp', audio_seq_len=750, pitch_qt=None, type='response', invert_pitch_fn=None, t0=0.5, model_type='diffusion'):
|
| 110 |
global pitch_model, audio_model
|
| 111 |
# move the models to device
|
|
|
|
| 198 |
audio = audio[-12*16000:] # consider only last 12 s
|
| 199 |
f0 = extract_pitch(audio)
|
| 200 |
# move f0 to cpu
|
|
|
|
|
|
|
| 201 |
mic_f0 = f0.clone() # save the user input pitch values
|
| 202 |
logging.log(logging.INFO, 'Pitch extracted')
|
| 203 |
f0 = pitch_task_fn(**{
|