Spaces:
Running
on
Zero
Running
on
Zero
Fixed GPU access problem
Browse files
app.py
CHANGED
|
@@ -53,23 +53,28 @@ def debug_profile(func):
|
|
| 53 |
return pp.profile(sort_by='cumulative', out_lines=10)(func)
|
| 54 |
return func
|
| 55 |
|
| 56 |
-
|
| 57 |
def extract_pitch(audio, unvoice=True, sr=16000, frame_shift_ms=10, log=True):
|
| 58 |
if not isinstance(audio, torch.Tensor):
|
| 59 |
audio = torch.Tensor(audio).to(device)
|
| 60 |
if len(audio.shape) == 1:
|
| 61 |
audio = audio.unsqueeze(0)
|
| 62 |
hop_length = int(sr * frame_shift_ms / 1000)
|
| 63 |
-
f0 = torchcrepe.predict(audio,
|
| 64 |
sr,
|
| 65 |
hop_length=hop_length,
|
| 66 |
model='tiny',
|
| 67 |
device=device,
|
| 68 |
fmin=80,
|
| 69 |
-
fmax=
|
|
|
|
|
|
|
| 70 |
)
|
| 71 |
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
def generate_pitch_reinterp(pitch, pitch_model, invert_pitch_fn, num_samples, num_steps, noise_std=0.4, t0=0.5):
|
| 75 |
'''Generate pitch values for the melodic reinterpretation task'''
|
|
@@ -100,7 +105,7 @@ def generate_audio(audio_model, f0s, invert_audio_fn, singers=[3], num_steps=100
|
|
| 100 |
|
| 101 |
return audio
|
| 102 |
|
| 103 |
-
@spaces.GPU(duration=
|
| 104 |
def generate(pitch, num_samples=1, num_steps=100, singers=[3], outfolder='temp', audio_seq_len=750, pitch_qt=None, type='response', invert_pitch_fn=None, t0=0.5, model_type='diffusion'):
|
| 105 |
global pitch_model, audio_model
|
| 106 |
# move the models to device
|
|
@@ -193,8 +198,6 @@ def container_generate(model_selection, task_selection, audio, singer_id, t0):
|
|
| 193 |
audio = audio[-12*16000:] # consider only last 12 s
|
| 194 |
f0 = extract_pitch(audio)
|
| 195 |
# move f0 to cpu
|
| 196 |
-
if f0.device != 'cpu': #TODO:
|
| 197 |
-
f0 = f0.cpu()
|
| 198 |
mic_f0 = f0.clone() # save the user input pitch values
|
| 199 |
logging.log(logging.INFO, 'Pitch extracted')
|
| 200 |
f0 = pitch_task_fn(**{
|
|
|
|
| 53 |
return pp.profile(sort_by='cumulative', out_lines=10)(func)
|
| 54 |
return func
|
| 55 |
|
| 56 |
+
@spaces.GPU(duration=10)
def extract_pitch(audio, unvoice=True, sr=16000, frame_shift_ms=10, log=True):
    """Extract an f0 (pitch) contour from ``audio`` using torchcrepe.

    Runs the 'tiny' CREPE model with Viterbi decoding over 80–600 Hz,
    zeroes the periodicity of near-silent frames (below -80 dB), and
    masks frames whose periodicity is below 0.4 as unvoiced.

    Args:
        audio: 1-D waveform or (1, T) tensor; plain arrays/lists are
            converted to a tensor on ``device``.
        unvoice: unused in this body — NOTE(review): confirm whether the
            0.4-periodicity unvoiced masking should depend on this flag.
        sr: sample rate of ``audio`` in Hz.
        frame_shift_ms: hop between analysis frames, in milliseconds.
        log: unused in this body — NOTE(review): confirm whether a log
            transform of f0 was intended here.

    Returns:
        1-D CPU tensor of f0 values in Hz (NaN where unvoiced).
    """
    if not isinstance(audio, torch.Tensor):
        audio = torch.Tensor(audio).to(device)
    if len(audio.shape) == 1:
        # torchcrepe expects a batch dimension: (batch, samples)
        audio = audio.unsqueeze(0)
    hop_length = int(sr * frame_shift_ms / 1000)
    f0, periodicity = torchcrepe.predict(
        audio,
        sr,
        hop_length=hop_length,
        model='tiny',
        device=device,
        fmin=80,
        fmax=600,
        decoder=torchcrepe.decode.viterbi,
        return_periodicity=True,
    )
    # Suppress periodicity in near-silent regions so they read as unvoiced.
    periodicity = torchcrepe.threshold.Silence(-80)(
        periodicity=periodicity, audio=audio, sample_rate=sr,
        hop_length=hop_length)
    # Replace low-confidence frames (periodicity < 0.4) with NaN.
    f0 = torchcrepe.threshold.At(0.4)(f0, periodicity=periodicity)
    return f0.squeeze(0).cpu() # temporary hack to allow processing on cpu
|
| 78 |
|
| 79 |
def generate_pitch_reinterp(pitch, pitch_model, invert_pitch_fn, num_samples, num_steps, noise_std=0.4, t0=0.5):
|
| 80 |
'''Generate pitch values for the melodic reinterpretation task'''
|
|
|
|
| 105 |
|
| 106 |
return audio
|
| 107 |
|
| 108 |
+
@spaces.GPU(duration=10)
|
| 109 |
def generate(pitch, num_samples=1, num_steps=100, singers=[3], outfolder='temp', audio_seq_len=750, pitch_qt=None, type='response', invert_pitch_fn=None, t0=0.5, model_type='diffusion'):
|
| 110 |
global pitch_model, audio_model
|
| 111 |
# move the models to device
|
|
|
|
| 198 |
audio = audio[-12*16000:] # consider only last 12 s
|
| 199 |
f0 = extract_pitch(audio)
|
| 200 |
# move f0 to cpu
|
|
|
|
|
|
|
| 201 |
mic_f0 = f0.clone() # save the user input pitch values
|
| 202 |
logging.log(logging.INFO, 'Pitch extracted')
|
| 203 |
f0 = pitch_task_fn(**{
|