Gregniuki committed on
Commit
ef99879
·
verified ·
1 Parent(s): 6a25cb7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -49,7 +49,7 @@ device = (
49
  if device == "cuda":
50
  dtype = torch.float16
51
  elif device == "cpu":
52
- dtype = torch.bfloat16
53
  else:
54
  dtype = torch.float32
55
 
@@ -61,7 +61,7 @@ print(f"Using device: {device}, dtype: {dtype}")
61
  pipe = pipeline(
62
  "automatic-speech-recognition",
63
  model="openai/whisper-large-v3-turbo",
64
- torch_dtype=torch.float16,
65
  device=device,
66
  )
67
  #vocos = Vocos.from_pretrained("charactr/vocos-mel-24khz")
@@ -242,7 +242,7 @@ def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence,
242
  generated_waves.append(generated_wave)
243
  # spectrograms.append(generated_mel_spec[0].cpu().numpy())
244
  # Ensure generated_mel_spec is in a compatible dtype (e.g., float32) before passing it to numpy
245
- generated_mel_spec = generated_mel_spec.to(dtype=torch.float32) # Convert to float32 if it's in bfloat16
246
 
247
  # Proceed with the rest of your operations
248
  spectrograms.append(generated_mel_spec[0].cpu().numpy())
 
49
  if device == "cuda":
50
  dtype = torch.float16
51
  elif device == "cpu":
52
+ dtype = torch.float32
53
  else:
54
  dtype = torch.float32
55
 
 
61
  pipe = pipeline(
62
  "automatic-speech-recognition",
63
  model="openai/whisper-large-v3-turbo",
64
+ torch_dtype=torch.float32,
65
  device=device,
66
  )
67
  #vocos = Vocos.from_pretrained("charactr/vocos-mel-24khz")
 
242
  generated_waves.append(generated_wave)
243
  # spectrograms.append(generated_mel_spec[0].cpu().numpy())
244
  # Ensure generated_mel_spec is in a compatible dtype (e.g., float32) before passing it to numpy
245
+ # generated_mel_spec = generated_mel_spec.to(dtype=torch.float32) # Convert to float32 if it's in bfloat16
246
 
247
  # Proceed with the rest of your operations
248
  spectrograms.append(generated_mel_spec[0].cpu().numpy())