OpenSound committed on
Commit
0cfeca0
·
verified ·
1 Parent(s): 4b92e60

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -2
app.py CHANGED
@@ -2,7 +2,8 @@ import gradio as gr
2
  import spaces
3
  import yaml
4
  import torch
5
- import librosa
 
6
  from diffusers import DDIMScheduler
7
  from transformers import AutoProcessor, ClapModel
8
  from model.udit import UDiT
@@ -98,7 +99,13 @@ def sample_diffusion(mixture, timbre, ddim_steps=50, eta=0, seed=2023, guidance_
98
  @spaces.GPU
99
  def tse(gt_file_input, text_input, num_infer_steps, eta, seed, guidance_scale, guidance_rescale):
100
  with torch.no_grad():
101
- mixture, _ = librosa.load(gt_file_input, sr=sample_rate)
 
 
 
 
 
 
102
  # Check the length of the audio in samples
103
  current_length = len(mixture)
104
  target_length = sample_rate * 10
 
2
  import spaces
3
  import yaml
4
  import torch
5
+ # import librosa
6
+ import torchaudio
7
  from diffusers import DDIMScheduler
8
  from transformers import AutoProcessor, ClapModel
9
  from model.udit import UDiT
 
99
  @spaces.GPU
100
  def tse(gt_file_input, text_input, num_infer_steps, eta, seed, guidance_scale, guidance_rescale):
101
  with torch.no_grad():
102
+ # mixture, _ = librosa.load(gt_file_input, sr=sample_rate)
103
+ mixture, sr = torchaudio.load(gt_file_input)
104
+ if sr != sample_rate:
105
+ resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=sample_rate)
106
+ mixture = resampler(mixture)
107
+ sr = sample_rate
108
+
109
  # Check the length of the audio in samples
110
  current_length = len(mixture)
111
  target_length = sample_rate * 10