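Update app.py: load input audio with torchaudio instead of librosa, resampling to the target sample rate when needed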
Browse files
app.py
CHANGED
@@ -2,7 +2,8 @@ import gradio as gr
|
|
2 |
import spaces
|
3 |
import yaml
|
4 |
import torch
|
5 |
-
import librosa
|
|
|
6 |
from diffusers import DDIMScheduler
|
7 |
from transformers import AutoProcessor, ClapModel
|
8 |
from model.udit import UDiT
|
@@ -98,7 +99,13 @@ def sample_diffusion(mixture, timbre, ddim_steps=50, eta=0, seed=2023, guidance_
|
|
98 |
@spaces.GPU
|
99 |
def tse(gt_file_input, text_input, num_infer_steps, eta, seed, guidance_scale, guidance_rescale):
|
100 |
with torch.no_grad():
|
101 |
-
mixture, _ = librosa.load(gt_file_input, sr=sample_rate)
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
# Check the length of the audio in samples
|
103 |
current_length = len(mixture)
|
104 |
target_length = sample_rate * 10
|
|
|
2 |
import spaces
|
3 |
import yaml
|
4 |
import torch
|
5 |
+
# import librosa
|
6 |
+
import torchaudio
|
7 |
from diffusers import DDIMScheduler
|
8 |
from transformers import AutoProcessor, ClapModel
|
9 |
from model.udit import UDiT
|
|
|
99 |
@spaces.GPU
|
100 |
def tse(gt_file_input, text_input, num_infer_steps, eta, seed, guidance_scale, guidance_rescale):
|
101 |
with torch.no_grad():
|
102 |
+
# mixture, _ = librosa.load(gt_file_input, sr=sample_rate)
|
103 |
+
mixture, sr = torchaudio.load(gt_file_input)
|
104 |
+
if sr != sample_rate:
|
105 |
+
resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=sample_rate)
|
106 |
+
mixture = resampler(mixture)
|
107 |
+
sr = sample_rate
|
108 |
+
|
109 |
# Check the length of the audio in samples
|
110 |
current_length = len(mixture)
|
111 |
target_length = sample_rate * 10
|