Spaces:
Runtime error
Runtime error
change audio format from flac to 320kbps mp3 (#3)
Browse files
- change audio format from flac to 320kbps mp3 (2ed96609921a553f9d175ad30437e9da5923dd15)
- pipeline_ace_step.py +5 -4
pipeline_ace_step.py
CHANGED
|
@@ -24,6 +24,7 @@ from models.ace_step_transformer import ACEStepTransformer2DModel
|
|
| 24 |
from models.lyrics_utils.lyric_tokenizer import VoiceBpeTokenizer
|
| 25 |
from apg_guidance import apg_forward, MomentumBuffer, cfg_forward, cfg_zero_star, cfg_double_condition_forward
|
| 26 |
import torchaudio
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
torch.backends.cudnn.benchmark = False
|
|
@@ -917,7 +918,7 @@ class ACEStepPipeline:
|
|
| 917 |
target_latents = torch.cate([to_right_pad_gt_latents, target_latents], dim=0)
|
| 918 |
return target_latents
|
| 919 |
|
| 920 |
-
def latents2audio(self, latents, target_wav_duration_second=30, sample_rate=48000, save_path=None, format="flac"):
|
| 921 |
output_audio_paths = []
|
| 922 |
bs = latents.shape[0]
|
| 923 |
audio_lengths = [target_wav_duration_second * sample_rate] * bs
|
|
@@ -930,7 +931,7 @@ class ACEStepPipeline:
|
|
| 930 |
output_audio_paths.append(output_audio_path)
|
| 931 |
return output_audio_paths
|
| 932 |
|
| 933 |
-
def save_wav_file(self, target_wav, idx, save_path=None, sample_rate=48000, format="flac"):
|
| 934 |
if save_path is None:
|
| 935 |
logger.warning("save_path is None, using default path ./outputs/")
|
| 936 |
base_path = f"./outputs"
|
|
@@ -941,7 +942,7 @@ class ACEStepPipeline:
|
|
| 941 |
|
| 942 |
output_path_flac = f"{base_path}/output_{time.strftime('%Y%m%d%H%M%S')}_{idx}.{format}"
|
| 943 |
target_wav = target_wav.float()
|
| 944 |
-
torchaudio.save(output_path_flac, target_wav, sample_rate=sample_rate, format=format)
|
| 945 |
return output_path_flac
|
| 946 |
|
| 947 |
def infer_latents(self, input_audio_path):
|
|
@@ -986,7 +987,7 @@ class ACEStepPipeline:
|
|
| 986 |
edit_n_max: float = 1.0,
|
| 987 |
edit_n_avg: int = 1,
|
| 988 |
save_path: str = None,
|
| 989 |
-
format: str = "flac",
|
| 990 |
batch_size: int = 1,
|
| 991 |
debug: bool = False,
|
| 992 |
):
|
|
|
|
| 24 |
from models.lyrics_utils.lyric_tokenizer import VoiceBpeTokenizer
|
| 25 |
from apg_guidance import apg_forward, MomentumBuffer, cfg_forward, cfg_zero_star, cfg_double_condition_forward
|
| 26 |
import torchaudio
|
| 27 |
+
import torio
|
| 28 |
|
| 29 |
|
| 30 |
torch.backends.cudnn.benchmark = False
|
|
|
|
| 918 |
target_latents = torch.cate([to_right_pad_gt_latents, target_latents], dim=0)
|
| 919 |
return target_latents
|
| 920 |
|
| 921 |
+
def latents2audio(self, latents, target_wav_duration_second=30, sample_rate=48000, save_path=None, format="mp3"):
|
| 922 |
output_audio_paths = []
|
| 923 |
bs = latents.shape[0]
|
| 924 |
audio_lengths = [target_wav_duration_second * sample_rate] * bs
|
|
|
|
| 931 |
output_audio_paths.append(output_audio_path)
|
| 932 |
return output_audio_paths
|
| 933 |
|
| 934 |
+
def save_wav_file(self, target_wav, idx, save_path=None, sample_rate=48000, format="mp3"):
|
| 935 |
if save_path is None:
|
| 936 |
logger.warning("save_path is None, using default path ./outputs/")
|
| 937 |
base_path = f"./outputs"
|
|
|
|
| 942 |
|
| 943 |
output_path_flac = f"{base_path}/output_{time.strftime('%Y%m%d%H%M%S')}_{idx}.{format}"
|
| 944 |
target_wav = target_wav.float()
|
| 945 |
+
torchaudio.save(output_path_flac, target_wav, sample_rate=sample_rate, format=format, compression=torio.io.CodecConfig(bit_rate=320000))
|
| 946 |
return output_path_flac
|
| 947 |
|
| 948 |
def infer_latents(self, input_audio_path):
|
|
|
|
| 987 |
edit_n_max: float = 1.0,
|
| 988 |
edit_n_avg: int = 1,
|
| 989 |
save_path: str = None,
|
| 990 |
+
format: str = "mp3",
|
| 991 |
batch_size: int = 1,
|
| 992 |
debug: bool = False,
|
| 993 |
):
|