MusicGenDemucs

Runtime error

App Files Files Community

nakas committed on Jun 30, 2023

Commit

61440ca

1 Parent(s): fed2fbd

Update audiocraft/data/audio.py

Browse files

Files changed (1) hide show

audiocraft/data/audio.py +16 -7

audiocraft/data/audio.py CHANGED Viewed

@@ -72,7 +72,6 @@ def audio_info(filepath: tp.Union[str, Path]) -> AudioFileInfo:
 def _av_read(filepath: tp.Union[str, Path], seek_time: float = 0, duration: float = -1.) -> tp.Tuple[torch.Tensor, int]:
     """FFMPEG-based audio file reading using PyAV bindings.
     Soundfile cannot read mp3 and av_read is more efficient than torchaudio.
     Args:
         filepath (str or Path): Path to audio file to read.
         seek_time (float): Time at which to start reading in the file.
@@ -116,7 +115,6 @@ def _av_read(filepath: tp.Union[str, Path], seek_time: float = 0, duration: floa
 def audio_read(filepath: tp.Union[str, Path], seek_time: float = 0.,
                duration: float = -1., pad: bool = False) -> tp.Tuple[torch.Tensor, int]:
     """Read audio by picking the most appropriate backend tool based on the audio format.
     Args:
         filepath (str or Path): Path to audio file to read.
         seek_time (float): Time at which to start reading in the file.
@@ -152,7 +150,7 @@ def audio_read(filepath: tp.Union[str, Path], seek_time: float = 0.,
 def audio_write(stem_name: tp.Union[str, Path],
                 wav: torch.Tensor, sample_rate: int,
-                normalize: bool = True,
                 strategy: str = 'peak', peak_clip_headroom_db: float = 1,
                 rms_headroom_db: float = 18, loudness_headroom_db: float = 14,
                 loudness_compressor: bool = False,
@@ -161,6 +159,8 @@ def audio_write(stem_name: tp.Union[str, Path],
     """Convenience function for saving audio to disk. Returns the filename the audio was written to.
     Args:
         stem_name (str or Path): Filename without extension which will be added automatically.
         normalize (bool): if `True` (default), normalizes according to the prescribed
             strategy (see after). If `False`, the strategy is only used in case clipping
             would happen.
@@ -172,7 +172,7 @@ def audio_write(stem_name: tp.Union[str, Path],
             than the `peak_clip` one to avoid further clipping.
         loudness_headroom_db (float): Target loudness for loudness normalization.
         loudness_compressor (bool): Uses tanh for soft clipping when strategy is 'loudness'.
-        log_clipping (bool): If True, basic logging on stderr when clipping still
             occurs despite strategy (only for 'rms').
         make_parent_dir (bool): Make parent directory if it doesn't exist.
     Returns:
@@ -187,17 +187,26 @@ def audio_write(stem_name: tp.Union[str, Path],
     wav = normalize_audio(wav, normalize, strategy, peak_clip_headroom_db,
                           rms_headroom_db, loudness_headroom_db, log_clipping=log_clipping,
                           sample_rate=sample_rate, stem_name=str(stem_name))
-    suffix = '.wav'
     if not add_suffix:
         suffix = ''
     path = Path(str(stem_name) + suffix)
     if make_parent_dir:
         path.parent.mkdir(exist_ok=True, parents=True)
     try:
-        ta.save(path, wav, sample_rate)
     except Exception:
         if path.exists():
             # we do not want to leave half written files around.
             path.unlink()
         raise
-    return path

 def _av_read(filepath: tp.Union[str, Path], seek_time: float = 0, duration: float = -1.) -> tp.Tuple[torch.Tensor, int]:
     """FFMPEG-based audio file reading using PyAV bindings.
     Soundfile cannot read mp3 and av_read is more efficient than torchaudio.
     Args:
         filepath (str or Path): Path to audio file to read.
         seek_time (float): Time at which to start reading in the file.
 def audio_read(filepath: tp.Union[str, Path], seek_time: float = 0.,
                duration: float = -1., pad: bool = False) -> tp.Tuple[torch.Tensor, int]:
     """Read audio by picking the most appropriate backend tool based on the audio format.
     Args:
         filepath (str or Path): Path to audio file to read.
         seek_time (float): Time at which to start reading in the file.
 def audio_write(stem_name: tp.Union[str, Path],
                 wav: torch.Tensor, sample_rate: int,
+                format: str = 'wav', mp3_rate: int = 320, normalize: bool = True,
                 strategy: str = 'peak', peak_clip_headroom_db: float = 1,
                 rms_headroom_db: float = 18, loudness_headroom_db: float = 14,
                 loudness_compressor: bool = False,
     """Convenience function for saving audio to disk. Returns the filename the audio was written to.
     Args:
         stem_name (str or Path): Filename without extension which will be added automatically.
+        format (str): Either "wav" or "mp3".
+        mp3_rate (int): kbps when using mp3s.
         normalize (bool): if `True` (default), normalizes according to the prescribed
             strategy (see after). If `False`, the strategy is only used in case clipping
             would happen.
             than the `peak_clip` one to avoid further clipping.
         loudness_headroom_db (float): Target loudness for loudness normalization.
         loudness_compressor (bool): Uses tanh for soft clipping when strategy is 'loudness'.
+        log_clipping (bool): If True, basic logging on stderr when clipping still
             occurs despite strategy (only for 'rms').
         make_parent_dir (bool): Make parent directory if it doesn't exist.
     Returns:
     wav = normalize_audio(wav, normalize, strategy, peak_clip_headroom_db,
                           rms_headroom_db, loudness_headroom_db, log_clipping=log_clipping,
                           sample_rate=sample_rate, stem_name=str(stem_name))
+    kwargs: dict = {}
+    if format == 'mp3':
+        suffix = '.mp3'
+        kwargs.update({"compression": mp3_rate})
+    elif format == 'wav':
+        wav = i16_pcm(wav)
+        suffix = '.wav'
+        kwargs.update({"encoding": "PCM_S", "bits_per_sample": 16})
+    else:
+        raise RuntimeError(f"Invalid format {format}. Only wav or mp3 are supported.")
     if not add_suffix:
         suffix = ''
     path = Path(str(stem_name) + suffix)
     if make_parent_dir:
         path.parent.mkdir(exist_ok=True, parents=True)
     try:
+        ta.save(path, wav, sample_rate, **kwargs)
     except Exception:
         if path.exists():
             # we do not want to leave half written files around.
             path.unlink()
         raise
+    return path