Commit 14ae0ea
Parent: a22b103
WIP: Initial pipeline scripts
Files changed:
- .gitignore +1 -0
- README.md +5 -2
- datasets.py +61 -0
- download_egfx.sh +21 -0
- egfx.ipynb +0 -0
- guitar_generation_test.ipynb +0 -0
- models.py +105 -0
- train.py +32 -0
.gitignore
CHANGED
@@ -4,3 +4,4 @@ wandb/
 *.egg-info/
 data/
 .DS_Store
+__pycache__/
README.md
CHANGED
@@ -1,4 +1,7 @@
 
-wget https://zenodo.org/record/7044411/files/Clean.zip?download=1 Clean.zip
+wget https://zenodo.org/record/7044411/files/Clean.zip?download=1 Clean.zip
 
-
+unzip Clean.zip
+
+python3 -m venv env
+pip install -e .
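Note: as written, wget treats a bare second argument as another URL to fetch, so the README line above does not actually save the file as Clean.zip; download_egfx.sh below uses the -O flag for this. A corrected form, quoting the URL so the shell does not interpret the query string, would be:

wget "https://zenodo.org/record/7044411/files/Clean.zip?download=1" -O Clean.zip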
datasets.py
ADDED
@@ -0,0 +1,61 @@
import torch
from torch.utils.data import Dataset
import torchaudio
import torchaudio.transforms as T
import torch.nn.functional as F
from pathlib import Path
from typing import List

# https://zenodo.org/record/7044411/

LENGTH = 2**18  # 262144 samples, ~12 seconds at 22.05 kHz
ORIG_SR = 48000


class GuitarFXDataset(Dataset):
    def __init__(
        self,
        root: str,
        sample_rate: int,
        length: int = LENGTH,
        effect_type: List[str] = None,
    ):
        self.length = length
        self.wet_files = []
        self.dry_files = []
        self.labels = []
        self.root = Path(root)
        if effect_type is None:
            # Compare directory names (not Path objects) when excluding "Clean"
            effect_type = [
                d.name for d in self.root.iterdir() if d.is_dir() and d.name != "Clean"
            ]
        for i, effect in enumerate(effect_type):
            for pickup in Path(self.root / effect).iterdir():
                wet = list(pickup.glob("*.wav"))
                self.wet_files += wet
                self.dry_files += list(self.root.glob(f"Clean/{pickup.name}/**/*.wav"))
                # Label only the files added this iteration, not the running total
                self.labels += [i] * len(wet)
        print(
            f"Found {len(self.wet_files)} wet files and {len(self.dry_files)} dry files"
        )
        self.resampler = T.Resample(ORIG_SR, sample_rate)

    def __len__(self):
        return len(self.dry_files)

    def __getitem__(self, idx):
        x, sr = torchaudio.load(self.wet_files[idx])
        y, sr = torchaudio.load(self.dry_files[idx])
        effect_label = self.labels[idx]

        resampled_x = self.resampler(x)
        resampled_y = self.resampler(y)
        # Pad or crop to length
        if resampled_x.shape[-1] < self.length:
            resampled_x = F.pad(resampled_x, (0, self.length - resampled_x.shape[-1]))
        elif resampled_x.shape[-1] > self.length:
            resampled_x = resampled_x[:, : self.length]
        if resampled_y.shape[-1] < self.length:
            resampled_y = F.pad(resampled_y, (0, self.length - resampled_y.shape[-1]))
        elif resampled_y.shape[-1] > self.length:
            resampled_y = resampled_y[:, : self.length]
        return (resampled_x, resampled_y, effect_label)
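For reference, a minimal usage sketch of GuitarFXDataset. The root path here is hypothetical and assumes the EGFx data is laid out as <root>/<EffectName>/<Pickup>/*.wav with dry takes under Clean/, as the globbing above expects:

from torch.utils.data import DataLoader
from datasets import GuitarFXDataset

dataset = GuitarFXDataset(root="data/egfx", sample_rate=22050, effect_type=["Phaser"])
wet, dry, label = dataset[0]  # two (channels, LENGTH) tensors and an int effect label
loader = DataLoader(dataset, batch_size=2)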
download_egfx.sh
ADDED
@@ -0,0 +1,21 @@
#!/bin/bash
mkdir -p data
cd data
mkdir -p egfx
cd egfx
wget https://zenodo.org/record/7044411/files/BluesDriver.zip?download=1 -O BluesDriver.zip
wget https://zenodo.org/record/7044411/files/Chorus.zip?download=1 -O Chorus.zip
wget https://zenodo.org/record/7044411/files/Clean.zip?download=1 -O Clean.zip
wget https://zenodo.org/record/7044411/files/Digital-Delay.zip?download=1 -O Digital-Delay.zip
wget https://zenodo.org/record/7044411/files/Flanger.zip?download=1 -O Flanger.zip
wget https://zenodo.org/record/7044411/files/Hall-Reverb.zip?download=1 -O Hall-Reverb.zip
wget https://zenodo.org/record/7044411/files/Phaser.zip?download=1 -O Phaser.zip
wget https://zenodo.org/record/7044411/files/Plate-Reverb.zip?download=1 -O Plate-Reverb.zip
wget https://zenodo.org/record/7044411/files/RAT.zip?download=1 -O RAT.zip
wget https://zenodo.org/record/7044411/files/Spring-Reverb.zip?download=1 -O Spring-Reverb.zip
wget https://zenodo.org/record/7044411/files/Sweep-Echo.zip?download=1 -O Sweep-Echo.zip
wget https://zenodo.org/record/7044411/files/TapeEcho.zip?download=1 -O TapeEcho.zip
wget https://zenodo.org/record/7044411/files/TubeScreamer.zip?download=1 -O TubeScreamer.zip
unzip \*.zip
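The escaped glob on the last line lets unzip expand the *.zip pattern itself rather than the shell, so the script works even though no archives exist when it starts. Run it from the repository root:

bash download_egfx.sh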
egfx.ipynb
ADDED
The diff for this file is too large to render. See raw diff.

guitar_generation_test.ipynb
ADDED
The diff for this file is too large to render. See raw diff.
models.py
ADDED
@@ -0,0 +1,105 @@
from audio_diffusion_pytorch import AudioDiffusionModel
import torch
from torch import Tensor
import pytorch_lightning as pl
from einops import rearrange
import wandb

SAMPLE_RATE = 22050  # From audio-diffusion-pytorch


class TCNWrapper(pl.LightningModule):
    # WIP: despite the name, this currently wraps AudioDiffusionModel too
    def __init__(self):
        super().__init__()
        self.model = AudioDiffusionModel(in_channels=1)

    def forward(self, x: torch.Tensor):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        loss = self.common_step(batch, batch_idx, mode="train")
        return loss

    def validation_step(self, batch, batch_idx):
        loss = self.common_step(batch, batch_idx, mode="val")

    def common_step(self, batch, batch_idx, mode: str = "train"):
        x, target, label = batch
        loss = self(x)  # AudioDiffusionModel's forward returns the diffusion loss
        self.log(f"{mode}_loss", loss, on_step=True, on_epoch=True)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(
            self.parameters(), lr=1e-4, betas=(0.95, 0.999), eps=1e-6, weight_decay=1e-3
        )


class AudioDiffusionWrapper(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.model = AudioDiffusionModel(in_channels=1)

    def forward(self, x: torch.Tensor):
        return self.model(x)

    def sample(self, *args, **kwargs) -> Tensor:
        return self.model.sample(*args, **kwargs)

    def training_step(self, batch, batch_idx):
        loss = self.common_step(batch, batch_idx, mode="train")
        return loss

    def validation_step(self, batch, batch_idx):
        loss = self.common_step(batch, batch_idx, mode="val")

    def common_step(self, batch, batch_idx, mode: str = "train"):
        x, target, label = batch
        loss = self(x)
        self.log(f"{mode}_loss", loss, on_step=True, on_epoch=True)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(
            self.parameters(), lr=1e-4, betas=(0.95, 0.999), eps=1e-6, weight_decay=1e-3
        )

    def on_validation_epoch_start(self):
        self.log_next = True

    def on_validation_batch_start(self, batch, batch_idx, dataloader_idx):
        x, target, label = batch
        if self.log_next:
            self.log_sample(x)
            self.log_next = False

    @torch.no_grad()
    def log_sample(self, batch, num_steps=10):
        # Get start diffusion noise
        noise = torch.randn(batch.shape, device=self.device)
        sampled = self.model.sample(
            noise=noise, num_steps=num_steps  # Suggested range: 2-50
        )
        # log_wandb_audio_batch is a module-level helper, not a method
        log_wandb_audio_batch(
            id="sample",
            samples=sampled,
            sampling_rate=SAMPLE_RATE,
            caption=f"Sampled in {num_steps} steps",
        )


def log_wandb_audio_batch(
    id: str, samples: Tensor, sampling_rate: int, caption: str = ""
):
    num_items = samples.shape[0]
    samples = rearrange(samples, "b c t -> b t c")
    for idx in range(num_items):
        wandb.log(
            {
                f"sample_{idx}_{id}": wandb.Audio(
                    samples[idx].cpu().numpy(),
                    caption=caption,
                    sample_rate=sampling_rate,
                )
            }
        )
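A minimal sketch of drawing audio from the wrapper outside of training, using the same sample(noise=..., num_steps=...) call that log_sample makes above. The checkpoint path is hypothetical; an untrained model runs too, it just produces noise:

import torch
from models import AudioDiffusionWrapper

model = AudioDiffusionWrapper()  # or .load_from_checkpoint("remfx.ckpt"), hypothetical path
noise = torch.randn(1, 1, 2**18)  # (batch, channels, samples), matching LENGTH in datasets.py
audio = model.sample(noise=noise, num_steps=10)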
train.py
ADDED
@@ -0,0 +1,32 @@
from pytorch_lightning.loggers import WandbLogger
import pytorch_lightning as pl
import torch
from torch.utils.data import DataLoader
from datasets import GuitarFXDataset
from models import AudioDiffusionWrapper

SAMPLE_RATE = 22050
TRAIN_SPLIT = 0.8


def main():
    # wandb_logger = WandbLogger(project="RemFX", save_dir="./")
    trainer = pl.Trainer()  # logger=wandb_logger)
    guitfx = GuitarFXDataset(
        root="/Users/matthewrice/mir_datasets/egfxset",
        sample_rate=SAMPLE_RATE,
        effect_type=["Phaser"],
    )
    train_size = int(TRAIN_SPLIT * len(guitfx))
    val_size = len(guitfx) - train_size
    train_dataset, val_dataset = torch.utils.data.random_split(
        guitfx, [train_size, val_size]
    )
    train = DataLoader(train_dataset, batch_size=2)
    val = DataLoader(val_dataset, batch_size=2)
    model = AudioDiffusionWrapper()
    trainer.fit(model=model, train_dataloaders=train, val_dataloaders=val)


if __name__ == "__main__":
    main()
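To try the pipeline end to end, note the root path in main() is machine-specific; it would need to point at your local copy of the data, e.g. the data/egfx directory created by download_egfx.sh:

bash download_egfx.sh
python3 train.py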