Spaces · Runtime error
Commit 6448f47 · 1 Parent(s): e543fe8

Fix new dataset to work for remfx training

Files changed:
- README.md (+1 -2)
- remfx/datasets.py (+0 -4)
- remfx/models.py (+8 -32)
README.md CHANGED

```diff
@@ -9,10 +9,9 @@
 5. `pip install -e umx`
 
 ## Download [VocalSet Dataset](https://zenodo.org/record/1193957)
-1. `wget https://zenodo.org/record/
+1. `wget https://zenodo.org/record/1442513/files/VocalSet1-2.zip?download=1`
 2. `mv VocalSet.zip?download=1 VocalSet.zip`
 3. `unzip VocalSet.zip`
-4. Manually split singers into train, val, test directories
 
 # Training
 ## Steps
```
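For convenience, the three shell steps above can also be scripted. Below is a minimal Python sketch assuming the Zenodo URL from the updated README and extraction into the current directory; the manual train/val/test split step is gone from the README, and the `singer_splits` table in `remfx/datasets.py` suggests the split is now handled in code.

```python
# Sketch: scripted equivalent of the README's wget / mv / unzip steps.
# The URL comes from the updated README; the destination paths are assumptions.
import urllib.request
import zipfile
from pathlib import Path

VOCALSET_URL = "https://zenodo.org/record/1442513/files/VocalSet1-2.zip?download=1"
archive = Path("VocalSet.zip")

if not archive.exists():
    urllib.request.urlretrieve(VOCALSET_URL, archive)  # same file wget would fetch

with zipfile.ZipFile(archive) as zf:
    zf.extractall(".")  # same result as `unzip VocalSet.zip`
```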
remfx/datasets.py CHANGED

```diff
@@ -19,7 +19,6 @@ from remfx.utils import create_sequential_chunks
 # https://zenodo.org/record/1193957 -> VocalSet
 
 ALL_EFFECTS = effects.Pedalboard_Effects
-print(ALL_EFFECTS)
 
 
 singer_splits = {
@@ -206,7 +205,6 @@ class VocalSet(Dataset):
         else:
             num_kept_effects = len(self.effects_to_keep)
         effect_indices = effect_indices[:num_kept_effects]
-        print(effect_indices)
 
         # Index in effect settings
         effect_names_to_apply = [self.effects_to_keep[i] for i in effect_indices]
@@ -249,8 +247,6 @@ class VocalSet(Dataset):
         for label_idx in dry_labels:
             dry_labels_tensor[label_idx] = 1.0
 
-        # effects_present = torch.sum(one_hot, dim=0).float()
-        print(dry_labels_tensor, wet_labels_tensor)
         # Normalize
         normalized_dry = self.normalize(dry)
         normalized_wet = self.normalize(wet)
```
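The surviving context lines show how `VocalSet` encodes which effects were applied: a multi-hot tensor indexed by effect, with `1.0` written at each applied effect's position (`dry_labels_tensor[label_idx] = 1.0`). Below is a minimal sketch of that encoding; the variable names are illustrative stand-ins, not the actual remfx names.

```python
# Sketch of the multi-hot effect labels implied by `dry_labels_tensor[label_idx] = 1.0`.
# `num_effects` and `applied_indices` are illustrative, e.g. num_effects = len(ALL_EFFECTS).
import torch

num_effects = 5           # total number of candidate effects
applied_indices = [0, 3]  # indices of effects applied to this clip

labels = torch.zeros(num_effects)
for idx in applied_indices:
    labels[idx] = 1.0     # mark each applied effect

print(labels)  # tensor([1., 0., 0., 1., 0.])
```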
remfx/models.py CHANGED

```diff
@@ -94,9 +94,9 @@ class RemFXModel(pl.LightningModule):
         return loss
 
     def common_step(self, batch, batch_idx, mode: str = "train"):
-
+        x, y, _, _ = batch
+        loss, output = self.model((x, y))
         self.log(f"{mode}_loss", loss)
-        x, y, label = batch
         # Metric logging
         with torch.no_grad():
             for metric in self.metrics:
```
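The rewritten `common_step` implies a contract between the new dataset and the wrapped model: each batch is a 4-tuple, of which only the first two elements (input and target audio) are forwarded as an `(x, y)` pair, and the wrapped model returns `(loss, output)`. A minimal sketch of that contract follows; everything except the unpacking pattern is an illustrative assumption.

```python
# Sketch of the batch/model contract implied by the updated common_step.
# The dataset yields (x, y, dry_labels, wet_labels); only x and y feed the model here.
import torch

class DummyEffectRemovalModel(torch.nn.Module):
    """Stand-in for self.model: consumes an (x, y) tuple, returns (loss, output)."""

    def forward(self, batch):
        x, target = batch                      # mirrors the updated forward() methods below
        output = x                             # identity "model", for illustration only
        loss = torch.nn.functional.l1_loss(output, target)
        return loss, output

model = DummyEffectRemovalModel()
x = torch.randn(2, 1, 16)                      # (batch, channels, time)
y = torch.randn(2, 1, 16)
dry_labels = torch.zeros(2, 5)                 # extra label tensors are ignored here
wet_labels = torch.ones(2, 5)

batch = (x, y, dry_labels, wet_labels)
x, y, _, _ = batch                             # same unpacking as common_step
loss, output = model((x, y))
print(loss.item(), output.shape)
```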
```diff
@@ -123,7 +123,7 @@ class RemFXModel(pl.LightningModule):
     def on_train_batch_start(self, batch, batch_idx):
         # Log initial audio
         if self.log_train_audio:
-            x, y,
+            x, y, _, _ = batch
             # Concat samples together for easier viewing in dashboard
             input_samples = rearrange(x, "b c t -> c (b t)").unsqueeze(0)
             target_samples = rearrange(y, "b c t -> c (b t)").unsqueeze(0)
```
```diff
@@ -145,7 +145,7 @@ class RemFXModel(pl.LightningModule):
         self.log_train_audio = False
 
     def on_validation_batch_start(self, batch, batch_idx, dataloader_idx):
-        x, target,
+        x, target, _, _ = batch
         # Log Input Metrics
         for metric in self.metrics:
             # SISDR returns negative values, so negate them
```
```diff
@@ -189,7 +189,7 @@ class RemFXModel(pl.LightningModule):
     def on_test_batch_start(self, batch, batch_idx, dataloader_idx):
         self.on_validation_batch_start(batch, batch_idx, dataloader_idx)
         # Log FAD
-        x, target,
+        x, target, _, _ = batch
         self.log(
             "Input_FAD",
             self.metrics["FAD"](x, target),
```
```diff
@@ -237,7 +237,7 @@ class OpenUnmixModel(torch.nn.Module):
         self.l1loss = torch.nn.L1Loss()
 
     def forward(self, batch):
-        x, target
+        x, target = batch
         X = spectrogram(x, self.window, self.n_fft, self.hop_length, self.alpha)
         Y = self.model(X)
         sep_out = self.separator(x).squeeze(1)
```
```diff
@@ -260,7 +260,7 @@ class DemucsModel(torch.nn.Module):
         self.l1loss = torch.nn.L1Loss()
 
     def forward(self, batch):
-        x, target
+        x, target = batch
         output = self.model(x).squeeze(1)
         loss = self.mrstftloss(output, target) + self.l1loss(output, target) * 100
         return loss, output
```
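For context on `DemucsModel.forward` above: the training loss is a spectral term plus an L1 waveform term weighted by 100. The sketch below mirrors that weighting with a single-resolution STFT stand-in for `self.mrstftloss` (the project presumably uses a multi-resolution STFT loss); all values are illustrative.

```python
# Sketch of the Demucs-style loss weighting: spectral term + waveform L1 term * 100.
# `simple_stft_loss` is a single-resolution stand-in, not the loss remfx actually uses.
import torch
import torch.nn.functional as F

def simple_stft_loss(output: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    """L1 distance between magnitude STFTs (illustrative only)."""
    window = torch.hann_window(1024)
    O = torch.stft(output.squeeze(1), n_fft=1024, window=window, return_complex=True).abs()
    T = torch.stft(target.squeeze(1), n_fft=1024, window=window, return_complex=True).abs()
    return F.l1_loss(O, T)

l1loss = torch.nn.L1Loss()
output = torch.randn(2, 1, 4096)
target = torch.randn(2, 1, 4096)

# Same structure as DemucsModel.forward: spectral loss + waveform L1 * 100
loss = simple_stft_loss(output, target) + l1loss(output, target) * 100
print(loss.item())
```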
```diff
@@ -275,7 +275,7 @@ class DiffusionGenerationModel(nn.Module):
         self.model = DiffusionModel(in_channels=n_channels)
 
     def forward(self, batch):
-        x, target
+        x, target = batch
         sampled_out = self.model.sample(x)
         return self.model(x), sampled_out
 
```
```diff
@@ -481,30 +481,6 @@ class Cnn14(nn.Module):
         return clipwise_output
 
 
-def spectrogram(
-    x: torch.Tensor,
-    window: torch.Tensor,
-    n_fft: int,
-    hop_length: int,
-    alpha: float,
-) -> torch.Tensor:
-    bs, chs, samp = x.size()
-    x = x.view(bs * chs, -1)  # move channels onto batch dim
-
-    X = torch.stft(
-        x,
-        n_fft=n_fft,
-        hop_length=hop_length,
-        window=window,
-        return_complex=True,
-    )
-
-    # move channels back
-    X = X.view(bs, chs, X.shape[-2], X.shape[-1])
-
-    return torch.pow(X.abs() + 1e-8, alpha)
-
-
 class FXClassifier(pl.LightningModule):
     def __init__(
         self,
```
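The deleted `spectrogram` helper computed an alpha-compressed magnitude spectrogram, `|STFT(x)| ** alpha`, folding the channel dimension into the batch for the STFT and unfolding it afterwards. Since `OpenUnmixModel.forward` above still calls `spectrogram(...)`, a definition presumably survives elsewhere in the file and the removed copy was redundant. For reference, here is a self-contained sketch of the same computation; the parameter values are illustrative.

```python
# Sketch reproducing what the removed helper computed: |STFT(x)| ** alpha,
# with channels folded onto the batch dim for the STFT and restored afterwards.
import torch

def compressed_spectrogram(x, window, n_fft, hop_length, alpha):
    bs, chs, samp = x.size()
    x = x.view(bs * chs, -1)                       # fold channels into batch
    X = torch.stft(x, n_fft=n_fft, hop_length=hop_length,
                   window=window, return_complex=True)
    X = X.view(bs, chs, X.shape[-2], X.shape[-1])  # unfold channels
    return torch.pow(X.abs() + 1e-8, alpha)        # compressed magnitude

x = torch.randn(2, 1, 8192)
spec = compressed_spectrogram(x, torch.hann_window(2048), 2048, 512, alpha=0.3)
print(spec.shape)  # (batch, channels, freq bins, frames)
```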