Commit 797a86a
Parent(s): 6ba247f

refactored loggers

Files changed:
- TODO.md +2 -0
- audio_utils.py +0 -42
- models/config/train_local.yaml +4 -1
- models/training_environment.py +41 -12
TODO.md CHANGED

@@ -12,6 +12,8 @@
 - ✅ Download songs from [Best Ballroom](https://www.youtube.com/channel/UC0bYSnzAFMwPiEjmVsrvmRg)
 
 - ✅ fix nan values
+- Try higher mels (224) and more ffts (2048)
+- Verify random sample of dataset outputs by hand.
 
 ## Notes
 
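The first new TODO item names concrete spectrogram settings. As a hedged illustration only (not part of this commit), this is how those values would look if the dataset pipeline builds mel spectrograms with torchaudio; the sample rate is a placeholder, not a value taken from this repository:

import torchaudio

# Illustrative sketch of "higher mels (224) and more ffts (2048)" from the TODO above.
mel_spectrogram = torchaudio.transforms.MelSpectrogram(
    sample_rate=22050,  # placeholder sample rate
    n_fft=2048,         # "more ffts (2048)"
    n_mels=224,         # "higher mels (224)"
)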
audio_utils.py DELETED

@@ -1,42 +0,0 @@
-import librosa
-from IPython.display import Audio, display
-import matplotlib.pyplot as plt
-import torch
-SAMPLE_RIR_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/VOiCES_devkit/distant-16k/room-response/rm1/impulse/Lab41-SRI-VOiCES-rm1-impulse-mc01-stu-clo.wav"
-
-SAMPLE_NOISE_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/VOiCES_devkit/distant-16k/distractors/rm1/babb/Lab41-SRI-VOiCES-rm1-babb-mc01-stu-clo.wav"
-
-def plot_spectrogram(spec, title=None, ylabel="freq_bin", aspect="auto", xmax=None):
-    spec = spec.squeeze(0)
-    spec = spec.numpy()
-    fig, axs = plt.subplots(1, 1)
-    axs.set_title(title or "Spectrogram (db)")
-    axs.set_ylabel(ylabel)
-    axs.set_xlabel("frame")
-    im = axs.imshow(librosa.power_to_db(spec), origin="lower", aspect=aspect)
-    if xmax:
-        axs.set_xlim((0, xmax))
-    fig.colorbar(im, ax=axs)
-    plt.show(block=False)
-
-def play_audio(waveform, sample_rate):
-    waveform = waveform.numpy()
-
-    num_channels, num_frames = waveform.shape
-    if num_channels == 1:
-        display(Audio(waveform[0], rate=sample_rate))
-    elif num_channels == 2:
-        display(Audio((waveform[0], waveform[1]), rate=sample_rate))
-    else:
-        raise ValueError("Waveform with more than 2 channels are not supported.")
-
-def get_rir_sample(path, resample=None, processed=False):
-    rir_raw, sample_rate = torch.load(path)
-    if not processed:
-        return rir_raw, sample_rate
-    rir = rir_raw[:, int(sample_rate*1.01):int(sample_rate*1.3)]
-    rir = rir / torch.norm(rir, p=2)
-    rir = torch.flip(rir, [1])
-    return rir, sample_rate
-
-
models/config/train_local.yaml CHANGED

@@ -19,6 +19,7 @@ dance_ids: &dance_ids
 data_module:
   batch_size: 64
   num_workers: 10
+  data_subset: 0.1
   test_proportion: 0.2
 
 datasets:
@@ -55,4 +56,6 @@ trainer:
 
 training_environment:
   learning_rate: 0.00053
-
+  loggers:
+    models.training_environment.SpectrogramLogger:
+      frequency: 100
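For context, a minimal sketch of how this new loggers: block is consumed by the load_loggers helper added in models/training_environment.py below: each key is a dotted "module.ClassName" import path and its nested mapping becomes constructor keyword arguments, so the entry above resolves to roughly the following.

# Illustrative equivalent of the YAML entry above once parsed into a dict.
logger_config = {
    "models.training_environment.SpectrogramLogger": {"frequency": 100},
}
# load_loggers(logger_config) returns [SpectrogramLogger(frequency=100)].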
models/training_environment.py CHANGED

@@ -1,6 +1,7 @@
+import importlib
 from models.utils import calculate_metrics
 
-
+from abc import ABC, abstractmethod
 import pytorch_lightning as pl
 import torch
 import torch.nn as nn
@@ -13,7 +14,6 @@ class TrainingEnvironment(pl.LightningModule):
         criterion: nn.Module,
         config: dict,
         learning_rate=1e-4,
-        log_spectrograms=False,
         *args,
         **kwargs,
     ):
@@ -21,7 +21,9 @@ class TrainingEnvironment(pl.LightningModule):
         self.model = model
         self.criterion = criterion
         self.learning_rate = learning_rate
-        self.
+        self.experiment_loggers = load_loggers(
+            config["training_environment"].get("loggers", {})
+        )
         self.config = config
         self.has_multi_label_predictions = (
             not type(criterion).__name__ == "CrossEntropyLoss"
@@ -48,15 +50,9 @@ class TrainingEnvironment(pl.LightningModule):
             multi_label=self.has_multi_label_predictions,
         )
         self.log_dict(metrics, prog_bar=True)
-
-
-
-        img_index = torch.randint(0, len(features), (1,)).item()
-        img = features[img_index][0]
-        img = (img - img.min()) / (img.max() - img.min())
-        tensorboard.add_image(
-            f"batch: {batch_index}, element: {img_index}", img, 0, dataformats="HW"
-        )
+        experiment = self.logger.experiment
+        for logger in self.experiment_loggers:
+            logger.step(experiment, batch_index, features, labels)
         return loss
 
     def validation_step(
@@ -88,3 +84,36 @@ class TrainingEnvironment(pl.LightningModule):
             "lr_scheduler": scheduler,
             "monitor": "val/loss",
         }
+
+
+class ExperimentLogger(ABC):
+    @abstractmethod
+    def step(self, experiment, data):
+        pass
+
+
+class SpectrogramLogger(ExperimentLogger):
+    def __init__(self, frequency=100) -> None:
+        self.frequency = frequency
+        self.counter = 0
+
+    def step(self, experiment, batch_index, x, label):
+        if self.counter == self.frequency:
+            self.counter = 0
+            img_index = torch.randint(0, len(x), (1,)).item()
+            img = x[img_index][0]
+            img = (img - img.min()) / (img.max() - img.min())
+            experiment.add_image(
+                f"batch: {batch_index}, element: {img_index}", img, 0, dataformats="HW"
+            )
+        self.counter += 1
+
+
+def load_loggers(logger_config: dict) -> list[ExperimentLogger]:
+    loggers = []
+    for logger_path, kwargs in logger_config.items():
+        module_name, class_name = logger_path.rsplit(".", 1)
+        module = importlib.import_module(module_name)
+        Logger = getattr(module, class_name)
+        loggers.append(Logger(**kwargs))
+    return loggers
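Finally, a small self-contained usage sketch (not part of the commit) of how the refactored logger pieces fit together outside Lightning. DummyExperiment, the tensor shapes, and the loop length are made up for illustration, and it assumes the repository's models package is importable:

import torch
from models.training_environment import SpectrogramLogger, load_loggers

class DummyExperiment:
    """Stand-in for self.logger.experiment (e.g. a TensorBoard SummaryWriter)."""
    def add_image(self, tag, img, step, dataformats="HW"):
        print(f"add_image({tag!r}, shape={tuple(img.shape)}, dataformats={dataformats!r})")

# Same shape of config that __init__ reads from config["training_environment"]["loggers"].
loggers = load_loggers(
    {"models.training_environment.SpectrogramLogger": {"frequency": 100}}
)
assert isinstance(loggers[0], SpectrogramLogger)

experiment = DummyExperiment()
features = torch.rand(8, 1, 128, 130)      # hypothetical batch of spectrograms
labels = torch.zeros(8, dtype=torch.long)  # hypothetical labels (unused by SpectrogramLogger)
for batch_index in range(250):
    for logger in loggers:
        # Mirrors the call added to training_step; one normalized spectrogram image
        # is written each time the internal counter reaches `frequency`.
        logger.step(experiment, batch_index, features, labels)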