Spaces:

teticio
/

audio-diffusion

Runtime error

teticio committed on Oct 19, 2022

Commit

529c646

1 Parent(s): 4f552a8

get working for monochrome

Files changed (3) hide show

audiodiffusion/utils.py CHANGED Viewed

@@ -31,27 +31,6 @@ def renew_vae_resnet_paths(old_list, n_shave_prefix_segments=0):
     return mapping
-def renew_attention_paths(old_list, n_shave_prefix_segments=0):
-    """
-    Updates paths inside attentions to the new naming scheme (local renaming)
-    """
-    mapping = []
-    for old_item in old_list:
-        new_item = old_item
-        #         new_item = new_item.replace('norm.weight', 'group_norm.weight')
-        #         new_item = new_item.replace('norm.bias', 'group_norm.bias')
-        #         new_item = new_item.replace('proj_out.weight', 'proj_attn.weight')
-        #         new_item = new_item.replace('proj_out.bias', 'proj_attn.bias')
-        #         new_item = shave_segments(new_item, n_shave_prefix_segments=n_shave_prefix_segments)
-        mapping.append({"old": old_item, "new": new_item})
-    return mapping
 def renew_vae_attention_paths(old_list, n_shave_prefix_segments=0):
     """
     Updates paths inside attentions to the new naming scheme (local renaming)

     return mapping
 def renew_vae_attention_paths(old_list, n_shave_prefix_segments=0):
     """
     Updates paths inside attentions to the new naming scheme (local renaming)

config/ldm_autoencoder_kl.yaml CHANGED Viewed

@@ -4,22 +4,23 @@ model:
   target: ldm.models.autoencoder.AutoencoderKL
   params:
     monitor: "val/rec_loss"
-    embed_dim: 3
     lossconfig:
       target: ldm.modules.losses.LPIPSWithDiscriminator
       params:
         disc_start: 50001
         kl_weight: 0.000001
         disc_weight: 0.5
     ddconfig:
       double_z: True
-      z_channels: 4
       resolution: 256
-      in_channels: 3
-      out_ch: 3
       ch: 128
-      ch_mult: [ 1,2,4 ]  # num_down = len(ch_mult)-1
       num_res_blocks: 2
       attn_resolutions: [ ]
       dropout: 0.0
@@ -27,5 +28,5 @@ model:
 lightning:
   trainer:
     benchmark: True
-    accelerator: gpu
     devices: 1

   target: ldm.models.autoencoder.AutoencoderKL
   params:
     monitor: "val/rec_loss"
+    embed_dim: 1  # = in_channels
     lossconfig:
       target: ldm.modules.losses.LPIPSWithDiscriminator
       params:
         disc_start: 50001
         kl_weight: 0.000001
         disc_weight: 0.5
+        disc_in_channels: 1  # = out_ch
     ddconfig:
       double_z: True
+      z_channels: 1  # must = embed_dim due to HF limitation
       resolution: 256
+      in_channels: 1
+      out_ch: 1
       ch: 128
+      ch_mult: [ 1,2,4,4 ]  # num_down = len(ch_mult)-1
       num_res_blocks: 2
       attn_resolutions: [ ]
       dropout: 0.0
 lightning:
   trainer:
     benchmark: True
+    #accelerator: gpu
     devices: 1

scripts/train_vae.py CHANGED Viewed

@@ -1,10 +1,6 @@
 # pip install -e git+https://github.com/CompVis/stable-diffusion.git@master
 # pip install -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers
-# TODO
-# grayscale
-# docstrings
 import os
 import argparse
@@ -117,9 +113,9 @@ class HFModelCheckpoint(ModelCheckpoint):
         self.hf_checkpoint = hf_checkpoint
     def on_train_epoch_end(self, trainer, pl_module):
         super().on_train_epoch_end(trainer, pl_module)
-        ldm_checkpoint = self.format_checkpoint_name(
-            {'epoch': trainer.current_epoch})
         convert_ldm_to_hf_vae(ldm_checkpoint, self.ldm_config,
                               self.hf_checkpoint)
@@ -148,6 +144,7 @@ if __name__ == "__main__":
                         default=1)
     parser.add_argument("--resolution", type=int, default=256)
     parser.add_argument("--hop_length", type=int, default=512)
     args = parser.parse_args()
     config = OmegaConf.load(args.ldm_config_file)
@@ -165,7 +162,8 @@ if __name__ == "__main__":
         trainer_opt,
         resume_from_checkpoint=args.resume_from_checkpoint,
         callbacks=[
-            ImageLogger(channels=config.model.params.ddconfig.out_ch,
                         resolution=args.resolution,
                         hop_length=args.hop_length),
             HFModelCheckpoint(ldm_config=config,

 # pip install -e git+https://github.com/CompVis/stable-diffusion.git@master
 # pip install -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers
 import os
 import argparse
         self.hf_checkpoint = hf_checkpoint
     def on_train_epoch_end(self, trainer, pl_module):
+        ldm_checkpoint = self._get_metric_interpolated_filepath_name(
+            {'epoch': trainer.current_epoch}, trainer)
         super().on_train_epoch_end(trainer, pl_module)
         convert_ldm_to_hf_vae(ldm_checkpoint, self.ldm_config,
                               self.hf_checkpoint)
                         default=1)
     parser.add_argument("--resolution", type=int, default=256)
     parser.add_argument("--hop_length", type=int, default=512)
+    parser.add_argument("--save_images_batches", type=int, default=1000)
     args = parser.parse_args()
     config = OmegaConf.load(args.ldm_config_file)
         trainer_opt,
         resume_from_checkpoint=args.resume_from_checkpoint,
         callbacks=[
+            ImageLogger(every=args.save_images_batches,
+                        channels=config.model.params.ddconfig.out_ch,
                         resolution=args.resolution,
                         hop_length=args.hop_length),
             HFModelCheckpoint(ldm_config=config,