Spaces:

openfree
/

ginigen-sora

Running

eitanrich committed on Oct 30, 2024

Commit

028b6a1

1 Parent(s): 645fba0

VAE: Support returning intermediate features for 3d perceptual loss

Files changed (1) hide show

xora/models/autoencoders/video_autoencoder.py CHANGED Viewed

@@ -310,7 +310,9 @@ class Encoder(nn.Module):
             * self.patch_size
         )
-    def forward(self, sample: torch.FloatTensor) -> torch.FloatTensor:
         r"""The forward method of the `Encoder` class."""
         downsample_in_time = sample.shape[2] != 1
@@ -332,10 +334,14 @@ class Encoder(nn.Module):
             else lambda x: x
         )
         for down_block in self.down_blocks:
             sample = checkpoint_fn(down_block)(
                 sample, downsample_in_time=downsample_in_time
             )
         sample = checkpoint_fn(self.mid_block)(sample)
@@ -363,6 +369,11 @@ class Encoder(nn.Module):
             else:
                 raise ValueError(f"Invalid input shape: {sample.shape}")
         return sample

             * self.patch_size
         )
+    def forward(
+        self, sample: torch.FloatTensor, return_features=False
+    ) -> torch.FloatTensor:
         r"""The forward method of the `Encoder` class."""
         downsample_in_time = sample.shape[2] != 1
             else lambda x: x
         )
+        if return_features:
+            features = []
         for down_block in self.down_blocks:
             sample = checkpoint_fn(down_block)(
                 sample, downsample_in_time=downsample_in_time
             )
+            if return_features:
+                features.append(sample)
         sample = checkpoint_fn(self.mid_block)(sample)
             else:
                 raise ValueError(f"Invalid input shape: {sample.shape}")
+        if return_features:
+            features.append(
+                sample[:, sample.shape[1] // 2, ...]
+            )  # Add the latent means as final feature
+            return sample, features
         return sample