Spaces:
Running
Running
File size: 4,217 Bytes
9fd1204 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
from typing import Optional, Tuple
import numpy as np
import torch
from diffusers.utils.torch_utils import randn_tensor
class DiagonalGaussianDistribution(object):
    """Diagonal Gaussian described by a tensor holding mean and log-variance.

    ``parameters`` is split into two equal chunks along ``_dim``: the first
    chunk is the mean, the second the log-variance (clamped to ``[-30, 20]``).
    When ``deterministic`` is true, the standard deviation and variance are
    replaced by zeros, and ``kl``/``nll`` return a single zero.
    """

    def __init__(self, parameters: torch.Tensor, deterministic: bool = False, _dim: int = 1):
        # Note: _dim is the new argument added here after copying from diffusers
        self.parameters = parameters
        self.mean, self.logvar = torch.chunk(parameters, 2, dim=_dim)
        # Clamp before exponentiating so std/var stay finite.
        self.logvar = torch.clamp(self.logvar, -30.0, 20.0)
        self.deterministic = deterministic
        self.std = torch.exp(0.5 * self.logvar)
        self.var = torch.exp(self.logvar)
        if self.deterministic:
            self.var = self.std = torch.zeros_like(
                self.mean, device=self.parameters.device, dtype=self.parameters.dtype
            )

    def _zero(self) -> torch.Tensor:
        """Return a one-element zero tensor on the parameters' device and dtype."""
        # A CPU/float32 zero (what ``torch.Tensor([0.0])`` produces) cannot be
        # combined with tensors living on another device.
        return torch.zeros(1, device=self.parameters.device, dtype=self.parameters.dtype)

    def sample(self, generator: Optional[torch.Generator] = None) -> torch.Tensor:
        """Draw ``mean + std * noise`` with noise shaped like ``mean``.

        Args:
            generator: Optional generator forwarded to ``randn_tensor``.
        """
        # make sure sample is on the same device as the parameters and has same dtype
        sample = randn_tensor(
            self.mean.shape,
            generator=generator,
            device=self.parameters.device,
            dtype=self.parameters.dtype,
        )
        x = self.mean + self.std * sample
        return x

    def kl(
        self,
        other: Optional["DiagonalGaussianDistribution"] = None,
        dims: Tuple[int, ...] = (1, 2, 3),
    ) -> torch.Tensor:
        """Return the KL divergence from ``other``, summed over ``dims``.

        Args:
            other: Distribution to compare against.  When ``None`` the
                closed form against a zero-mean, unit-variance Gaussian is
                used.
            dims: Dimensions to reduce.  The default ``(1, 2, 3)`` matches the
                previously hard-coded behaviour (presumably 4-D inputs with
                batch first; pass other dims for other layouts).

        Returns:
            A one-element zero tensor when deterministic, otherwise the
            reduced divergence.
        """
        if self.deterministic:
            return self._zero()
        if other is None:
            return 0.5 * torch.sum(
                torch.pow(self.mean, 2) + self.var - 1.0 - self.logvar,
                dim=dims,
            )
        return 0.5 * torch.sum(
            torch.pow(self.mean - other.mean, 2) / other.var
            + self.var / other.var
            - 1.0
            - self.logvar
            + other.logvar,
            dim=dims,
        )

    def nll(self, sample: torch.Tensor, dims: Tuple[int, ...] = (1, 2, 3)) -> torch.Tensor:
        """Return the Gaussian negative log-likelihood of ``sample``.

        Args:
            sample: Tensor compared element-wise against ``mean``.
            dims: Dimensions to sum over.

        Returns:
            A one-element zero tensor when deterministic, otherwise the
            reduced negative log-likelihood.
        """
        if self.deterministic:
            return self._zero()
        logtwopi = float(np.log(2.0 * np.pi))
        return 0.5 * torch.sum(
            logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var,
            dim=dims,
        )

    def mode(self) -> torch.Tensor:
        """Return the mean, which is the mode of a Gaussian."""
        return self.mean
@torch.no_grad()
def _expand_linear_with_zeroed_weights(
module: torch.nn.Linear, new_in_features: Optional[int] = None, new_out_features: Optional[int] = None
) -> torch.nn.Linear:
if new_in_features is None:
new_in_features = module.in_features
if new_out_features is None:
new_out_features = module.out_features
bias = getattr(module, "bias", None)
new_module = torch.nn.Linear(new_in_features, new_out_features, bias=bias is not None)
new_module.to(device=module.weight.device, dtype=module.weight.dtype)
new_module.weight.zero_()
new_module.weight.data[: module.weight.data.shape[0], : module.weight.data.shape[1]].copy_(module.weight.data)
if bias is not None:
new_module.bias.zero_()
new_module.bias.data[: bias.data.shape[0]].copy_(bias.data)
return new_module
@torch.no_grad()
def _expand_conv3d_with_zeroed_weights(
module: torch.nn.Linear, new_in_channels: Optional[int] = None, new_out_channels: Optional[int] = None
) -> torch.nn.Conv3d:
if new_in_channels is None:
new_in_channels = module.in_channels
if new_out_channels is None:
new_out_channels = module.out_channels
bias = getattr(module, "bias", None)
new_module = torch.nn.Conv3d(
new_in_channels,
new_out_channels,
kernel_size=module.kernel_size,
stride=module.stride,
padding=module.padding,
dilation=module.dilation,
groups=module.groups,
bias=bias is not None,
)
new_module.to(device=module.weight.device, dtype=module.weight.dtype)
new_module.weight.zero_()
new_module.weight.data[: module.weight.data.shape[0], : module.weight.data.shape[1]].copy_(module.weight.data)
if bias is not None:
new_module.bias.zero_()
new_module.bias.data[: bias.data.shape[0]].copy_(bias.data)
return new_module
|