"""
Copyright (c) Meta Platforms, Inc. and affiliates.
All rights reserved.
This source code is licensed under the license found in the
LICENSE file in the root directory of this source tree.
"""


import logging
from turtle import forward

import visualize.ca_body.nn.layers as la
from visualize.ca_body.nn.layers import weight_norm_wrapper

import numpy as np
import torch as th
import torch.nn as nn
import torch.nn.functional as F


logger = logging.getLogger(__name__)


# pyre-ignore
def weights_initializer(lrelu_slope=0.2):
    # pyre-ignore
    def init_fn(m):
        if isinstance(
            m,
            (
                nn.Conv2d,
                nn.Conv1d,
                nn.ConvTranspose2d,
                nn.Linear,
            ),
        ):
            gain = nn.init.calculate_gain("leaky_relu", lrelu_slope)
            nn.init.kaiming_uniform_(m.weight.data, a=gain)
            if hasattr(m, "bias") and m.bias is not None:
                nn.init.zeros_(m.bias.data)
        else:
            logger.debug(f"skipping initialization for {m}")

    return init_fn


# pyre-ignore
def WeightNorm(x, dim=0):
    return nn.utils.weight_norm(x, dim=dim)


# pyre-ignore
def np_warp_bias(uv_size):
    xgrid, ygrid = np.meshgrid(np.linspace(-1.0, 1.0, uv_size), np.linspace(-1.0, 1.0, uv_size))
    grid = np.concatenate((xgrid[None, :, :], ygrid[None, :, :]), axis=0)[None, ...].astype(
        np.float32
    )
    return grid


class Conv2dBias(nn.Conv2d):
    __annotations__ = {"bias": th.Tensor}

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        size,
        stride=1,
        padding=1,
        bias=True,
        *args,
        **kwargs,
    ):
        super().__init__(
            in_channels,
            out_channels,
            bias=False,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            *args,
            **kwargs,
        )
        if not bias:
            logger.warning("ignoring bias=False")
        self.bias = nn.Parameter(th.zeros(out_channels, size, size))

    def forward(self, x):
        bias = self.bias.clone()
        return (
            # pyre-ignore
            th.conv2d(
                x,
                self.weight,
                bias=None,
                stride=self.stride,
                # pyre-ignore
                padding=self.padding,
                dilation=self.dilation,
                groups=self.groups,
            )
            + bias[np.newaxis]
        )


class Conv1dBias(nn.Conv1d):
    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        size,
        stride=1,
        padding=0,
        bias=True,
        *args,
        **kwargs,
    ):
        super().__init__(
            in_channels,
            out_channels,
            bias=False,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            *args,
            **kwargs,
        )
        if not bias:
            logger.warning("ignoring bias=False")
        self.bias = nn.Parameter(th.zeros(out_channels, size))

    def forward(self, x):
        return (
            # pyre-ignore
            th.conv1d(
                x,
                self.weight,
                bias=None,
                stride=self.stride,
                # pyre-ignore
                padding=self.padding,
                dilation=self.dilation,
                groups=self.groups,
            )
            + self.bias
        )


class UpConvBlock(nn.Module):
    # pyre-ignore
    def __init__(self, in_channels, out_channels, size, lrelu_slope=0.2):
        super().__init__()
        # Intergration: it was not exist in github, but assume upsample is same as other class
        self.upsample = nn.UpsamplingBilinear2d(size)
        self.conv_resize = la.Conv2dWN(
            in_channels=in_channels, out_channels=out_channels, kernel_size=1
        )
        self.conv1 = la.Conv2dWNUB(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            height=size,
            width=size,
            padding=1,
        )
        self.lrelu1 = nn.LeakyReLU(lrelu_slope)
        # self.conv2 = nn.utils.weight_norm(
        #     Conv2dBias(in_channels, out_channels, kernel_size=3, size=size), dim=None,
        # )
        # self.lrelu2 = nn.LeakyReLU(lrelu_slope)

    # pyre-ignore
    def forward(self, x):
        x_up = self.upsample(x)
        x_skip = self.conv_resize(x_up)
        x = self.conv1(x_up)
        x = self.lrelu1(x)
        return x + x_skip


class ConvBlock1d(nn.Module):
    def __init__(
        self,
        in_channels,
        out_channels,
        size,
        lrelu_slope=0.2,
        kernel_size=3,
        padding=1,
        wnorm_dim=0,
    ):
        super().__init__()

        self.conv_resize = WeightNorm(
            nn.Conv1d(in_channels, out_channels, kernel_size=1), dim=wnorm_dim
        )
        self.conv1 = WeightNorm(
            Conv1dBias(
                in_channels,
                in_channels,
                kernel_size=kernel_size,
                padding=padding,
                size=size,
            ),
            dim=wnorm_dim,
        )
        self.lrelu1 = nn.LeakyReLU(lrelu_slope)
        self.conv2 = WeightNorm(
            Conv1dBias(
                in_channels,
                out_channels,
                kernel_size=kernel_size,
                padding=padding,
                size=size,
            ),
            dim=wnorm_dim,
        )
        self.lrelu2 = nn.LeakyReLU(lrelu_slope)

    def forward(self, x):
        x_skip = self.conv_resize(x)
        x = self.conv1(x)
        x = self.lrelu1(x)
        x = self.conv2(x)
        x = self.lrelu2(x)
        return x + x_skip


class ConvBlock(nn.Module):
    def __init__(
        self,
        in_channels,
        out_channels,
        size,
        lrelu_slope=0.2,
        kernel_size=3,
        padding=1,
        wnorm_dim=0,
    ):
        super().__init__()

        Conv2dWNUB = weight_norm_wrapper(la.Conv2dUB, "Conv2dWNUB", g_dim=wnorm_dim, v_dim=None)
        Conv2dWN = weight_norm_wrapper(th.nn.Conv2d, "Conv2dWN", g_dim=wnorm_dim, v_dim=None)

        # TODO: do we really need this?
        self.conv_resize = Conv2dWN(in_channels, out_channels, kernel_size=1)
        self.conv1 = Conv2dWNUB(
            in_channels,
            in_channels,
            kernel_size=kernel_size,
            padding=padding,
            height=size,
            width=size,
        )

        self.lrelu1 = nn.LeakyReLU(lrelu_slope)
        self.conv2 = Conv2dWNUB(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            padding=padding,
            height=size,
            width=size,
        )
        self.lrelu2 = nn.LeakyReLU(lrelu_slope)

    def forward(self, x):
        x_skip = self.conv_resize(x)
        x = self.conv1(x)
        x = self.lrelu1(x)
        x = self.conv2(x)
        x = self.lrelu2(x)
        return x + x_skip


class ConvBlockNoSkip(nn.Module):
    def __init__(
        self,
        in_channels,
        out_channels,
        size,
        lrelu_slope=0.2,
        kernel_size=3,
        padding=1,
        wnorm_dim=0,
    ):
        super().__init__()

        self.conv1 = WeightNorm(
            Conv2dBias(
                in_channels,
                in_channels,
                kernel_size=kernel_size,
                padding=padding,
                size=size,
            ),
            dim=wnorm_dim,
        )
        self.lrelu1 = nn.LeakyReLU(lrelu_slope)
        self.conv2 = WeightNorm(
            Conv2dBias(
                in_channels,
                out_channels,
                kernel_size=kernel_size,
                padding=padding,
                size=size,
            ),
            dim=wnorm_dim,
        )
        self.lrelu2 = nn.LeakyReLU(lrelu_slope)

    def forward(self, x):
        x = self.conv1(x)
        x = self.lrelu1(x)
        x = self.conv2(x)
        x = self.lrelu2(x)
        return x


class ConvDownBlock(nn.Module):
    def __init__(self, in_channels, out_channels, size, lrelu_slope=0.2, groups=1, wnorm_dim=0):
        """Constructor.

        Args:
            in_channels: int, # of input channels
            out_channels: int, # of input channels
            size: the *input* size
        """
        super().__init__()

        Conv2dWNUB = weight_norm_wrapper(la.Conv2dUB, "Conv2dWNUB", g_dim=wnorm_dim, v_dim=None)
        Conv2dWN = weight_norm_wrapper(th.nn.Conv2d, "Conv2dWN", g_dim=wnorm_dim, v_dim=None)

        self.conv_resize = Conv2dWN(
            in_channels, out_channels, kernel_size=1, stride=2, groups=groups
        )
        self.conv1 = Conv2dWNUB(
            in_channels,
            in_channels,
            kernel_size=3,
            height=size,
            width=size,
            groups=groups,
            padding=1,
        )
        self.lrelu1 = nn.LeakyReLU(lrelu_slope)

        self.conv2 = Conv2dWNUB(
            in_channels,
            out_channels,
            kernel_size=3,
            stride=2,
            height=size // 2,
            width=size // 2,
            groups=groups,
            padding=1,
        )
        self.lrelu2 = nn.LeakyReLU(lrelu_slope)

    def forward(self, x):
        x_skip = self.conv_resize(x)
        x = self.conv1(x)
        x = self.lrelu1(x)
        x = self.conv2(x)
        x = self.lrelu2(x)
        return x + x_skip


class UpConvBlockDeep(nn.Module):
    def __init__(self, in_channels, out_channels, size, lrelu_slope=0.2, wnorm_dim=0, groups=1):
        super().__init__()
        self.upsample = nn.UpsamplingBilinear2d(size)

        Conv2dWNUB = weight_norm_wrapper(la.Conv2dUB, "Conv2dWNUB", g_dim=wnorm_dim, v_dim=None)
        Conv2dWN = weight_norm_wrapper(th.nn.Conv2d, "Conv2dWN", g_dim=wnorm_dim, v_dim=None)
        # NOTE: the old one normalizes only across one dimension

        self.conv_resize = Conv2dWN(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            groups=groups,
        )
        self.conv1 = Conv2dWNUB(
            in_channels,
            in_channels,
            kernel_size=3,
            height=size,
            width=size,
            padding=1,
            groups=groups,
        )
        self.lrelu1 = nn.LeakyReLU(lrelu_slope)
        self.conv2 = Conv2dWNUB(
            in_channels,
            out_channels,
            kernel_size=3,
            height=size,
            width=size,
            padding=1,
            groups=groups,
        )
        self.lrelu2 = nn.LeakyReLU(lrelu_slope)

    def forward(self, x):
        x_up = self.upsample(x)
        x_skip = self.conv_resize(x_up)

        x = x_up
        x = self.conv1(x)
        x = self.lrelu1(x)
        x = self.conv2(x)
        x = self.lrelu2(x)

        return x + x_skip


class ConvBlockPositional(nn.Module):
    def __init__(
        self,
        in_channels,
        out_channels,
        pos_map,
        lrelu_slope=0.2,
        kernel_size=3,
        padding=1,
        wnorm_dim=0,
    ):
        """Block with positional encoding.

        Args:
            in_channels: # of input channels (not counting the positional encoding)
            out_channels: # of output channels
            pos_map: tensor [P, size, size]
        """
        super().__init__()
        assert len(pos_map.shape) == 3 and pos_map.shape[1] == pos_map.shape[2]
        self.register_buffer("pos_map", pos_map)

        self.conv_resize = WeightNorm(nn.Conv2d(in_channels, out_channels, 1), dim=wnorm_dim)

        self.conv1 = WeightNorm(
            nn.Conv2d(
                in_channels + pos_map.shape[0],
                in_channels,
                kernel_size=3,
                padding=padding,
            ),
            dim=wnorm_dim,
        )
        self.lrelu1 = nn.LeakyReLU(lrelu_slope)
        self.conv2 = WeightNorm(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=padding),
            dim=wnorm_dim,
        )
        self.lrelu2 = nn.LeakyReLU(lrelu_slope)

    def forward(self, x):
        B = x.shape[0]

        x_skip = self.conv_resize(x)

        pos = self.pos_map[np.newaxis].expand(B, -1, -1, -1)

        x = th.cat([x, pos], dim=1)
        x = self.conv1(x)
        x = self.lrelu1(x)
        x = self.conv2(x)
        x = self.lrelu2(x)
        return x + x_skip


class UpConvBlockPositional(nn.Module):
    def __init__(
        self,
        in_channels,
        out_channels,
        pos_map,
        lrelu_slope=0.2,
        wnorm_dim=0,
    ):
        """Block with positional encoding.

        Args:
            in_channels: # of input channels (not counting the positional encoding)
            out_channels: # of output channels
            pos_map: tensor [P, size, size]
        """
        super().__init__()
        assert len(pos_map.shape) == 3 and pos_map.shape[1] == pos_map.shape[2]
        self.register_buffer("pos_map", pos_map)
        size = pos_map.shape[1]

        self.in_channels = in_channels
        self.out_channels = out_channels

        self.upsample = nn.UpsamplingBilinear2d(size)

        if in_channels != out_channels:
            self.conv_resize = WeightNorm(nn.Conv2d(in_channels, out_channels, 1), dim=wnorm_dim)

        self.conv1 = WeightNorm(
            nn.Conv2d(
                in_channels + pos_map.shape[0],
                in_channels,
                kernel_size=3,
                padding=1,
            ),
            dim=wnorm_dim,
        )
        self.lrelu1 = nn.LeakyReLU(lrelu_slope)
        self.conv2 = WeightNorm(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            dim=wnorm_dim,
        )
        self.lrelu2 = nn.LeakyReLU(lrelu_slope)

    def forward(self, x):
        B = x.shape[0]

        x_up = self.upsample(x)

        x_skip = x_up
        if self.in_channels != self.out_channels:
            x_skip = self.conv_resize(x_up)

        pos = self.pos_map[np.newaxis].expand(B, -1, -1, -1)

        x = th.cat([x_up, pos], dim=1)
        x = self.conv1(x)
        x = self.lrelu1(x)
        x = self.conv2(x)
        x = self.lrelu2(x)

        return x + x_skip


class UpConvBlockDeepNoBias(nn.Module):
    def __init__(self, in_channels, out_channels, size, lrelu_slope=0.2, wnorm_dim=0, groups=1):
        super().__init__()
        self.upsample = nn.UpsamplingBilinear2d(size)
        # NOTE: the old one normalizes only across one dimension
        self.conv_resize = WeightNorm(
            nn.Conv2d(in_channels, out_channels, 1, groups=groups), dim=wnorm_dim
        )
        self.conv1 = WeightNorm(
            nn.Conv2d(in_channels, in_channels, padding=1, kernel_size=3, groups=groups),
            dim=wnorm_dim,
        )
        self.lrelu1 = nn.LeakyReLU(lrelu_slope)
        self.conv2 = WeightNorm(
            nn.Conv2d(in_channels, out_channels, padding=1, kernel_size=3, groups=groups),
            dim=wnorm_dim,
        )
        self.lrelu2 = nn.LeakyReLU(lrelu_slope)

    def forward(self, x):
        x_up = self.upsample(x)
        x_skip = self.conv_resize(x_up)

        x = x_up
        x = self.conv1(x)
        x = self.lrelu1(x)
        x = self.conv2(x)
        x = self.lrelu2(x)

        return x + x_skip


class UpConvBlockXDeep(nn.Module):
    def __init__(self, in_channels, out_channels, size, lrelu_slope=0.2, wnorm_dim=0):
        super().__init__()
        self.upsample = nn.UpsamplingBilinear2d(size)
        # TODO: see if this is necce
        self.conv_resize = WeightNorm(nn.Conv2d(in_channels, out_channels, 1), dim=wnorm_dim)
        self.conv1 = WeightNorm(
            Conv2dBias(in_channels, in_channels // 2, kernel_size=3, size=size),
            dim=wnorm_dim,
        )
        self.lrelu1 = nn.LeakyReLU(lrelu_slope)

        self.conv2 = WeightNorm(
            Conv2dBias(in_channels // 2, in_channels // 2, kernel_size=3, size=size),
            dim=wnorm_dim,
        )
        self.lrelu2 = nn.LeakyReLU(lrelu_slope)

        self.conv2 = WeightNorm(
            Conv2dBias(in_channels // 2, in_channels // 2, kernel_size=3, size=size),
            dim=wnorm_dim,
        )
        self.lrelu2 = nn.LeakyReLU(lrelu_slope)

        self.conv3 = WeightNorm(
            Conv2dBias(in_channels // 2, out_channels, kernel_size=3, size=size),
            dim=wnorm_dim,
        )
        self.lrelu3 = nn.LeakyReLU(lrelu_slope)

    def forward(self, x):
        x_up = self.upsample(x)
        x_skip = self.conv_resize(x_up)

        x = x_up
        x = self.conv1(x)
        x = self.lrelu1(x)
        x = self.conv2(x)
        x = self.lrelu2(x)
        x = self.conv3(x)
        x = self.lrelu3(x)

        return x + x_skip


class UpConvCondBlock(nn.Module):
    def __init__(self, in_channels, out_channels, size, cond_channels, lrelu_slope=0.2):
        super().__init__()
        self.upsample = nn.UpsamplingBilinear2d(size)
        self.conv_resize = nn.utils.weight_norm(nn.Conv2d(in_channels, out_channels, 1), dim=None)
        self.conv1 = WeightNorm(
            Conv2dBias(in_channels + cond_channels, in_channels, kernel_size=3, size=size),
        )
        self.lrelu1 = nn.LeakyReLU(lrelu_slope)
        self.conv2 = WeightNorm(
            Conv2dBias(in_channels, out_channels, kernel_size=3, size=size),
        )
        self.lrelu2 = nn.LeakyReLU(lrelu_slope)

    def forward(self, x, cond):
        x_up = self.upsample(x)
        x_skip = self.conv_resize(x_up)

        x = x_up
        x = th.cat([x, cond], dim=1)
        x = self.conv1(x)
        x = self.lrelu1(x)
        x = self.conv2(x)
        x = self.lrelu2(x)

        return x + x_skip


class UpConvBlockPS(nn.Module):
    # pyre-ignore
    def __init__(self, n_in, n_out, size, kernel_size=3, padding=1):
        super().__init__()
        self.conv1 = la.Conv2dWNUB(
            n_in,
            n_out * 4,
            size,
            size,
            kernel_size=kernel_size,
            padding=padding,
        )
        self.lrelu = nn.LeakyReLU(0.2, inplace=True)
        self.ps = nn.PixelShuffle(2)

    def forward(self, x):
        x = self.conv(x)
        x = self.lrelu(x)
        return self.ps(x)


# pyre-ignore
def apply_crop(
    image,
    ymin,
    ymax,
    xmin,
    xmax,
):
    """Crops a region from an image."""
    # NOTE: here we are expecting one of [H, W] [H, W, C] [B, H, W, C]
    if len(image.shape) == 2:
        return image[ymin:ymax, xmin:xmax]
    elif len(image.shape) == 3:
        return image[ymin:ymax, xmin:xmax, :]
    elif len(image.shape) == 4:
        return image[:, ymin:ymax, xmin:xmax, :]
    else:
        raise ValueError("provide a batch of images or a single image")


def tile1d(x, size):
    """Tile a given set of features into a convolutional map.
    Args:
        x: float tensor of shape [N, F]
        size: int or a tuple
    Returns:
        a feature map [N, F, ∑size[0], size[1]]
    """
    # size = size if isinstance(size, tuple) else (size, size)
    return x[:, :, np.newaxis].expand(-1, -1, size)


def tile2d(x, size: int):
    """Tile a given set of features into a convolutional map.

    Args:
        x: float tensor of shape [N, F]
        size: int or a tuple

    Returns:
        a feature map [N, F, size[0], size[1]]
    """
    # size = size if isinstance(size, tuple) else (size, size)
    # NOTE: expecting only int here (!!!)
    return x[:, :, np.newaxis, np.newaxis].expand(-1, -1, size, size)


def sample_negative_idxs(size, *args, **kwargs):
    idxs = th.randperm(size, *args, **kwargs)
    if th.all(idxs == th.arange(size, dtype=idxs.dtype, device=idxs.device)):
        return th.flip(idxs, (0,))
    return idxs


def icnr_init(x, scale=2, init=nn.init.kaiming_normal_):
    ni, nf, h, w = x.shape
    ni2 = int(ni / (scale**2))
    k = init(x.new_zeros([ni2, nf, h, w])).transpose(0, 1)
    k = k.contiguous().view(ni2, nf, -1)
    k = k.repeat(1, 1, scale**2)
    return k.contiguous().view([nf, ni, h, w]).transpose(0, 1)


class PixelShuffleWN(nn.Module):
    """PixelShuffle with the right initialization.

    NOTE: make sure to create this one
    """

    def __init__(self, n_in, n_out, upscale_factor=2):
        super().__init__()
        self.upscale_factor = upscale_factor
        self.n_in = n_in
        self.n_out = n_out
        self.conv = la.Conv2dWN(n_in, n_out * (upscale_factor**2), kernel_size=1, padding=0)
        # NOTE: the bias is 2K?
        self.ps = nn.PixelShuffle(upscale_factor)
        self._init_icnr()

    def _init_icnr(self):
        self.conv.weight_v.data.copy_(icnr_init(self.conv.weight_v.data))
        self.conv.weight_g.data.copy_(
            ((self.conv.weight_v.data**2).sum(dim=[1, 2, 3]) ** 0.5)[:, None, None, None]
        )

    def forward(self, x):
        x = self.conv(x)
        return self.ps(x)


class UpscaleNet(nn.Module):
    def __init__(self, in_channels, out_channels=3, n_ftrs=16, size=1024, upscale_factor=2):
        super().__init__()

        self.conv_block = nn.Sequential(
            la.Conv2dWNUB(in_channels, n_ftrs, size, size, kernel_size=3, padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            la.Conv2dWNUB(n_ftrs, n_ftrs, size, size, kernel_size=3, padding=1),
            nn.LeakyReLU(0.2, inplace=True),
        )

        self.out_block = la.Conv2dWNUB(
            n_ftrs,
            out_channels * upscale_factor**2,
            size,
            size,
            kernel_size=1,
            padding=0,
        )

        self.pixel_shuffle = nn.PixelShuffle(upscale_factor=upscale_factor)
        self.apply(lambda x: la.glorot(x, 0.2))
        self.out_block.apply(weights_initializer(1.0))

    def forward(self, x):
        x = self.conv_block(x)
        x = self.out_block(x)
        return self.pixel_shuffle(x)