# ghost/network/AEI_Net.py
import torch
import torch.nn as nn
import torch.nn.functional as F
from .AADLayer import *
from network.resnet import MLAttrEncoderResnet
def weight_init(m):
    # Linear layers: small normal init for weights, zero bias.
    if isinstance(m, nn.Linear):
        m.weight.data.normal_(0, 0.001)
        m.bias.data.zero_()
    # Conv and transposed-conv layers: Xavier (Glorot) normal init.
    if isinstance(m, nn.Conv2d):
        nn.init.xavier_normal_(m.weight.data)
    if isinstance(m, nn.ConvTranspose2d):
        nn.init.xavier_normal_(m.weight.data)
def conv4x4(in_c, out_c, norm=nn.BatchNorm2d):
    # 4x4 stride-2 convolution block: halves the spatial resolution.
    return nn.Sequential(
        nn.Conv2d(in_channels=in_c, out_channels=out_c, kernel_size=4, stride=2, padding=1, bias=False),
        norm(out_c),
        nn.LeakyReLU(0.1, inplace=True)
    )
class deconv4x4(nn.Module):
    # 4x4 stride-2 transposed-convolution block: doubles the spatial resolution,
    # then merges the encoder skip (added for 'linknet', concatenated otherwise).
    def __init__(self, in_c, out_c, norm=nn.BatchNorm2d):
        super(deconv4x4, self).__init__()
        self.deconv = nn.ConvTranspose2d(in_channels=in_c, out_channels=out_c, kernel_size=4, stride=2, padding=1, bias=False)
        self.bn = norm(out_c)
        self.lrelu = nn.LeakyReLU(0.1, inplace=True)
    def forward(self, input, skip, backbone):
        x = self.deconv(input)
        x = self.bn(x)
        x = self.lrelu(x)
        if backbone == 'linknet':
            return x + skip
        else:
            return torch.cat((x, skip), dim=1)
class MLAttrEncoder(nn.Module):
    # Multi-level attribute encoder: a 7-level conv pyramid followed by a decoder
    # that returns attribute feature maps at every resolution.
    def __init__(self, backbone):
        super(MLAttrEncoder, self).__init__()
        self.backbone = backbone
        self.conv1 = conv4x4(3, 32)
        self.conv2 = conv4x4(32, 64)
        self.conv3 = conv4x4(64, 128)
        self.conv4 = conv4x4(128, 256)
        self.conv5 = conv4x4(256, 512)
        self.conv6 = conv4x4(512, 1024)
        self.conv7 = conv4x4(1024, 1024)
        if backbone == 'unet':
            # unet: skips are concatenated, so deconv inputs carry doubled channel counts.
            self.deconv1 = deconv4x4(1024, 1024)
            self.deconv2 = deconv4x4(2048, 512)
            self.deconv3 = deconv4x4(1024, 256)
            self.deconv4 = deconv4x4(512, 128)
            self.deconv5 = deconv4x4(256, 64)
            self.deconv6 = deconv4x4(128, 32)
        elif backbone == 'linknet':
            # linknet: skips are added, so channel counts mirror the encoder.
            self.deconv1 = deconv4x4(1024, 1024)
            self.deconv2 = deconv4x4(1024, 512)
            self.deconv3 = deconv4x4(512, 256)
            self.deconv4 = deconv4x4(256, 128)
            self.deconv5 = deconv4x4(128, 64)
            self.deconv6 = deconv4x4(64, 32)
        self.apply(weight_init)
    def forward(self, Xt):
        # Feature map shapes in the comments below assume a 3x256x256 input.
        feat1 = self.conv1(Xt)
        # 32x128x128
        feat2 = self.conv2(feat1)
        # 64x64x64
        feat3 = self.conv3(feat2)
        # 128x32x32
        feat4 = self.conv4(feat3)
        # 256x16x16
        feat5 = self.conv5(feat4)
        # 512x8x8
        feat6 = self.conv6(feat5)
        # 1024x4x4
        z_attr1 = self.conv7(feat6)
        # 1024x2x2
        z_attr2 = self.deconv1(z_attr1, feat6, self.backbone)
        z_attr3 = self.deconv2(z_attr2, feat5, self.backbone)
        z_attr4 = self.deconv3(z_attr3, feat4, self.backbone)
        z_attr5 = self.deconv4(z_attr4, feat3, self.backbone)
        z_attr6 = self.deconv5(z_attr5, feat2, self.backbone)
        z_attr7 = self.deconv6(z_attr6, feat1, self.backbone)
        # Final upsample back to the input resolution.
        z_attr8 = F.interpolate(z_attr7, scale_factor=2, mode='bilinear', align_corners=True)
        return z_attr1, z_attr2, z_attr3, z_attr4, z_attr5, z_attr6, z_attr7, z_attr8
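# Shape reference for the returned attribute maps (a sketch assuming a 3x256x256
# input; the values follow from the channel counts above):
#   z_attr1: 1024 x 2 x 2
#   z_attr2: 2048 x 4 x 4     (unet, concatenated skips) | 1024 x 4 x 4   (linknet, added skips)
#   z_attr3: 1024 x 8 x 8                                |  512 x 8 x 8
#   z_attr4:  512 x 16 x 16                              |  256 x 16 x 16
#   z_attr5:  256 x 32 x 32                              |  128 x 32 x 32
#   z_attr6:  128 x 64 x 64                              |   64 x 64 x 64
#   z_attr7:   64 x 128 x 128                            |   32 x 128 x 128
#   z_attr8:   64 x 256 x 256                            |   32 x 256 x 256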
class AADGenerator(nn.Module):
    # AAD generator: lifts the identity embedding to a 2x2 seed and upsamples it
    # through a cascade of AAD residual blocks conditioned on (z_attr, z_id).
    def __init__(self, backbone, c_id=256, num_blocks=2):
        super(AADGenerator, self).__init__()
        self.up1 = nn.ConvTranspose2d(c_id, 1024, kernel_size=2, stride=1, padding=0)
        self.AADBlk1 = AAD_ResBlk(1024, 1024, 1024, c_id, num_blocks)
        if backbone == 'linknet':
            # linknet attribute maps keep encoder-sized channel counts.
            self.AADBlk2 = AAD_ResBlk(1024, 1024, 1024, c_id, num_blocks)
            self.AADBlk3 = AAD_ResBlk(1024, 1024, 512, c_id, num_blocks)
            self.AADBlk4 = AAD_ResBlk(1024, 512, 256, c_id, num_blocks)
            self.AADBlk5 = AAD_ResBlk(512, 256, 128, c_id, num_blocks)
            self.AADBlk6 = AAD_ResBlk(256, 128, 64, c_id, num_blocks)
            self.AADBlk7 = AAD_ResBlk(128, 64, 32, c_id, num_blocks)
            self.AADBlk8 = AAD_ResBlk(64, 3, 32, c_id, num_blocks)
        else:
            # unet (and the resnet encoder) fall through to this branch with wider attribute maps.
            self.AADBlk2 = AAD_ResBlk(1024, 1024, 2048, c_id, num_blocks)
            self.AADBlk3 = AAD_ResBlk(1024, 1024, 1024, c_id, num_blocks)
            self.AADBlk4 = AAD_ResBlk(1024, 512, 512, c_id, num_blocks)
            self.AADBlk5 = AAD_ResBlk(512, 256, 256, c_id, num_blocks)
            self.AADBlk6 = AAD_ResBlk(256, 128, 128, c_id, num_blocks)
            self.AADBlk7 = AAD_ResBlk(128, 64, 64, c_id, num_blocks)
            self.AADBlk8 = AAD_ResBlk(64, 3, 64, c_id, num_blocks)
        self.apply(weight_init)
    def forward(self, z_attr, z_id):
        # Broadcast the identity vector to a 1x1 map, then lift it to a 1024x2x2 seed.
        m = self.up1(z_id.reshape(z_id.shape[0], -1, 1, 1))
        m2 = F.interpolate(self.AADBlk1(m, z_attr[0], z_id), scale_factor=2, mode='bilinear', align_corners=True)
        m3 = F.interpolate(self.AADBlk2(m2, z_attr[1], z_id), scale_factor=2, mode='bilinear', align_corners=True)
        m4 = F.interpolate(self.AADBlk3(m3, z_attr[2], z_id), scale_factor=2, mode='bilinear', align_corners=True)
        m5 = F.interpolate(self.AADBlk4(m4, z_attr[3], z_id), scale_factor=2, mode='bilinear', align_corners=True)
        m6 = F.interpolate(self.AADBlk5(m5, z_attr[4], z_id), scale_factor=2, mode='bilinear', align_corners=True)
        m7 = F.interpolate(self.AADBlk6(m6, z_attr[5], z_id), scale_factor=2, mode='bilinear', align_corners=True)
        m8 = F.interpolate(self.AADBlk7(m7, z_attr[6], z_id), scale_factor=2, mode='bilinear', align_corners=True)
        y = self.AADBlk8(m8, z_attr[7], z_id)
        return torch.tanh(y)
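# Resolution trace (a sketch for the default 256x256 pipeline): the seven bilinear
# upsamples grow the 2x2 seed through 4, 8, 16, 32, 64, 128 to 256 px, and AADBlk8
# plus tanh map the final feature map to a 3-channel image in [-1, 1].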
class AEI_Net(nn.Module):
    # Full AEI network: attribute encoder + AAD generator.
    def __init__(self, backbone, num_blocks=2, c_id=256):
        super(AEI_Net, self).__init__()
        if backbone in ['unet', 'linknet']:
            self.encoder = MLAttrEncoder(backbone)
        elif backbone == 'resnet':
            self.encoder = MLAttrEncoderResnet()
        self.generator = AADGenerator(backbone, c_id, num_blocks)
    def forward(self, Xt, z_id):
        # Xt: target image; z_id: identity embedding of the source face.
        attr = self.encoder(Xt)
        Y = self.generator(attr, z_id)
        return Y, attr
    def get_attr(self, X):
        # Attribute features only, without running the generator.
        return self.encoder(X)
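# Minimal smoke-test sketch (illustrative only; assumes a 256x256 target image and
# an identity embedding of the default size c_id=256). Run it as a module from the
# package root so the relative AADLayer import above resolves.
if __name__ == "__main__":
    net = AEI_Net(backbone='unet')
    Xt = torch.randn(1, 3, 256, 256)   # target face image
    z_id = torch.randn(1, 256)         # source identity embedding
    with torch.no_grad():
        Y, attr = net(Xt, z_id)
    print(Y.shape)                     # expected: torch.Size([1, 3, 256, 256])
    print(len(attr))                   # expected: 8 attribute feature maps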