# Importing the necessary libraries
import math
import torch.nn as nn
import torch
# Position-wise feed-forward network: expands the model dimension, applies a
# non-linearity, then projects back down to the original dimension
class position_wide_feed_forward(nn.Module):
    def __init__(self, dimension_for_model, dimension_for_network, dropout=0.1):
        '''
        Constructor for the position-wise feed-forward network
        dimension_for_model: the dimension used in the embedding layer (d_model)
        dimension_for_network: the inner dimension the embeddings are expanded to (d_ff)
        dropout: optional dropout probability; randomly zeroes elements during training to improve generalization
        '''
        super().__init__()  # Initialize the parent class, nn.Module
        self.expansion = nn.Linear(dimension_for_model, dimension_for_network)  # Projects the multi-head attention output from d_model up to d_ff
        self.apply_dropout = nn.Dropout(dropout)  # Dropout layer: randomly zeroes individual elements during training
        self.activation = nn.ReLU()  # Introduces non-linearity so the encoder can represent non-linear functions
        self.reverse_expansion = nn.Linear(dimension_for_network, dimension_for_model)  # Projects back from the expanded dimension to d_model
    def forward(self, x):
        '''
        Applies the position-wise feed-forward transformation
        x: the tensor the feed-forward network is applied to, shape (batch, seq_len, d_model)
        '''
        parsed = self.apply_dropout(self.activation(self.expansion(x)))  # Expand, activate, then apply dropout
        return self.reverse_expansion(parsed)  # Project back down to d_model
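# Equivalent formulation (a sketch, not part of the class above): the block computes
#   FFN(x) = W2 * dropout(ReLU(W1 * x + b1)) + b2
# and, with the same hyperparameters, could also be written as
#   nn.Sequential(nn.Linear(d_model, d_ff), nn.ReLU(), nn.Dropout(p), nn.Linear(d_ff, d_model))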
# Residual connection followed by layer normalization: adds the sublayer output back to its input, then normalizes
class Residual_layer(nn.Module):
    def __init__(self, dimension_for_model, dropout=0.1):
        '''
        Constructor for the residual-and-normalization layer
        dimension_for_model: the model dimension from the embedding layer (d_model)
        dropout: optional dropout probability applied to the sublayer output before the residual addition
        '''
        super().__init__()
        self.normalize = nn.LayerNorm(dimension_for_model)  # Layer normalization over the last dimension
        self.apply_dropout = nn.Dropout(dropout)
    def forward(self, input_tensor, sublayer_tensor):
        '''
        input_tensor: the running tensor at the current stage (the residual branch)
        sublayer_tensor: the output of the sublayer (e.g. attention or feed-forward) still to be added back in
        '''
        result = self.apply_dropout(sublayer_tensor) + input_tensor  # Residual addition: both tensors have the same shape; dropout is applied only to the sublayer output
        return self.normalize(result)  # Return the normalized result
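# Note: this is the "post-norm" arrangement from the original Transformer,
# LayerNorm(x + Dropout(sublayer(x))). A common alternative (not implemented here)
# is "pre-norm", x + Dropout(sublayer(LayerNorm(x))).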
if __name__ == '__main__':
    # Demo input for the feed-forward network: batch of 1, sequence length 3, model dimension 4
    inp = torch.tensor([[
        [1.0, 2.0, 3.0, 4.0],
        [0.5, 1.5, 2.5, 3.5],
        [4.0, 3.0, 2.0, 1.0]
    ]])
    # Instantiate with no dropout
    ffn = position_wide_feed_forward(dimension_for_model=4, dimension_for_network=8, dropout=0.0)
    # Run it
    out = ffn(inp)
    # Print to verify the shape and a nontrivial transform
    print("Input:", inp)
    print("Output:", out)
    print("Output shape:", out.shape)
    # Demo input for the residual + normalization layer
    x = torch.tensor([[[1.0, 2.0, 3.0, 4.0],
                       [4.0, 3.0, 2.0, 1.0],
                       [0.5, 1.5, 2.5, 3.5]]])
    # Dummy "sublayer" output to add
    sub = torch.tensor([[[0.1, 0.1, 0.1, 0.1],
                         [0.2, 0.2, 0.2, 0.2],
                         [0.3, 0.3, 0.3, 0.3]]])
    # Instantiate the residual + norm block (no dropout)
    layer = Residual_layer(dimension_for_model=4, dropout=0.0)
    # Run
    out = layer(x, sub)
    # Print everything
    print("Input X:\n", x)
    print("\nSublayer output:\n", sub)
    print("\nResidual+Norm output:\n", out)