import math

import torch
import torch.nn as nn


class PositionalEncoding(nn.Module):
    """
    Positional encoding layer for transformer models, adapted for lemmatization.

    In lemmatization tasks (especially with character-level inputs), sequences are
    typically much shorter than in general sequence-to-sequence settings, so max_len
    defaults to a lower value (256) to better match the expected inputs.

    Parameters:
    - dimension_for_model: Dimensionality of the embedding vectors.
    - dropout: Dropout probability used for regularization.
    - max_len: Maximum supported sequence length; kept low (default 256) because
      lemmatization sequences are short.
    """

    def __init__(self, dimension_for_model, dropout, max_len=256):
        super(PositionalEncoding, self).__init__()

        self.dropout = nn.Dropout(p=dropout)

        # Precompute the encoding table once: shape [max_len, dimension_for_model].
        pos_enc_matrix = torch.zeros(max_len, dimension_for_model)

        # Position indices 0..max_len-1 as a column vector of shape [max_len, 1].
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)

        # Inverse frequencies for the even embedding dimensions.
        div_term = torch.exp(torch.arange(0, dimension_for_model, 2, dtype=torch.float)
                             * -(math.log(10000.0) / dimension_for_model))

        # Even indices get sine, odd indices get cosine.
        pos_enc_matrix[:, 0::2] = torch.sin(position * div_term)
        pos_enc_matrix[:, 1::2] = torch.cos(position * div_term)

        # Add a batch dimension: [1, max_len, dimension_for_model].
        pos_enc_matrix = pos_enc_matrix.unsqueeze(0)

        # Register as a buffer: it moves with the module across devices
        # but is not a trainable parameter.
        self.register_buffer('pe', pos_enc_matrix)

    def forward(self, x):
        """
        Add positional encodings to the input tensor.

        x: Tensor of shape [batch_size, sequence_length, dimension_for_model]
        """
        # Slice the precomputed table to the current sequence length; the buffer
        # carries no gradient, so detaching is just an explicit safeguard.
        x = x + self.pe[:, :x.size(1)].detach()
        return self.dropout(x)


if __name__ == '__main__':
    d_model = 512
    dropout_rate = 0.1
    max_len = 256

    pos_encoder = PositionalEncoding(dimension_for_model=d_model, dropout=dropout_rate, max_len=max_len)

    # Batch of 2 sequences, 20 steps each, already embedded to d_model dimensions.
    dummy_input = torch.randn(2, 20, d_model)

    encoded_output = pos_encoder(dummy_input)
    print("Encoded output shape:", encoded_output.shape)