HemanM committed on
Commit 1070f67 · verified · 1 Parent(s): e8f65bb

Update evo_model.py

Files changed (1)
  1. evo_model.py +10 -35
evo_model.py CHANGED
@@ -1,39 +1,14 @@
-# evo_model.py
-import torch
 import torch.nn as nn
-import math
-
-class PositionalEncoding(nn.Module):
-    def __init__(self, d_model, max_len=128):
-        super().__init__()
-        pe = torch.zeros(max_len, d_model)
-        position = torch.arange(0, max_len).unsqueeze(1)
-        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
-        pe[:, 0::2] = torch.sin(position * div_term)
-        pe[:, 1::2] = torch.cos(position * div_term)
-        pe = pe.unsqueeze(0)  # (1, max_len, d_model)
-        self.register_buffer('pe', pe)
-        self.max_len = max_len
-
-    def forward(self, x):
-        seq_len = x.size(1)
-        if seq_len > self.max_len:
-            raise ValueError(f"Input length {seq_len} exceeds max_len {self.max_len}")
-        return x + self.pe[:, :seq_len]
 
 class EvoDecoderModel(nn.Module):
     def __init__(self, vocab_size, d_model=512, nhead=8, num_layers=6, dim_feedforward=2048, dropout=0.1):
-        super().__init__()
-        self.token_embed = nn.Embedding(vocab_size, d_model)
-        self.pos_encoder = PositionalEncoding(d_model)
-        decoder_layer = nn.TransformerDecoderLayer(d_model, nhead, dim_feedforward, dropout, batch_first=True)
-        self.decoder = nn.TransformerDecoder(decoder_layer, num_layers=num_layers)
-        self.lm_head = nn.Linear(d_model, vocab_size)
-
-    def forward(self, input_ids):
-        x = self.token_embed(input_ids)
-        x = self.pos_encoder(x)
-        seq_len = x.size(1)
-        mask = torch.triu(torch.ones(seq_len, seq_len, device=x.device), diagonal=1).bool()
-        x = self.decoder(x, x, tgt_mask=mask)
-        return self.lm_head(x)
+        super(EvoDecoderModel, self).__init__()
+        self.embedding = nn.Embedding(vocab_size, d_model)
+        decoder_layer = nn.TransformerDecoderLayer(d_model, nhead, dim_feedforward, dropout)
+        self.transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers)
+        self.output_layer = nn.Linear(d_model, vocab_size)
+
+    def forward(self, tgt, memory):
+        embedded = self.embedding(tgt)
+        output = self.transformer_decoder(embedded, memory)
+        return self.output_layer(output)
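
For anyone exercising the rewritten class: forward now takes an explicit memory tensor, and the commit drops the positional encoding, the causal tgt_mask, and batch_first=True, so tensors follow PyTorch's default sequence-first layout. A minimal smoke-test sketch (the vocabulary size, shapes, and random memory below are illustrative assumptions, not part of this commit):

import torch
from evo_model import EvoDecoderModel  # assumed import path within this repo

vocab_size = 1000                            # hypothetical vocabulary size
model = EvoDecoderModel(vocab_size)          # defaults: d_model=512, nhead=8, num_layers=6

# Without batch_first=True, nn.TransformerDecoder expects sequence-first tensors:
# tgt holds (seq_len, batch) token ids, memory is (mem_len, batch, d_model).
tgt = torch.randint(0, vocab_size, (16, 2))
memory = torch.randn(24, 2, 512)             # stand-in for a real encoder's output

logits = model(tgt, memory)                  # -> (16, 2, vocab_size)
print(logits.shape)

Note that with the triangular mask removed, decoding is no longer causal: each target position can attend to later positions. A caller who needs autoregressive behavior would have to build a mask, e.g. with nn.Transformer.generate_square_subsequent_mask(seq_len), and pass it as tgt_mask, which the new forward signature does not currently expose.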