Spaces:

HemanM
/

EvoConvo

Sleeping

HemanM committed on Aug 4

Commit

cf057f8

verified ·

1 Parent(s): 95f96da

Update evo_model.py

Files changed (1) hide show

evo_model.py CHANGED Viewed

@@ -4,26 +4,24 @@ import torch.nn as nn
 import math
 class PositionalEncoding(nn.Module):
-    def __init__(self, d_model, max_len=512):  # ✅ Increased from 128 to 512
         super().__init__()
         pe = torch.zeros(max_len, d_model)
         position = torch.arange(0, max_len).unsqueeze(1)
         div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
         pe[:, 0::2] = torch.sin(position * div_term)
         pe[:, 1::2] = torch.cos(position * div_term)
-        pe = pe.unsqueeze(0)  # Shape: (1, max_len, d_model)
         self.register_buffer('pe', pe)
     def forward(self, x):
-        if x.size(1) > self.pe.size(1):
-            raise ValueError(f"Input sequence length {x.size(1)} exceeds max_len {self.pe.size(1)}")
         return x + self.pe[:, :x.size(1)]
 class EvoDecoderModel(nn.Module):
     def __init__(self, vocab_size, d_model=512, nhead=8, num_layers=6, dim_feedforward=2048, dropout=0.1):
         super().__init__()
         self.token_embed = nn.Embedding(vocab_size, d_model)
-        self.pos_encoder = PositionalEncoding(d_model)
         decoder_layer = nn.TransformerDecoderLayer(
             d_model=d_model,
             nhead=nhead,

 import math
 class PositionalEncoding(nn.Module):
+    def __init__(self, d_model, max_len=128):  # ❗ Reverted to 128 to match saved model
         super().__init__()
         pe = torch.zeros(max_len, d_model)
         position = torch.arange(0, max_len).unsqueeze(1)
         div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
         pe[:, 0::2] = torch.sin(position * div_term)
         pe[:, 1::2] = torch.cos(position * div_term)
+        pe = pe.unsqueeze(0)  # (1, max_len, d_model)
         self.register_buffer('pe', pe)
     def forward(self, x):
         return x + self.pe[:, :x.size(1)]
 class EvoDecoderModel(nn.Module):
     def __init__(self, vocab_size, d_model=512, nhead=8, num_layers=6, dim_feedforward=2048, dropout=0.1):
         super().__init__()
         self.token_embed = nn.Embedding(vocab_size, d_model)
+        self.pos_encoder = PositionalEncoding(d_model, max_len=128)  # ❗ Match checkpoint shape
         decoder_layer = nn.TransformerDecoderLayer(
             d_model=d_model,
             nhead=nhead,