Spaces:

a-ragab-h-m
/

vrp-shanghai-transformer

Sleeping

a-ragab-h-m committed on Jun 3

Commit

722e008

verified ·

1 Parent(s): e61766c

Update nets/projections.py

Files changed (1) hide show

nets/projections.py CHANGED Viewed

@@ -13,7 +13,7 @@ class Projections(nn.Module):
         self.W_key = nn.Parameter(torch.Tensor(n_heads, embed_dim, self.val_dim))
         self.W_val = nn.Parameter(torch.Tensor(n_heads, embed_dim, self.val_dim))
-        self.W_output = nn.Parameter(torch.Tensor(1, embed_dim, embed_dim))
         self.init_parameters()
@@ -24,23 +24,22 @@ class Projections(nn.Module):
     def forward(self, h):
         """
-        :param h: (batch_size, graph_size, embed_dim)
-        :return: dict with keys K, V, V_output for attention
         """
         batch_size, graph_size, input_dim = h.size()
-        hflat = h.view(-1, input_dim)  # (batch_size * graph_size, embed_dim)
         shp = (self.n_heads, batch_size, graph_size, self.val_dim)
-        # Apply projections
-        K = torch.matmul(hflat, self.W_key).view(shp)       # (n_heads, batch_size, graph_size, val_dim)
-        V = torch.matmul(hflat, self.W_val).view(shp)       # (n_heads, batch_size, graph_size, val_dim)
-        # Output projection
-        V_output = torch.bmm(h, self.W_output.repeat(batch_size, 1, 1))  # (batch_size, graph_size, embed_dim)
         return {
-            'K': K,
-            'V': V,
-            'V_output': V_output
         }

         self.W_key = nn.Parameter(torch.Tensor(n_heads, embed_dim, self.val_dim))
         self.W_val = nn.Parameter(torch.Tensor(n_heads, embed_dim, self.val_dim))
+        self.W_output = nn.Parameter(torch.Tensor(embed_dim, embed_dim))
         self.init_parameters()
     def forward(self, h):
         """
+        :param h: Tensor of shape (batch_size, graph_size, embed_dim)
+        :return: dict with keys: K, V, V_output
         """
         batch_size, graph_size, input_dim = h.size()
+        hflat = h.contiguous().view(-1, input_dim)  # (batch_size * graph_size, embed_dim)
+        # Compute Keys and Values per head
         shp = (self.n_heads, batch_size, graph_size, self.val_dim)
+        K = torch.matmul(hflat, self.W_key).view(shp)
+        V = torch.matmul(hflat, self.W_val).view(shp)
+        # Compute output projection: (batch_size, graph_size, embed_dim)
+        V_output = torch.matmul(h, self.W_output.expand_as(self.W_output))
         return {
+            'K': K,             # (n_heads, batch_size, graph_size, val_dim)
+            'V': V,             # (n_heads, batch_size, graph_size, val_dim)
+            'V_output': V_output  # (batch_size, graph_size, embed_dim)
         }