Spaces:

a-ragab-h-m
/

vrp-shanghai-transformer

Sleeping

vrp-shanghai-transformer / nets /projections.py

Update nets/projections.py

722e008 verified 3 months ago

1.65 kB

	import torch
	import torch.nn as nn
	import math


	class Projections(nn.Module):
	    """Per-head key/value projections plus a full-width output projection.

	    Holds three learnable weights:

	    * ``W_key``    -- shape ``(n_heads, embed_dim, val_dim)``
	    * ``W_val``    -- shape ``(n_heads, embed_dim, val_dim)``
	    * ``W_output`` -- shape ``(embed_dim, embed_dim)``

	    where ``val_dim = embed_dim // n_heads`` (floor division, so any
	    remainder of ``embed_dim / n_heads`` is dropped).
	    """

	    def __init__(self, n_heads, embed_dim):
	        """
	        :param n_heads: number of heads (leading dimension of W_key / W_val)
	        :param embed_dim: size of the last dimension of the input embeddings
	        """
	        super().__init__()

	        self.n_heads = n_heads
	        self.embed_dim = embed_dim
	        self.val_dim = embed_dim // n_heads

	        # torch.empty only allocates storage; the actual values are written by
	        # init_parameters() below, so nothing uninitialised is ever used.
	        self.W_key = nn.Parameter(torch.empty(n_heads, embed_dim, self.val_dim))
	        self.W_val = nn.Parameter(torch.empty(n_heads, embed_dim, self.val_dim))
	        self.W_output = nn.Parameter(torch.empty(embed_dim, embed_dim))

	        self.init_parameters()

	    def init_parameters(self):
	        """Fill every parameter in place from U(-b, b), b = 1 / sqrt(last dim size)."""
	        for param in self.parameters():
	            stdv = 1. / math.sqrt(param.size(-1))
	            # nn.init.uniform_ performs the in-place fill without recording it
	            # in autograd, replacing the discouraged ``param.data`` access.
	            nn.init.uniform_(param, -stdv, stdv)

	    def forward(self, h):
	        """
	        :param h: Tensor of shape (batch_size, graph_size, embed_dim)
	        :return: dict with keys: K, V, V_output
	        """
	        batch_size, graph_size, input_dim = h.size()
	        # reshape() returns a view when possible and copies only for
	        # non-contiguous input: (batch_size * graph_size, embed_dim)
	        h_flat = h.reshape(-1, input_dim)

	        # Compute Keys and Values per head. matmul broadcasts the 2-D h_flat
	        # against the leading n_heads dimension of the weights, giving
	        # (n_heads, batch_size * graph_size, val_dim) before the view.
	        head_shape = (self.n_heads, batch_size, graph_size, self.val_dim)
	        K = torch.matmul(h_flat, self.W_key).view(head_shape)
	        V = torch.matmul(h_flat, self.W_val).view(head_shape)

	        # Compute output projection: (batch_size, graph_size, embed_dim)
	        V_output = torch.matmul(h, self.W_output)

	        return {
	            'K': K,  # (n_heads, batch_size, graph_size, val_dim)
	            'V': V,  # (n_heads, batch_size, graph_size, val_dim)
	            'V_output': V_output,  # (batch_size, graph_size, embed_dim)
	        }