from lgatr import GATr, SelfAttentionConfig, MLPConfig
from lgatr.interface import embed_vector, extract_scalar, embed_spurions, extract_vector
import torch
import torch.nn as nn
from xformers.ops.fmha import BlockDiagonalMask
from torch_scatter import scatter_mean

class LGATrModel(torch.nn.Module):
    def __init__(self, n_scalars, hidden_mv_channels, hidden_s_channels, blocks, embed_as_vectors,
                 n_scalars_out, return_scalar_coords, obj_score=False, global_features_copy=False):
super().__init__()
self.return_scalar_coords = return_scalar_coords
self.n_scalars = n_scalars
self.hidden_mv_channels = hidden_mv_channels
self.hidden_s_channels = hidden_s_channels
self.blocks = blocks
self.embed_as_vectors = embed_as_vectors
self.input_dim = 3
self.n_scalars_out = n_scalars_out
self.obj_score = obj_score
        self.global_features_copy = global_features_copy
self.gatr = GATr(
in_mv_channels=3,
out_mv_channels=1,
hidden_mv_channels=hidden_mv_channels,
in_s_channels=n_scalars,
out_s_channels=n_scalars_out,
hidden_s_channels=hidden_s_channels,
num_blocks=blocks,
attention=SelfAttentionConfig(), # Use default parameters for attention
mlp=MLPConfig(), # Use default parameters for MLP
)
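        # in_mv_channels=3 matches the inputs built in forward(): the embedded
        # four-momentum plus the two spurion channels.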
if self.global_features_copy:
self.gatr_global_features = GATr(
in_mv_channels=3,
out_mv_channels=1,
hidden_mv_channels=hidden_mv_channels,
in_s_channels=n_scalars,
out_s_channels=n_scalars_out,
hidden_s_channels=hidden_s_channels,
num_blocks=blocks,
attention=SelfAttentionConfig(), # Use default parameters for attention
mlp=MLPConfig(), # Use default parameters for MLP
)
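        # The beta head maps the GATr scalar outputs plus the extracted
        # multivector scalar channel (hence the "+ 1") to a single score: an
        # objectness logit when obj_score=True, a per-node beta otherwise.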
if n_scalars_out > 0:
if obj_score:
                factor = 2 if self.global_features_copy else 1
self.beta = nn.Sequential(
nn.Linear((n_scalars_out + 1) * factor, 10),
nn.LeakyReLU(),
                    nn.Linear(10, 1),  # raw logit; no sigmoid applied here
)
else:
self.beta = nn.Linear(n_scalars_out + 1, 1)
else:
self.beta = None

    def forward(self, data, data_events=None, data_events_clusters=None, cpu_demo=False):
# data: instance of EventBatch
if self.global_features_copy:
assert data_events is not None and data_events_clusters is not None
assert self.obj_score
inputs_v = data_events.input_vectors
inputs_scalar = data_events.input_scalars
assert inputs_scalar.shape[1] == self.n_scalars, "Expected %d, got %d" % (
self.n_scalars, inputs_scalar.shape[1])
mask_global = self.build_attention_mask(data_events.batch_idx)
embedded_inputs_events = embed_vector(inputs_v.unsqueeze(0))
multivectors = embedded_inputs_events.unsqueeze(-2)
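            # Spurions are fixed reference multivectors (here the beam xy-plane
            # and the time axis) added as extra channels, giving the otherwise
            # Lorentz-equivariant network access to a lab-frame reference.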
spurions = embed_spurions(beam_reference="xyplane", add_time_reference=True,
device=multivectors.device, dtype=multivectors.dtype)
            num_points, vec_dim = inputs_v.shape
            assert vec_dim == 4
spurions = spurions[None, None, ...].repeat(1, num_points, 1, 1) # (batchsize, num_points, 2, 16)
multivectors = torch.cat((multivectors, spurions), dim=-2)
embedded_outputs, output_scalars = self.gatr_global_features(
multivectors, scalars=inputs_scalar, attention_mask=mask_global
)
original_scalar = extract_scalar(embedded_outputs)
            scalar_embeddings_nodes = torch.cat([original_scalar[0, :, 0, :], output_scalars[0, :, :]], dim=1)
            # Shift cluster indices by 1 so that index -1 (unassigned) lands in
            # bucket 0, then drop that bucket after averaging.
            clusters = torch.as_tensor(data_events_clusters, device=scalar_embeddings_nodes.device).long() + 1
            scalar_embeddings_global = scatter_mean(scalar_embeddings_nodes, clusters, dim=0)[1:]
inputs_v = data.input_vectors.float() # four-momenta
inputs_scalar = data.input_scalars.float()
assert inputs_scalar.shape[1] == self.n_scalars
        num_points, vec_dim = inputs_v.shape
        assert vec_dim == 4
#velocities = embed_vector(inputs_v)
inputs_v = inputs_v.unsqueeze(0)
embedded_inputs = embed_vector(inputs_v)
# if it contains nans, raise an error
if torch.isnan(embedded_inputs).any():
raise ValueError("NaNs in the input!")
        multivectors = embedded_inputs.unsqueeze(-2)  # (1, num_points, 1, 16)
spurions = embed_spurions(beam_reference="xyplane", add_time_reference=True,
device=multivectors.device, dtype=multivectors.dtype)
spurions = spurions[None, None, ...].repeat(1, num_points, 1, 1) # (batchsize, num_points, 2, 16)
multivectors = torch.cat((multivectors, spurions), dim=-2) # (batchsize, num_points, 3, 16) - Just embed the spurions as two extra multivector channels
        # Block-diagonal attention mask per event; skipped in the CPU demo path.
        mask = None if cpu_demo else self.build_attention_mask(data.batch_idx)
embedded_outputs, output_scalars = self.gatr(
multivectors, scalars=inputs_scalar, attention_mask=mask
)
        # NOTE: embed_as_vectors is currently unused; the multivector outputs
        # are always read out with extract_vector.
        x_clusters = extract_vector(embedded_outputs)
original_scalar = extract_scalar(embedded_outputs)
if self.beta is not None:
if self.obj_score:
                # When True, read the objectness score from dedicated virtual
                # ("fake") nodes (data.fake_nodes_idx) instead of averaging
                # the scalar embeddings over all nodes in the event.
                extract_from_virtual_nodes = False
scalar_embeddings = torch.cat([original_scalar[0, :, 0, :], output_scalars[0, :, :]], dim=1)
if extract_from_virtual_nodes:
values = torch.cat([original_scalar[0, data.fake_nodes_idx, 0, :], output_scalars[0, data.fake_nodes_idx, :]], dim=1)
else:
values = scatter_mean(scalar_embeddings, data.batch_idx.to(scalar_embeddings.device).long(), dim=0)
if self.global_features_copy:
values = torch.cat([values, scalar_embeddings_global], dim=1)
                beta = self.beta(values)
return beta
vals = torch.cat([original_scalar[0, :, 0, :], output_scalars[0, :, :]], dim=1)
beta = self.beta(vals)
if self.return_scalar_coords:
                x = output_scalars[0, :, :3]
                x = torch.cat((x, torch.sigmoid(beta.view(-1, 1))), dim=1)
else:
x = torch.cat((x_clusters[0, :, 0, :], torch.sigmoid(beta.view(-1, 1))), dim=1)
else:
x = x_clusters[:, 0, :]
if torch.isnan(x).any():
raise ValueError("NaNs in the output!")
return x

    def build_attention_mask(self, batch_numbers):
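        """Build a block-diagonal mask so attention stays within each event.

        E.g. batch_numbers = [0, 0, 0, 1, 1] yields seqlens [3, 2]: the first
        three particles attend only to each other, and likewise the last two.
        """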
return BlockDiagonalMask.from_seqlens(
torch.bincount(batch_numbers.long()).tolist()
)

def get_model(args, obj_score=False):
n_scalars_out = 8
if args.beta_type == "pt":
n_scalars_out = 0
elif args.beta_type == "pt+bc":
n_scalars_out = 8
n_scalars_in = 12
if args.no_pid:
        n_scalars_in = 12 - 9  # drop the 9 PID features
if obj_score:
return LGATrModel(
n_scalars=n_scalars_in,
hidden_mv_channels=8,
hidden_s_channels=16,
blocks=5,
embed_as_vectors=False,
n_scalars_out=n_scalars_out,
return_scalar_coords=args.scalars_oc,
obj_score=obj_score,
            global_features_copy=args.global_features_obj_score
)
return LGATrModel(
n_scalars=n_scalars_in,
hidden_mv_channels=args.hidden_mv_channels,
hidden_s_channels=args.hidden_s_channels,
blocks=args.num_blocks,
embed_as_vectors=args.embed_as_vectors,
n_scalars_out=n_scalars_out,
return_scalar_coords=args.scalars_oc,
obj_score=obj_score
)
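
# ---------------------------------------------------------------------------
# Minimal usage sketch (illustration only, not part of the pipeline). The
# SimpleNamespace below is a hypothetical stand-in for the EventBatch object
# that forward() expects, providing only the attributes read above; assumes
# the lgatr package is importable. cpu_demo=True skips the xformers mask.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from types import SimpleNamespace

    model = LGATrModel(
        n_scalars=3,
        hidden_mv_channels=8,
        hidden_s_channels=16,
        blocks=2,
        embed_as_vectors=False,
        n_scalars_out=8,
        return_scalar_coords=False,
    )
    n = 6  # six particles spread over two events
    batch = SimpleNamespace(
        input_vectors=torch.randn(n, 4),             # four-momenta
        input_scalars=torch.randn(n, 3),             # per-particle scalars
        batch_idx=torch.tensor([0, 0, 0, 1, 1, 1]),  # event index per particle
    )
    out = model(batch, cpu_demo=True)
    print(out.shape)  # expected (6, 5): 4 vector coordinates + sigmoid(beta)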