Spaces:

jev-aleks
/

SceneDINO

Running on Zero

App Files Files Community

SceneDINO / scenedino /models /backbones /dino /visualization.py

jev-aleks

scenedino init

9e15541 15 days ago

raw

history blame contribute delete

6.54 kB

	# from pykeops.torch import LazyTensor
	from typing import Tuple

	import matplotlib.pyplot as plt
	import torch
	from torch import nn, Tensor


	class VisualizationModule(nn.Module):
	    """Turn high-dimensional feature tensors into RGB images for visualization.

	    Two mechanisms are provided:

	    * a PCA projection (``fit_pca`` / ``transform_pca``) whose mean and
	      components are stored on the module, and
	    * a cosine-similarity k-means clustering whose labels are mapped to
	      colors of the ``tab10`` colormap.
	    """

	    def __init__(self, in_channels, reduce_images=3):
	        """Set up un-fitted PCA state and the k-means configuration.

	        Args:
	            in_channels: Size of the feature (channel) dimension.
	            reduce_images: Number of RGB images the PCA should be able to
	                produce; ``fit_pca`` keeps ``3 * reduce_images`` components.
	        """
	        super().__init__()
	        # Until a PCA is fitted, a zero mean and a truncated identity make
	        # transform_pca(from_dim=0) pass through the first three channels.
	        self.batch_rgb_mean = torch.zeros(in_channels)
	        self.batch_rgb_comp = torch.eye(in_channels, 3)
	        self.reduce_images = reduce_images
	        self.fitted_pca = False

	        self.n_kmeans_clusters = 8
	        self.kmeans_cluster_centers = torch.zeros(self.n_kmeans_clusters, in_channels)
	        self.cmap_kmeans = plt.get_cmap("tab10")

	    def fit_pca(self, batch_features, refit):
	        """Fit the PCA projection on a 2-D feature matrix.

	        Args:
	            batch_features: Feature matrix of shape [N, C]. Rows containing
	                NaN values are dropped before fitting.
	            refit: If true, fit again even when a PCA was already fitted.

	        Raises:
	            ValueError: If ``batch_features`` is not 2-dimensional.
	        """
	        # The NaN row filter below indexes dim 1, so exactly two dims are needed.
	        if batch_features.dim() != 2:
	            raise ValueError(f"Wrong dims for PCA: {batch_features.shape}")
	        if self.fitted_pca and not refit:
	            return
	        valid_rows = ~torch.isnan(batch_features).any(dim=1)
	        self._pca_fast(batch_features[valid_rows], num_components=3 * self.reduce_images)
	        self.fitted_pca = True

	    def transform_pca(self, features, norm, from_dim):
	        """Project features onto three consecutive stored PCA components.

	        Args:
	            features: Tensor whose last dimension is the channel dimension
	                (assumed to match the stored mean/components -- confirm
	                against callers).
	            norm: If true, L2-normalize the centered features along the last
	                dimension before projecting.
	            from_dim: Index of the first component to use; components
	                ``from_dim`` to ``from_dim + 2`` are selected.

	        Returns:
	            Tensor with the last dimension replaced by the selected components.
	        """
	        features = features - self.batch_rgb_mean
	        if norm:
	            features = features / torch.linalg.norm(features, dim=-1, keepdim=True)
	        return features @ self.batch_rgb_comp[..., from_dim:from_dim + 3]

	    def _pca_fast(self, data: Tensor, num_components: int = 3) -> None:
	        """Fit a PCA with PyTorch's randomized low-rank approximation.

	        The result is not returned; it is stored on the module:
	        ``batch_rgb_mean`` receives the per-channel mean (shape [1, C] or
	        [B, 1, C]) and ``batch_rgb_comp`` the principal components as columns
	        (shape [C, num_components] or [B, C, num_components]).

	        Args:
	            data (Tensor): Data matrix of the shape [N, C] or [B, N, C].
	            num_components (int): Number of principal components to keep.
	        """
	        # Standardize each channel; the epsilon guards constant channels.
	        data_mean = data.mean(dim=-2, keepdim=True)
	        data_normalize = (data - data_mean) / (data.std(dim=-2, keepdim=True) + 1e-08)
	        # At least six components are requested from the low-rank solver,
	        # even when fewer are kept afterwards.
	        u, _, v = torch.pca_lowrank(data_normalize, q=max(num_components, 6), niter=2, center=True)
	        # Components as rows, to match scikit-learn's layout.
	        v = v.transpose(-1, -2)
	        # Resolve the sign ambiguity of the decomposition.
	        _, v = self._svd_flip(u, v)
	        # Ellipsis indexing handles both the [q, C] and the [B, q, C] case.
	        pca_components = v[..., :num_components, :]

	        self.batch_rgb_mean = data_mean
	        self.batch_rgb_comp = pca_components.transpose(-1, -2)

	    def _svd_flip(self, u: Tensor, v: Tensor) -> Tuple[Tensor, Tensor]:
	        """Perform SVD flip to solve the sign issue of SVD.

	        For every component, the sign is chosen such that the entry of ``u``
	        with the largest absolute value becomes positive.

	        Args:
	            u (Tensor): u matrix of the shape [N, Q] or [B, N, Q].
	            v (Tensor): v matrix with one row per component, i.e. of the
	                shape [Q, C] or [B, Q, C].

	        Returns:
	            u (Tensor): Sign-fixed u matrix, same shape as the input.
	            v (Tensor): Sign-fixed v matrix, same shape as the input.
	        """
	        # Row index of the largest-magnitude entry of every column of u.
	        max_abs: Tensor = torch.abs(u).argmax(dim=-2)
	        # One gather covers both the plain and the batched layout.
	        picked: Tensor = torch.gather(u, -2, max_abs.unsqueeze(-2)).squeeze(-2)
	        signs: Tensor = torch.sign(picked)
	        u = u * signs.unsqueeze(-2)
	        v = v * signs.unsqueeze(-1)
	        return u, v

	    def _labels_to_rgb(self, labels, target_shape):
	        """Map integer cluster labels to RGB colors of the k-means colormap.

	        Args:
	            labels: Tensor of cluster indices.
	            target_shape: Shape the labels are reshaped to before coloring.

	        Returns:
	            Float tensor with a trailing RGB dimension of size 3; a singleton
	            second-to-last dimension is squeezed away.
	        """
	        labels = labels.reshape(target_shape).float().cpu().numpy()
	        # Scale labels to [0, 1] for the colormap and drop the alpha channel.
	        label_map = self.cmap_kmeans(labels / (self.n_kmeans_clusters - 1))[..., :3]
	        return torch.Tensor(label_map).squeeze(-2)

	    def old_fit_transform_kmeans_batch(self, batch_features, subsample_size=20000):
	        """Cluster features with ``torch_kmeans`` and return a color map.

	        Args:
	            batch_features: Feature tensor with the channel dimension last and
	                the batch dimension first (assumed layout -- confirm against
	                callers).
	            subsample_size: If not ``None`` and smaller than the number of
	                points per batch element, k-means is fitted on a random
	                subset of that size; prediction always uses all points.

	        Returns:
	            RGB tensor holding one color per feature vector.
	        """
	        feats_map_flattened = batch_features.flatten(1, -2)
	        from torch_kmeans import KMeans, CosineSimilarity
	        kmeans_engine = KMeans(n_clusters=self.n_kmeans_clusters, distance=CosineSimilarity)

	        n = feats_map_flattened.size(1)
	        if subsample_size is not None and subsample_size < n:
	            indices = torch.randperm(n)[:subsample_size]
	            kmeans_engine.fit(feats_map_flattened[:, indices])
	        else:
	            kmeans_engine.fit(feats_map_flattened)

	        labels = kmeans_engine.predict(feats_map_flattened)
	        return self._labels_to_rgb(labels, batch_features.shape[:-1])

	    def fit_transform_kmeans_batch(self, batch_features):
	        """Cluster all feature vectors with cosine k-means and color them.

	        The resulting centroids are stored in ``kmeans_cluster_centers``.

	        Args:
	            batch_features: Feature tensor with the channel dimension last.

	        Returns:
	            RGB tensor holding one color per feature vector.
	        """
	        feats_map_flattened = batch_features.flatten(0, -2)

	        with torch.no_grad():
	            cl, c = self._KMeans_cosine(feats_map_flattened.float(), K=self.n_kmeans_clusters)
	            self.kmeans_cluster_centers = c

	        return self._labels_to_rgb(cl, batch_features.shape[:-1])

	    def _KMeans_cosine(self, x, K=19, Niter=100):
	        """Implements Lloyd's algorithm for the Cosine similarity metric.

	        Uses plain PyTorch operations; the (N, K) similarity matrix is
	        materialized densely in every iteration.

	        Args:
	            x: (N, D) floating point tensor of samples.
	            K: Number of clusters. The first ``K`` samples seed the centroids.
	            Niter: Maximum number of Lloyd iterations (at least 1).

	        Returns:
	            cl: (N,) tensor of cluster indices.
	            c: (K, D) tensor of L2-normalized centroids; a cluster without
	                members ends up as an all-zero row.

	        Raises:
	            ValueError: If ``Niter`` is smaller than 1.
	        """
	        if Niter < 1:
	            raise ValueError(f"Niter must be at least 1, got {Niter}")

	        # Simplistic initialization: the first K samples, normalized for the
	        # cosine similarity. normalize() returns a new tensor, so x is untouched.
	        c = torch.nn.functional.normalize(x[:K, :], dim=1, p=2)

	        cl = None
	        previous_cl = None
	        for _ in range(Niter):
	            # E step: assign every point to the centroid with the largest
	            # dot product.
	            cl = (x @ c.t()).argmax(dim=1)

	            # Unchanged assignments reproduce the same centroids, so all
	            # remaining iterations would be no-ops.
	            if previous_cl is not None and torch.equal(cl, previous_cl):
	                break

	            # M step: sum the points of every cluster, then re-normalize.
	            c.zero_()
	            c.index_add_(0, cl, x)
	            c[:] = torch.nn.functional.normalize(c, dim=1, p=2)

	            previous_cl = cl

	        return cl, c