import logging
import math
import os
import time
from copy import deepcopy

import cv2
import torch
import torch.nn.functional as F

from alnet import ALNet
from soft_detect import DKD
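
# Presets for the four ALIKE variants (tiny, small, normal, large). c1..c4 are
# the encoder channel widths handed to ALNet, dim is the descriptor dimension,
# and model_path points at the bundled checkpoint (roles inferred from the
# constructor below).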
configs = {
    "alike-t": {
        "c1": 8,
        "c2": 16,
        "c3": 32,
        "c4": 64,
        "dim": 64,
        "single_head": True,
        "radius": 2,
        "model_path": os.path.join(os.path.split(__file__)[0], "models", "alike-t.pth"),
    },
    "alike-s": {
        "c1": 8,
        "c2": 16,
        "c3": 48,
        "c4": 96,
        "dim": 96,
        "single_head": True,
        "radius": 2,
        "model_path": os.path.join(os.path.split(__file__)[0], "models", "alike-s.pth"),
    },
    "alike-n": {
        "c1": 16,
        "c2": 32,
        "c3": 64,
        "c4": 128,
        "dim": 128,
        "single_head": True,
        "radius": 2,
        "model_path": os.path.join(os.path.split(__file__)[0], "models", "alike-n.pth"),
    },
    "alike-l": {
        "c1": 32,
        "c2": 64,
        "c3": 128,
        "c4": 128,
        "dim": 128,
        "single_head": False,
        "radius": 2,
        "model_path": os.path.join(os.path.split(__file__)[0], "models", "alike-l.pth"),
    },
}
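
# A minimal usage sketch (assumes the corresponding weight file exists under
# models/; use device="cpu" if no GPU is available):
#   model = ALike(**configs["alike-t"], device="cuda", top_k=400)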


class ALike(ALNet):
    def __init__(
        self,
        # ================================== feature encoder
        c1: int = 32,
        c2: int = 64,
        c3: int = 128,
        c4: int = 128,
        dim: int = 128,
        single_head: bool = False,
        # ================================== detect parameters
        radius: int = 2,
        top_k: int = 500,
        scores_th: float = 0.5,
        n_limit: int = 5000,
        device: str = "cpu",
        model_path: str = "",
    ):
        super().__init__(c1, c2, c3, c4, dim, single_head)
        self.radius = radius
        self.top_k = top_k
        self.n_limit = n_limit
        self.scores_th = scores_th
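        # DKD (differentiable keypoint detection, from soft_detect) selects
        # keypoints from the score map: radius sets the local window, and
        # top_k / scores_th / n_limit bound how many keypoints are kept
        # (parameter roles inferred from their names and usage here).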
        self.dkd = DKD(
            radius=self.radius,
            top_k=self.top_k,
            scores_th=self.scores_th,
            n_limit=self.n_limit,
        )
        self.device = device

        if model_path != "":
            state_dict = torch.load(model_path, map_location=self.device)
            self.load_state_dict(state_dict)
            self.to(self.device)
            self.eval()
            logging.info(f"Loaded model parameters from {model_path}")
            logging.info(
                f"Number of model parameters: {sum(p.numel() for p in self.parameters() if p.requires_grad) / 1e3}K"
            )
    def extract_dense_map(self, image, ret_dict=False):
        # ====================================================
        # Check the image size: H and W should be integer multiples of 2^5 = 32;
        # if not, zero-pad the image up to the next multiple.
        device = image.device
        b, c, h, w = image.shape
        h_ = math.ceil(h / 32) * 32 if h % 32 != 0 else h
        w_ = math.ceil(w / 32) * 32 if w % 32 != 0 else w
        if h_ != h:
            h_padding = torch.zeros(b, c, h_ - h, w, device=device)
            image = torch.cat([image, h_padding], dim=2)
        if w_ != w:
            w_padding = torch.zeros(b, c, h_, w_ - w, device=device)
            image = torch.cat([image, w_padding], dim=3)
        # ====================================================

        scores_map, descriptor_map = super().forward(image)

        # ====================================================
        # Crop any padding back off
        if h_ != h or w_ != w:
            descriptor_map = descriptor_map[:, :, :h, :w]
            scores_map = scores_map[:, :, :h, :w]  # Bx1xHxW
        # ====================================================

        # BxCxHxW; L2-normalize each per-pixel descriptor
        descriptor_map = F.normalize(descriptor_map, p=2, dim=1)

        if ret_dict:
            return {
                "descriptor_map": descriptor_map,
                "scores_map": scores_map,
            }
        else:
            return descriptor_map, scores_map
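
    # Sketch: with a loaded instance `model`, the dense maps can be queried
    # directly on a normalized Bx3xHxW tensor, e.g.
    #   desc_map, score_map = model.extract_dense_map(torch.rand(1, 3, 480, 640))
    #   # desc_map: B x dim x H x W, L2-normalized per pixel; score_map: B x 1 x H x W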
    def forward(self, img, image_size_max=99999, sort=False, sub_pixel=False):
        """
        :param img: np.array HxWx3, RGB
        :param image_size_max: maximum size of the longer image side; larger
                               images are resized down before inference
        :param sort: sort keypoints by scores
        :param sub_pixel: whether to use sub-pixel accuracy
        :return: a dictionary with 'keypoints', 'descriptors', 'scores',
                 'scores_map', and 'time'
        """
        H, W, three = img.shape
        assert three == 3, "input image shape should be [HxWx3]"

        # ==================== image size constraint
        image = deepcopy(img)
        max_hw = max(H, W)
        if max_hw > image_size_max:
            ratio = float(image_size_max / max_hw)
            image = cv2.resize(image, dsize=None, fx=ratio, fy=ratio)

        # ==================== convert image to tensor (1x3xHxW, values in [0, 1])
        image = (
            torch.from_numpy(image)
            .to(self.device)
            .to(torch.float32)
            .permute(2, 0, 1)[None]
            / 255.0
        )

        # ==================== extract keypoints
        start = time.time()

        with torch.no_grad():
            descriptor_map, scores_map = self.extract_dense_map(image)
            keypoints, descriptors, scores, _ = self.dkd(
                scores_map, descriptor_map, sub_pixel=sub_pixel
            )
            keypoints, descriptors, scores = keypoints[0], descriptors[0], scores[0]
            # DKD returns coordinates in [-1, 1]; map them back to pixel
            # coordinates of the original (pre-resize) image
            keypoints = (keypoints + 1) / 2 * keypoints.new_tensor([[W - 1, H - 1]])

        if sort:
            indices = torch.argsort(scores, descending=True)
            keypoints = keypoints[indices]
            descriptors = descriptors[indices]
            scores = scores[indices]

        end = time.time()

        return {
            "keypoints": keypoints.cpu().numpy(),
            "descriptors": descriptors.cpu().numpy(),
            "scores": scores.cpu().numpy(),
            "scores_map": scores_map.cpu().numpy(),
            "time": end - start,
        }
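

# Usage sketch ("assets/example.jpg" is a hypothetical path). Keypoints come
# back in pixel coordinates of the original image, even when the image was
# resized internally to satisfy image_size_max:
#   model = ALike(**configs["alike-n"], device="cpu")
#   img = cv2.cvtColor(cv2.imread("assets/example.jpg"), cv2.COLOR_BGR2RGB)
#   pred = model(img, sort=True)
#   kpts, desc = pred["keypoints"], pred["descriptors"]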

if __name__ == "__main__":
    import numpy as np
    from thop import profile

    net = ALike(c1=32, c2=64, c3=128, c4=128, dim=128, single_head=False)

    # Random image purely for profiling; forward() normalizes by 255 internally.
    image = np.random.random((640, 480, 3)).astype(np.float32)
    flops, params = profile(net, inputs=(image, 9999, False), verbose=False)
    print("{:<30} {:<8} GFLOPs".format("Computational complexity: ", flops / 1e9))
    print("{:<30} {:<8} K".format("Number of parameters: ", params / 1e3))