import logging
import math
import os
import time
from copy import deepcopy

import cv2
import torch
import torch.nn.functional as F

from alnet import ALNet
from soft_detect import DKD
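
# Presets for the four ALIKE variants (tiny, small, normal, large). c1..c4 are
# the encoder channel widths handed to ALNet, dim is the descriptor dimension,
# and model_path points at the bundled checkpoint (roles inferred from the
# constructor below).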
configs = {
    "alike-t": {
        "c1": 8,
        "c2": 16,
        "c3": 32,
        "c4": 64,
        "dim": 64,
        "single_head": True,
        "radius": 2,
        "model_path": os.path.join(os.path.split(__file__)[0], "models", "alike-t.pth"),
    },
    "alike-s": {
        "c1": 8,
        "c2": 16,
        "c3": 48,
        "c4": 96,
        "dim": 96,
        "single_head": True,
        "radius": 2,
        "model_path": os.path.join(os.path.split(__file__)[0], "models", "alike-s.pth"),
    },
    "alike-n": {
        "c1": 16,
        "c2": 32,
        "c3": 64,
        "c4": 128,
        "dim": 128,
        "single_head": True,
        "radius": 2,
        "model_path": os.path.join(os.path.split(__file__)[0], "models", "alike-n.pth"),
    },
    "alike-l": {
        "c1": 32,
        "c2": 64,
        "c3": 128,
        "c4": 128,
        "dim": 128,
        "single_head": False,
        "radius": 2,
        "model_path": os.path.join(os.path.split(__file__)[0], "models", "alike-l.pth"),
    },
}
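
# A minimal usage sketch (assumes the corresponding weight file exists under
# models/; use device="cpu" if no GPU is available):
#   model = ALike(**configs["alike-t"], device="cuda", top_k=400)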


class ALike(ALNet):
    def __init__(
        self,
        # ================================== feature encoder
        c1: int = 32,
        c2: int = 64,
        c3: int = 128,
        c4: int = 128,
        dim: int = 128,
        single_head: bool = False,
        # ================================== detect parameters
        radius: int = 2,
        top_k: int = 500,
        scores_th: float = 0.5,
        n_limit: int = 5000,
        device: str = "cpu",
        model_path: str = "",
    ):
        super().__init__(c1, c2, c3, c4, dim, single_head)
        self.radius = radius
        self.top_k = top_k
        self.n_limit = n_limit
        self.scores_th = scores_th
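        # DKD (differentiable keypoint detection, from soft_detect) selects
        # keypoints from the score map: radius sets the local window, and
        # top_k / scores_th / n_limit bound how many keypoints are kept
        # (parameter roles inferred from their names and usage here).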
        self.dkd = DKD(
            radius=self.radius,
            top_k=self.top_k,
            scores_th=self.scores_th,
            n_limit=self.n_limit,
        )
        self.device = device

        if model_path != "":
            state_dict = torch.load(model_path, map_location=self.device)
            self.load_state_dict(state_dict)
            self.to(self.device)
            self.eval()
            logging.info(f"Loaded model parameters from {model_path}")
            logging.info(
                f"Number of model parameters: {sum(p.numel() for p in self.parameters() if p.requires_grad) / 1e3}K"
            )
    def extract_dense_map(self, image, ret_dict=False):
        # ====================================================
        # Check the image size: H and W should be integer multiples of 2^5 = 32;
        # if not, zero-pad the image up to the next multiple.
        device = image.device
        b, c, h, w = image.shape
        h_ = math.ceil(h / 32) * 32 if h % 32 != 0 else h
        w_ = math.ceil(w / 32) * 32 if w % 32 != 0 else w
        if h_ != h:
            h_padding = torch.zeros(b, c, h_ - h, w, device=device)
            image = torch.cat([image, h_padding], dim=2)
        if w_ != w:
            w_padding = torch.zeros(b, c, h_, w_ - w, device=device)
            image = torch.cat([image, w_padding], dim=3)
        # ====================================================

        scores_map, descriptor_map = super().forward(image)

        # ====================================================
        # Crop any padding back off
        if h_ != h or w_ != w:
            descriptor_map = descriptor_map[:, :, :h, :w]
            scores_map = scores_map[:, :, :h, :w]  # Bx1xHxW
        # ====================================================

        # BxCxHxW; L2-normalize each per-pixel descriptor
        descriptor_map = F.normalize(descriptor_map, p=2, dim=1)

        if ret_dict:
            return {
                "descriptor_map": descriptor_map,
                "scores_map": scores_map,
            }
        else:
            return descriptor_map, scores_map
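
    # Sketch: with a loaded instance `model`, the dense maps can be queried
    # directly on a normalized Bx3xHxW tensor, e.g.
    #   desc_map, score_map = model.extract_dense_map(torch.rand(1, 3, 480, 640))
    #   # desc_map: B x dim x H x W, L2-normalized per pixel; score_map: B x 1 x H x W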
    def forward(self, img, image_size_max=99999, sort=False, sub_pixel=False):
        """
        :param img: np.array HxWx3, RGB
        :param image_size_max: maximum size of the longer image side; larger
                               images are resized down before inference
        :param sort: sort keypoints by scores
        :param sub_pixel: whether to use sub-pixel accuracy
        :return: a dictionary with 'keypoints', 'descriptors', 'scores',
                 'scores_map', and 'time'
        """
        H, W, three = img.shape
        assert three == 3, "input image shape should be [HxWx3]"

        # ==================== image size constraint
        image = deepcopy(img)
        max_hw = max(H, W)
        if max_hw > image_size_max:
            ratio = float(image_size_max / max_hw)
            image = cv2.resize(image, dsize=None, fx=ratio, fy=ratio)

        # ==================== convert image to tensor (1x3xHxW, values in [0, 1])
        image = (
            torch.from_numpy(image)
            .to(self.device)
            .to(torch.float32)
            .permute(2, 0, 1)[None]
            / 255.0
        )

        # ==================== extract keypoints
        start = time.time()

        with torch.no_grad():
            descriptor_map, scores_map = self.extract_dense_map(image)
            keypoints, descriptors, scores, _ = self.dkd(
                scores_map, descriptor_map, sub_pixel=sub_pixel
            )
            keypoints, descriptors, scores = keypoints[0], descriptors[0], scores[0]
            # DKD returns coordinates in [-1, 1]; map them back to pixel
            # coordinates of the original (pre-resize) image
            keypoints = (keypoints + 1) / 2 * keypoints.new_tensor([[W - 1, H - 1]])

        if sort:
            indices = torch.argsort(scores, descending=True)
            keypoints = keypoints[indices]
            descriptors = descriptors[indices]
            scores = scores[indices]

        end = time.time()

        return {
            "keypoints": keypoints.cpu().numpy(),
            "descriptors": descriptors.cpu().numpy(),
            "scores": scores.cpu().numpy(),
            "scores_map": scores_map.cpu().numpy(),
            "time": end - start,
        }
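

# Usage sketch ("assets/example.jpg" is a hypothetical path). Keypoints come
# back in pixel coordinates of the original image, even when the image was
# resized internally to satisfy image_size_max:
#   model = ALike(**configs["alike-n"], device="cpu")
#   img = cv2.cvtColor(cv2.imread("assets/example.jpg"), cv2.COLOR_BGR2RGB)
#   pred = model(img, sort=True)
#   kpts, desc = pred["keypoints"], pred["descriptors"]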

if __name__ == "__main__":
    import numpy as np
    from thop import profile

    net = ALike(c1=32, c2=64, c3=128, c4=128, dim=128, single_head=False)

    # Random image purely for profiling; forward() normalizes by 255 internally.
    image = np.random.random((640, 480, 3)).astype(np.float32)
    flops, params = profile(net, inputs=(image, 9999, False), verbose=False)
    print("{:<30} {:<8} GFLOPs".format("Computational complexity: ", flops / 1e9))
    print("{:<30} {:<8} K".format("Number of parameters: ", params / 1e3))