Spaces:
Running
on
Zero
Running
on
Zero
update: missing file
Browse files
imcui/third_party/MatchAnything/src/datasets/common_data_pair.py
ADDED
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os.path as osp
|
2 |
+
import numpy as np
|
3 |
+
import torch
|
4 |
+
import torch.nn.functional as F
|
5 |
+
from torch.utils.data import Dataset
|
6 |
+
from loguru import logger
|
7 |
+
from PIL import Image
|
8 |
+
|
9 |
+
from src.utils.dataset import read_megadepth_gray
|
10 |
+
|
11 |
+
class CommonDataset(Dataset):
    """Image-pair dataset driven by a single scene-info ``.npz`` file.

    The ``.npz`` must provide ``pair_infos`` and ``image_paths``.  The optional
    entries ``dataset_name``, ``gt_matches``, ``gt_2D_transforms``,
    ``gt_2D_matches``, ``intrinsics`` and ``poses`` are used when present.
    Depth maps are never read from disk: every sample carries an all-zero
    depth placeholder of size ``depth_max_size x depth_max_size``.
    """

    def __init__(self,
                 root_dir,
                 npz_path,
                 mode='train',
                 min_overlap_score=0.4,
                 img_resize=None,
                 df=None,
                 img_padding=False,
                 depth_padding=False,
                 augment_fn=None,
                 testNpairs=300,
                 fp16=False,
                 fix_bias=False,
                 sample_ratio=1.0,
                 **kwargs):
        """Load the scene description and store the sampling options.

        Args:
            root_dir: Dataset root; image paths are resolved as
                ``root_dir / dataset_name / image_path``.
            npz_path: Path of the scene-info ``.npz`` file.
            mode: ``'train'``, ``'test'`` or any other split name.  In
                ``'test'`` mode only the first ``testNpairs`` pairs are kept.
            min_overlap_score: Not used for filtering here; a positive value
                in test mode only triggers a warning.
            img_resize, df, img_padding: Forwarded to ``read_megadepth_gray``.
                ``df`` is forced to 1 when ``fix_bias`` is set.
            depth_padding: Selects the size of the zero depth placeholder.
            augment_fn: Stored in train mode only (not applied when reading).
            testNpairs: Maximum number of pairs used in test mode.
            fp16: Return images, depths and scales as half precision.
            fix_bias: Use the ``(size - 1) // 8 + 1`` coarse-mask size.
            sample_ratio: Stored on the instance; not used in this class.
            **kwargs: Must contain ``load_origin_rgb``, ``read_gray``,
                ``normalize_img``, ``resize_by_stretch`` and
                ``gt_matches_padding_n``; may contain ``coarse_scale``
                (default 0.125).

        Raises:
            KeyError: If a required keyword option is missing.
            AssertionError: In train mode when ``img_resize`` is ``None`` or
                ``depth_padding`` is falsy.
        """
        super().__init__()
        self.root_dir = root_dir
        self.mode = mode
        self.scene_id = npz_path.split('.')[0]
        self.sample_ratio = sample_ratio

        # prepare scene_info and pair_info
        if mode == 'test' and min_overlap_score > 0:
            # The score is not used for any filtering below, so warning the
            # user is all that is needed here.
            logger.warning("You are using `min_overlap_score`!=0 in test mode. Set to 0.")
        # NOTE(security): allow_pickle=True executes pickled payloads; only
        # load scene files from trusted sources.
        self.scene_info = np.load(npz_path, allow_pickle=True)
        if mode == 'test':
            self.pair_infos = self.scene_info['pair_infos'][:testNpairs].copy()
        else:
            self.pair_infos = self.scene_info['pair_infos'].copy()
        # Filled on first access (see _get_image_paths): an npz archive
        # re-reads an entry from disk on every lookup.
        self._image_paths = None

        # parameters for image resizing, padding and depthmap padding
        if mode == 'train':
            assert img_resize is not None and depth_padding
        self.img_resize = img_resize
        self.df = df
        self.img_padding = img_padding

        # for training LoFTR
        self.augment_fn = augment_fn if mode == 'train' else None
        # kwargs is a plain dict, so it must be queried with .get();
        # getattr() on it always fell back to the default.
        self.coarse_scale = kwargs.get('coarse_scale', 0.125)
        self.load_origin_rgb = kwargs["load_origin_rgb"]
        self.read_gray = kwargs["read_gray"]
        self.normalize_img = kwargs["normalize_img"]
        self.resize_by_stretch = kwargs["resize_by_stretch"]
        depth_max_size = 4000 if 'MTV_cross_modal_data' not in npz_path else 6000
        # the upperbound of depthmaps size in megadepth.
        self.depth_max_size = depth_max_size if depth_padding else 2000

        if "dataset_name" in self.scene_info:
            dataset_name = self.scene_info['dataset_name']
            # Values stored in an npz come back as numpy arrays, which
            # os.path.join rejects; unwrap single-element arrays to a scalar.
            if isinstance(dataset_name, np.ndarray) and dataset_name.size == 1:
                dataset_name = dataset_name.item()
            self.dataset_name = dataset_name
        else:
            # Fall back to the first directory component below root_dir.
            self.dataset_name = npz_path.split(root_dir)[1].split('/')[1]
        # sparse matches produced by teacher models, used for training
        self.gt_matches = self.scene_info['gt_matches'] if 'gt_matches' in self.scene_info else None
        self.gt_matches_padding_n = kwargs["gt_matches_padding_n"]
        self.gt_2D_warp = self.scene_info['gt_2D_transforms'] if "gt_2D_transforms" in self.scene_info else None
        # Used for eval
        self.gt_2D_matches = self.scene_info['gt_2D_matches'] if "gt_2D_matches" in self.scene_info else None
        self.intrins = self.scene_info['intrinsics'] if 'intrinsics' in self.scene_info else None
        self.poses = self.scene_info['poses'] if 'poses' in self.scene_info else None

        self.fp16 = fp16
        self.fix_bias = fix_bias
        if self.fix_bias:
            self.df = 1

    def __len__(self):
        """Return the number of image pairs."""
        return len(self.pair_infos)

    def _get_image_paths(self):
        """Return the ``image_paths`` entry, reading it from the npz only once."""
        if self._image_paths is None:
            self._image_paths = self.scene_info['image_paths']
        return self._image_paths

    @staticmethod
    def _read_rgb(path):
        """Read ``path`` as an RGB tensor of shape (3, H, W) scaled by 1/255."""
        # The context manager closes the file handle once pixels are decoded.
        with Image.open(path) as img:
            rgb = np.array(img.convert("RGB"))
        return torch.from_numpy(rgb).permute(2, 0, 1) / 255.

    def __getitem__(self, idx):
        """Build the sample dict for pair ``idx``.

        Returns:
            dict with the two images, zero depth placeholders, relative poses,
            intrinsics, homography, padded ground-truth matches and metadata.
            ``mask0``/``mask1``, ``gt_2D_matches`` and the ``*_rgb_origin``
            images are added only when the corresponding data is available.

        Raises:
            NotImplementedError: If a non-array pair entry does not have two
                or three fields.
        """
        pair_info = self.pair_infos[idx]
        image_paths = self._get_image_paths()

        if isinstance(pair_info, np.ndarray):
            # Array-style pairs: per-image entries hold one item per side of
            # the pair (side 0 for the first image, side 1 for the second).
            idx0, idx1 = pair_info[0], pair_info[1]
            img_path0, img_path1 = image_paths[idx0][0], image_paths[idx1][1]
            K_0 = torch.zeros((3, 3), dtype=torch.float) if self.intrins is None else torch.from_numpy(self.intrins[idx0][0]).float()
            K_1 = torch.zeros((3, 3), dtype=torch.float) if self.intrins is None else torch.from_numpy(self.intrins[idx1][1]).float()
        else:
            if len(pair_info) == 3:
                (idx0, idx1), overlap_score, central_matches = pair_info
            elif len(pair_info) == 2:
                (idx0, idx1), overlap_score = pair_info
            else:
                raise NotImplementedError

            img_path0, img_path1 = image_paths[idx0], image_paths[idx1]
            K_0 = torch.zeros((3, 3), dtype=torch.float) if self.intrins is None else torch.from_numpy(self.intrins[idx0]).float()
            K_1 = torch.zeros((3, 3), dtype=torch.float) if self.intrins is None else torch.from_numpy(self.intrins[idx1]).float()

        # read grayscale image and mask. (1, h, w) and (h, w)
        img_name0 = osp.join(self.root_dir, self.dataset_name, img_path0)
        # Often transformed image based on img0, e.g., depth estimation or Diffusion
        # Note: should be pixel aligned with img0
        img_name1 = osp.join(self.root_dir, self.dataset_name, img_path1)

        image0, mask0, scale0, origin_img_size0 = read_megadepth_gray(
            img_name0, self.img_resize, self.df, self.img_padding, None,
            read_gray=self.read_gray, normalize_img=self.normalize_img,
            resize_by_stretch=self.resize_by_stretch)
        image1, mask1, scale1, origin_img_size1 = read_megadepth_gray(
            img_name1, self.img_resize, self.df, self.img_padding, None,
            read_gray=self.read_gray, normalize_img=self.normalize_img,
            resize_by_stretch=self.resize_by_stretch)

        if self.gt_2D_warp is not None:
            # Append the [0, 0, 1] row to the stored transform.
            gt_warp = np.concatenate([self.gt_2D_warp[idx], [[0, 0, 1]]])  # 3 * 3
        else:
            gt_warp = np.zeros((3, 3))

        # No depth is loaded; both views share one zero placeholder.
        depth0 = depth1 = torch.zeros([self.depth_max_size, self.depth_max_size], dtype=torch.float)

        homo_mask0 = torch.zeros((1, image0.shape[-2], image0.shape[-1]))
        homo_mask1 = torch.zeros((1, image1.shape[-2], image1.shape[-1]))
        gt_matches = torch.zeros((self.gt_matches_padding_n, 4), dtype=torch.float)

        if self.poses is None:
            T_0to1 = T_1to0 = torch.zeros((4, 4), dtype=torch.float)  # (4, 4)
        else:
            # read and compute relative poses
            T0 = self.poses[idx0]
            T1 = self.poses[idx1]
            T_0to1 = torch.tensor(np.matmul(T1, np.linalg.inv(T0)), dtype=torch.float)[:4, :4]  # (4, 4)
            T_1to0 = T_0to1.inverse()

        data = {
            'image0': image0,  # (1, h, w)
            'depth0': depth0,  # (h, w)
            'image1': image1,
            'depth1': depth1,
            'T_0to1': T_0to1,  # (4, 4)
            'T_1to0': T_1to0,
            'K0': K_0,  # (3, 3)
            'K1': K_1,
            'homo_mask0': homo_mask0,
            'homo_mask1': homo_mask1,
            'homography': torch.from_numpy(gt_warp.astype(np.float32)),
            'norm_pixel_mat': torch.zeros((3, 3), dtype=torch.float),
            'homo_sample_normed': torch.zeros((3, 3), dtype=torch.float),
            'gt_matches': gt_matches,
            'gt_matches_mask': torch.zeros((1,), dtype=torch.bool),
            'origin_img_size0': origin_img_size0,  # H W
            'origin_img_size1': origin_img_size1,
            'scale0': scale0,  # [scale_w, scale_h]
            'scale1': scale1,
            'dataset_name': 'MegaDepth',
            'scene_id': self.scene_id,
            'pair_id': idx,
            'pair_names': (img_path0, img_path1),
            'rel_pair_names': (img_path0, img_path1),
        }
        if self.fp16:
            # Same keys as the full-precision sample; only these tensors are
            # cast to half precision.
            for key in ('image0', 'depth0', 'image1', 'depth1', 'scale0', 'scale1'):
                data[key] = data[key].half()

        if self.gt_2D_matches is not None:
            data.update({'gt_2D_matches': torch.from_numpy(self.gt_2D_matches[idx]).to(torch.float)})  # N * 4

        if self.gt_matches is not None:
            gt_matches_ = self.gt_matches[idx]
            if isinstance(gt_matches_, str):
                # Matches stored as a path relative to the dataset directory.
                gt_matches_ = np.load(osp.join(self.root_dir, self.dataset_name, gt_matches_), allow_pickle=True)
            gt_matches_ = torch.from_numpy(gt_matches_).to(torch.float)  # N * 4: mkpts0, mkpts1
            # Keep at most gt_matches_padding_n rows; the rest stays zero.
            num = min(len(gt_matches_), self.gt_matches_padding_n)
            gt_matches[:num] = gt_matches_[:num]

            data.update({
                "gt_matches": gt_matches,
                'gt_matches_mask': torch.ones((1,), dtype=torch.bool),
                'norm_pixel_mat': torch.zeros((3, 3), dtype=torch.float),
                "homo_sample_normed": torch.zeros((3, 3), dtype=torch.float),
            })

        # mask0 is not None when img_padding is True.  Coarse masks are only
        # produced for a truthy coarse_scale.
        if mask0 is not None and self.coarse_scale:
            masks = torch.stack([mask0, mask1], dim=0)[None].float()
            if self.fix_bias:
                resize_kwargs = {'size': ((image0.shape[1] - 1) // 8 + 1, (image0.shape[2] - 1) // 8 + 1)}
            else:
                resize_kwargs = {'scale_factor': self.coarse_scale}
            ts_mask_0, ts_mask_1 = F.interpolate(
                masks, mode='nearest', recompute_scale_factor=False, **resize_kwargs)[0].bool()
            data.update({'mask0': ts_mask_0, 'mask1': ts_mask_1})

        if self.load_origin_rgb:
            data.update({
                "image0_rgb_origin": self._read_rgb(img_name0),
                "image1_rgb_origin": self._read_rgb(img_name1),
            })

        return data
|
imcui/third_party/MatchAnything/tools/evaluate_datasets.py
CHANGED
@@ -25,8 +25,7 @@ from src.utils.homography_utils import warp_points
|
|
25 |
from src.datasets.common_data_pair import CommonDataset
|
26 |
from src.utils.metrics import error_auc
|
27 |
from tools_utils.plot import plot_matches, warp_img_and_blend, epipolar_error
|
28 |
-
|
29 |
-
from pairs_match_and_propogation.utils.data_io import save_h5
|
30 |
|
31 |
def parse_args():
|
32 |
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
|
|
25 |
from src.datasets.common_data_pair import CommonDataset
|
26 |
from src.utils.metrics import error_auc
|
27 |
from tools_utils.plot import plot_matches, warp_img_and_blend, epipolar_error
|
28 |
+
from tools_utils.data_io import save_h5
|
|
|
29 |
|
30 |
def parse_args():
|
31 |
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
imcui/third_party/MatchAnything/tools/tools_utils/data_io.py
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pickle
|
2 |
+
import h5py
|
3 |
+
import os
|
4 |
+
import numpy as np
|
5 |
+
import torch
|
6 |
+
from tqdm import tqdm
|
7 |
+
|
8 |
+
def dict_to_cuda(data_dict):
    """Return a copy of ``data_dict`` with every tensor moved via ``.cuda()``.

    Nested dicts and lists are converted recursively; any other value
    (tuples included) is carried over unchanged.  The input is not modified.
    """
    moved = {}
    for key, value in data_dict.items():
        if isinstance(value, torch.Tensor):
            value = value.cuda()
        elif isinstance(value, dict):
            value = dict_to_cuda(value)
        elif isinstance(value, list):
            value = list_to_cuda(value)
        moved[key] = value
    return moved
|
20 |
+
|
21 |
+
def list_to_cuda(data_list):
    """Return a new list with every tensor in ``data_list`` moved via ``.cuda()``.

    Nested dicts and lists are converted recursively; any other element is
    kept as is.  The input list is not modified.
    """
    def _convert(item):
        # Dispatch on the container/tensor type of a single element.
        if isinstance(item, torch.Tensor):
            return item.cuda()
        if isinstance(item, dict):
            return dict_to_cuda(item)
        if isinstance(item, list):
            return list_to_cuda(item)
        return item

    return [_convert(item) for item in data_list]
|
33 |
+
|
34 |
+
def save_obj(obj, name):
    """Pickle ``obj`` to the file ``name`` using the highest pickle protocol."""
    with open(name, 'wb') as out_file:
        pickle.dump(obj, out_file, protocol=pickle.HIGHEST_PROTOCOL)
|
37 |
+
|
38 |
+
def load_obj(name):
    """Unpickle and return the object stored in the file ``name``."""
    # NOTE(security): unpickling can run arbitrary code; only use this on
    # files from trusted sources.
    with open(name, 'rb') as in_file:
        loaded = pickle.load(in_file)
    return loaded
|
41 |
+
|
42 |
+
def load_h5(file_path, transform_slash=True, parallel=False):
    """Load every top-level entry of an h5 file into memory (not memmapped).

    Args:
        file_path: Path of the h5 file to read.
        transform_slash: When true, '+' in string keys is turned back into
            '/' (the inverse of the key transformation done by ``save_h5``).
        parallel: Currently unused.
            TODO: Loading data in parallel

    Returns:
        dict mapping each (possibly transformed) key to the array obtained
        from the entry's ``__array__()``.
    """
    loaded = {}
    with h5py.File(file_path, 'r') as h5_file:
        for name, entry in h5_file.items():
            if transform_slash and isinstance(name, str):
                name = name.replace('+', '/')
            loaded[name] = entry.__array__()
    return loaded
|
52 |
+
|
53 |
+
def save_h5(dict_to_save, filename, transform_slash=True, verbose=False, as_half=False):
    """Saves dictionary to hdf5 file.

    Args:
        dict_to_save: Mapping of dataset name to the data to store.
        filename: Output path; the file is opened in 'w' mode.
        transform_slash: When true, '/' in string keys is replaced by '+'.
        verbose: Show a tqdm progress bar over the keys.
        as_half: Store values whose ``dtype`` is float32 as float16; values
            of any other dtype, or without a ``dtype`` attribute, are stored
            unchanged.
    """
    with h5py.File(filename, 'w') as f:
        for key in tqdm(dict_to_save, disable=not verbose):
            # h5py doesn't allow '/' in object name (will lead to sub-group)
            if transform_slash and isinstance(key, str):
                save_key = key.replace('/', '+')
            else:
                save_key = key

            data = dict_to_save[key]
            # getattr() with a default replaces the former bare ``except``:
            # objects without a dtype are simply written as they are.
            if as_half and getattr(data, 'dtype', None) == np.float32:
                data = data.astype(np.float16)
            f.create_dataset(save_key, data=data)
|
75 |
+
|
76 |
+
|
77 |
+
def load_calib(calib_fullpath_list, subset_index=None):
    """Load all IMC calibration files and create a dictionary.

    Args:
        calib_fullpath_list: Paths of the calibration h5 files.
        subset_index: Optional iterable of indices into
            ``calib_fullpath_list``; when ``None`` every file is loaded.

    Returns:
        dict keyed by the file's base name, without extension and with
        "calibration_" removed, mapping to the result of ``load_h5``.
    """
    if subset_index is None:
        selected = calib_fullpath_list
    else:
        # Lazy lookup so each index is resolved right before its file is read.
        selected = (calib_fullpath_list[i] for i in subset_index)

    calib = {}
    for calib_path in selected:
        stem = os.path.splitext(os.path.basename(calib_path))[0]
        calib[stem.replace("calibration_", "")] = load_h5(calib_path)
    return calib
|