Spaces:
Running
Running
Delete visual_matcher.py
Browse files- visual_matcher.py +0 -209
visual_matcher.py
DELETED
@@ -1,209 +0,0 @@
|
|
1 |
-
import time
|
2 |
-
import numpy as np
|
3 |
-
from PIL import Image
|
4 |
-
from scipy.spatial.distance import cdist
|
5 |
-
from scipy.optimize import linear_sum_assignment
|
6 |
-
|
7 |
-
|
8 |
-
class SimpleAffineTransform:
    """Simple affine transform restricted to translation and uniform scale.

    When applied, points are scaled about their own centroid and then
    shifted by ``translation``.
    """

    def __init__(self, translation=(0, 0), scale=1.0):
        """Store the translation (converted to an array) and the scale."""
        self.translation = np.array(translation)
        self.scale = scale

    def estimate(self, src, dst):
        """Fit translation and scale from corresponding point sets.

        Args:
            src: array-like of source points, one point per row.
            dst: array-like of destination points, one point per row.

        The translation is the difference of the two centroids; the scale is
        the ratio of the mean point-to-centroid distances.
        """
        src = np.asarray(src)
        dst = np.asarray(dst)

        src_center = np.mean(src, axis=0)
        dst_center = np.mean(dst, axis=0)
        self.translation = dst_center - src_center

        src_dists = np.linalg.norm(src - src_center, axis=1)
        dst_dists = np.linalg.norm(dst - dst_center, axis=1)
        # The small epsilon avoids a division by zero when every source point
        # coincides with the source centroid.
        self.scale = np.mean(dst_dists) / (np.mean(src_dists) + 1e-10)

    def inverse(self):
        """Return a new transform with negated translation and reciprocal scale."""
        # The original referenced an undefined ``AffineTransform`` here, which
        # raised NameError; the inverse is another instance of this class.
        return SimpleAffineTransform(-self.translation, 1.0 / self.scale)

    def __call__(self, coords):
        """Apply the transform to ``coords`` (one point per row)."""
        coords = np.asarray(coords)
        center = np.mean(coords, axis=0)
        return self.scale * (coords - center) + center + self.translation

    def residuals(self, src, dst):
        """Return the Euclidean distance between each transformed ``src`` point and ``dst``."""
        return np.sqrt(np.sum((self(src) - dst) ** 2, axis=1))
|
34 |
-
|
35 |
-
|
36 |
-
def norm_coords(x, left, right):
    """Clamp ``x`` to ``left`` from below, otherwise to ``right`` from above.

    The lower bound is checked first, so ``left`` is returned whenever
    ``x < left``; otherwise ``right`` is returned when ``x > right``; in all
    other cases ``x`` is returned unchanged.
    """
    return left if x < left else (right if x > right else x)
|
42 |
-
|
43 |
-
# Tokens treated as equivalent spellings of one another, keyed by the
# variant and mapped to the canonical form used for comparison.
_SPECIAL_TOKEN_MAP = {
    "\\dot": ".",
    "\\Dot": ".",
    "\\cdot": ".",
    "\\cdotp": ".",
    "\\ldotp": ".",
    "\\mid": "|",
    "\\rightarrow": "\\to",
    "\\top": "T",
    "\\Tilde": "\\tilde",
    "\\prime": "'",
    "\\ast": "*",
    "\\left<": "\\langle",
    "\\right>": "\\rangle",
    "\\lbrace": "\\{",
    "\\rbrace": "\\}",
    "\\lbrack": "[",
    "\\rbrack": "]",
    # NOTE(review): "\\blackslash" looks like a typo for "\\backslash" -- confirm.
    "\\blackslash": "/",
    "\\slash": "/",
    "\\leq": "\\le",
    "\\geq": "\\ge",
    "\\Vert": "\\|",
    "\\lVert": "\\|",
    "\\rVert": "\\|",
    "\\vert": "|",
    "\\lvert": "|",
    "\\rvert": "|",
    "\\colon": ":",
    "\\Ddot": "\\ddot",
    "\\Bar": "\\bar",
    "\\Vec": "\\vec",
    "\\parallel": "\\|",
    "\\dag": "\\dagger",
    "\\ddag": "\\ddagger",
    "\\textlangle": "<",
    "\\textrangle": ">",
    "\\textgreater": ">",
    "\\textless": "<",
    # NOTE(review): mapping to "n" is kept as found; confirm it is intended.
    "\\textbackslash": "n",
    "\\textunderscore": "_",
    "\\=": "_",
    "\\neg": "\\lnot",
    # The table previously listed "\\neq" twice ("\\ne", then "\\not=");
    # only the later entry ever took effect, so that one is kept.
    # NOTE(review): confirm "\\not=" rather than "\\ne" is the wanted form.
    "\\neq": "\\not=",
}

# Longest prefixes first so "\\bigg" is not mistaken for "\\big" + "g".
_SIZE_COMMANDS = ("\\Bigg", "\\bigg", "\\Big", "\\big")


def _strip_size_command(token):
    """Drop a leading \\big-style size command, keeping only its delimiter."""
    for prefix in _SIZE_COMMANDS:
        if token.startswith(prefix):
            break
    else:
        return token

    rest = token[len(prefix):]
    # Optional left/right/middle suffix, as in \bigl, \bigr, \bigm.
    if rest[:1] in ("l", "r", "m"):
        rest = rest[1:]
    rest = rest.strip()
    # A letter right after the prefix means this is a different command
    # (e.g. \bigcup, \bigoplus), not a size command applied to a delimiter.
    if not rest or rest[0].isalpha():
        return token
    if "\\" in rest:
        return "\\" + rest.split("\\")[-1]
    return rest[-1]


def norm_same_token(token):
    """Map a token to a canonical form so equivalent spellings compare equal.

    Steps, in order:
      1. ``\\left`` / ``\\right`` are removed unless the token contains
         "arrow", "<", ">" or "harpoon".
      2. A leading ``\\big`` / ``\\Big`` / ``\\bigg`` / ``\\Bigg`` size
         command is removed when it is followed by a delimiter.
      3. The token is replaced through the special-token table if present.
      4. For tokens starting with ``\\wide``, ``\\var`` or ``\\string`` the
         substring "wide", "var" or "\\string" respectively is removed.

    Args:
        token: the token string to normalise.

    Returns:
        The normalised token string.
    """
    if token.startswith(("\\left", "\\right")):
        if not any(part in token for part in ("arrow", "<", ">", "harpoon")):
            token = token.replace("\\left", "").replace("\\right", "")

    token = _strip_size_command(token)
    token = _SPECIAL_TOKEN_MAP.get(token, token)

    if token.startswith("\\wide"):
        return token.replace("wide", "")
    if token.startswith("\\var"):
        return token.replace("var", "")
    if token.startswith("\\string"):
        return token.replace("\\string", "")
    return token
|
107 |
-
|
108 |
-
|
109 |
-
class HungarianMatcher:
    """Assign ground-truth boxes to predicted boxes by minimum total cost.

    The pairwise cost is a weighted sum of a token-mismatch cost, an L1 cost
    on size-normalised box coordinates, and an L1 cost on normalised list
    positions. The assignment is solved with
    ``scipy.optimize.linear_sum_assignment``.

    Each box is a dict with a ``'token'`` entry and a ``'bbox'`` entry of the
    form ``(x_min, y_min, x_max, y_max)``.
    """

    def __init__(
        self,
        cost_token: float = 1,
        cost_position: float = 0.05,
        cost_order: float = 0.15,
    ):
        """Store the weights of the three cost terms."""
        self.cost_token = cost_token
        self.cost_position = cost_position
        self.cost_order = cost_order
        # Filled by __call__ with the unweighted "token", "position" and
        # "order" cost matrices of the most recent call.
        self.cost = {}

    @staticmethod
    def _encode_tokens(gt_tokens, pred_tokens):
        """Map tokens to integer ids shared by both lists; return two id arrays."""
        token2id = {}
        for token in gt_tokens + pred_tokens:
            token2id.setdefault(token, len(token2id))
        gt_ids = np.array([token2id[t] for t in gt_tokens], dtype=int)
        pred_ids = np.array([token2id[t] for t in pred_tokens], dtype=int)
        return gt_ids, pred_ids

    def calculate_token_cost(self, box_gt, box_pred):
        """Return the token-mismatch cost matrix of shape (len(gt), len(pred)).

        An entry is 0 when the two tokens are identical, 0.005 when they
        differ but are equal after ``norm_same_token``, and 1 otherwise.
        """
        gt_tokens = [data['token'] for data in box_gt]
        pred_tokens = [data['token'] for data in box_pred]

        gt_ids, pred_ids = self._encode_tokens(gt_tokens, pred_tokens)
        norm_gt_ids, norm_pred_ids = self._encode_tokens(
            [norm_same_token(t) for t in gt_tokens],
            [norm_same_token(t) for t in pred_tokens],
        )

        # Broadcasting compares every ground-truth id with every predicted id.
        token_cost = (gt_ids[:, None] != pred_ids[None, :]).astype(float)
        same_after_norm = norm_gt_ids[:, None] == norm_pred_ids[None, :]

        token_cost[np.logical_and(token_cost == 1, same_after_norm)] = 0.005
        return token_cost

    def box2array(self, box_list, size):
        """Return an (n, 4) array of bboxes divided by the (W, H) in ``size``."""
        W, H = size
        box_array = []
        for box in box_list:
            x_min, y_min, x_max, y_max = box['bbox']
            box_array.append([x_min / W, y_min / H, x_max / W, y_max / H])
        # reshape keeps the result 2-D even when box_list is empty.
        return np.array(box_array, dtype=float).reshape(-1, 4)

    def order2array(self, box_list, max_token_lens=None):
        """Return an (n, 1) array of list indices divided by ``max_token_lens``.

        When ``max_token_lens`` is falsy, ``len(box_list)`` is used instead.
        """
        if not max_token_lens:
            max_token_lens = len(box_list)
        order_array = [[idx / max_token_lens] for idx in range(len(box_list))]
        # reshape keeps the result 2-D even when box_list is empty.
        return np.array(order_array, dtype=float).reshape(-1, 1)

    def calculate_l1_cost(self, gt_array, pred_array):
        """Return pairwise L1 distances divided by the number of columns."""
        scale = gt_array.shape[-1]
        l1_cost = cdist(gt_array, pred_array, 'minkowski', p=1)
        return l1_cost / scale

    def __call__(self, box_gt, box_pred, gt_size, pred_size):
        """Match ground-truth boxes to predicted boxes.

        Args:
            box_gt: list of ground-truth box dicts.
            box_pred: list of predicted box dicts.
            gt_size: ``(W, H)`` used to normalise ground-truth bboxes.
            pred_size: ``(W, H)`` used to normalise predicted bboxes.

        Returns:
            A list of ``(gt_index, pred_index)`` pairs. Empty when either
            input list is empty.
        """
        if len(box_gt) == 0 or len(box_pred) == 0:
            # Nothing can be matched; record empty cost matrices so that
            # self.cost does not keep values from an earlier call.
            shape = (len(box_gt), len(box_pred))
            self.cost["token"] = np.zeros(shape)
            self.cost["position"] = np.zeros(shape)
            self.cost["order"] = np.zeros(shape)
            return []

        gt_box_array = self.box2array(box_gt, gt_size)
        pred_box_array = self.box2array(box_pred, pred_size)

        max_token_lens = max(len(box_gt), len(box_pred))
        gt_order_array = self.order2array(box_gt, max_token_lens)
        pred_order_array = self.order2array(box_pred, max_token_lens)

        token_cost = self.calculate_token_cost(box_gt, box_pred)
        position_cost = self.calculate_l1_cost(gt_box_array, pred_box_array)
        order_cost = self.calculate_l1_cost(gt_order_array, pred_order_array)

        self.cost["token"] = token_cost
        self.cost["position"] = position_cost
        self.cost["order"] = order_cost

        cost = (
            self.cost_token * token_cost
            + self.cost_position * position_cost
            + self.cost_order * order_cost
        )
        # Replace NaN / infinite entries with a fixed large cost before solving.
        cost[np.isnan(cost) | np.isinf(cost)] = 100

        row_idx, col_idx = linear_sum_assignment(cost)
        return [(a, b) for a, b in zip(row_idx, col_idx)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|