mlbench123 committed (verified)
Commit 06927d3 · Parent(s): 51f3b1c

Upload 7 files

Files changed (7):
  1. .gitattributes +35 -35
  2. .gitignore +1 -0
  3. GMM.py +672 -0
  4. README.md +12 -12
  5. app.py +115 -0
  6. gmm_model.joblib +3 -0
  7. requirements.txt +10 -0
.gitattributes CHANGED
@@ -1,35 +1,35 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
+ venv/
GMM.py ADDED
@@ -0,0 +1,672 @@
+ import numpy as np
+ import cv2 as cv
+ import os
+ from numpy.linalg import norm, inv
+ from scipy.stats import multivariate_normal as mv_norm
+ import joblib  # or import pickle
+ import torch
+ from torch.distributions import MultivariateNormal
+ import torch.nn.functional as F
+
+ # Default per-pixel mixture parameters
+ init_weight = [0.7, 0.11, 0.1, 0.09]
+ init_u = np.zeros(3)
+ # initial covariance matrix
+ init_sigma = 225 * np.eye(3)
+ init_alpha = 0.05
+
+ class GMM():
+     def __init__(self, data_dir, train_num, alpha=init_alpha):
+         self.data_dir = data_dir
+         self.train_num = train_num
+         self.alpha = alpha
+         self.img_shape = None
+
+         self.weight = None
+         self.mu = None
+         self.sigma = None
+         self.K = None
+         self.B = None
+
+     def check(self, pixel, mu, sigma):
+         '''
+         Check whether a pixel matches a Gaussian distribution.
+         Matching means the Mahalanobis distance is less than 2.5.
+         '''
+         # Convert numpy inputs to torch tensors
+         if isinstance(mu, np.ndarray):
+             mu = torch.from_numpy(mu).float()
+         if isinstance(sigma, np.ndarray):
+             sigma = torch.from_numpy(sigma).float()
+         if isinstance(pixel, np.ndarray):
+             pixel = torch.from_numpy(pixel).float()
+
+         # Ensure all are on the same device
+         device = mu.device
+         pixel = pixel.to(device)
+         sigma = sigma.to(device)
+
+         # Compute the Mahalanobis distance sqrt((x - mu)^T Sigma^-1 (x - mu))
+         delta = pixel - mu
+         sigma_inv = torch.linalg.inv(sigma)
+         d_squared = delta @ sigma_inv @ delta
+         d = torch.sqrt(d_squared + 1e-5)
+
+         return d.item() < 2.5  # threshold matches the docstring
+
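The 2.5 threshold above is the usual match criterion for per-pixel Gaussian mixture background models: a pixel belongs to a component if it lies within roughly 2.5 standard deviations of the mean. A minimal standalone NumPy sketch of the same test (pixel and mean values below are invented for illustration):

```python
import numpy as np

pixel = np.array([120.0, 118.0, 125.0])  # hypothetical BGR sample
mu = np.array([110.0, 110.0, 110.0])     # component mean
sigma = 225 * np.eye(3)                  # init_sigma: per-channel std dev of 15

delta = pixel - mu
d = np.sqrt(delta @ np.linalg.inv(sigma) @ delta)
print(d < 2.5)  # True: d is about 1.31, so this pixel matches the component
```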
+     # def train(self, K=4):
+     #     '''
+     #     train model
+     #     '''
+     #     self.K = K
+     #     file_list = []
+     #     # file numbers are from 1 to train_number
+     #     for i in range(self.train_num):
+     #         file_name = os.path.join(self.data_dir, 'b%05d' % i + '.bmp')
+     #         file_list.append(file_name)
+
+     #     img_init = cv.imread(file_list[0])
+     #     img_shape = img_init.shape
+     #     self.img_shape = img_shape
+     #     self.weight = np.array([[init_weight for j in range(self.img_shape[1])] for i in range(self.img_shape[0])])
+     #     self.mu = np.array([[[init_u for k in range(self.K)] for j in range(img_shape[1])]
+     #                         for i in range(img_shape[0])])
+     #     self.sigma = np.array([[[init_sigma for k in range(self.K)] for j in range(img_shape[1])]
+     #                            for i in range(img_shape[0])])
+
+     #     self.B = np.ones(self.img_shape[0:2], dtype=int)
+     #     for i in range(img_shape[0]):
+     #         for j in range(img_shape[1]):
+     #             for k in range(self.K):
+     #                 self.mu[i][j][k] = np.array(img_init[i][j]).reshape(1, 3)
+     #     for i in range(self.K):
+     #         print('u:{}'.format(self.mu[100][100][i]))
+     #     # update process
+     #     for file in file_list:
+     #         print('training:{}'.format(file))
+     #         img = cv.imread(file)
+     #         for i in range(img.shape[0]):
+     #             for j in range(img.shape[1]):
+     #                 # Check whether the pixel matches any of the existing K Gaussian distributions
+     #                 match = -1
+     #                 for k in range(K):
+     #                     if self.check(img[i][j], self.mu[i][j][k], self.sigma[i][j][k]):
+     #                         match = k
+     #                         break
+     #                 # a match was found
+     #                 if match != -1:
+     #                     mu = self.mu[i][j][match]
+     #                     sigma = self.sigma[i][j][match]
+     #                     x = img[i][j].astype(float)
+     #                     delta = x - mu
+     #                     rho = self.alpha * mv_norm.pdf(img[i][j], mu, sigma)
+     #                     self.weight[i][j] = (1 - self.alpha) * self.weight[i][j]
+     #                     self.weight[i][j][match] += self.alpha
+     #                     # self.weight[i][j][k] = self.weight[i][j][k] + self.alpha*(m - self.weight[i][j][k])
+     #                     self.mu[i][j][match] = mu + rho * delta
+     #                     self.sigma[i][j][match] = sigma + rho * (np.matmul(delta, delta.T) - sigma)
+     #                 # if none of the K distributions match the current value,
+     #                 # the least probable distribution is replaced with a distribution
+     #                 # with the current value as its mean, an initially high variance and a low prior weight
+     #                 if match == -1:
+     #                     w_list = [self.weight[i][j][k] for k in range(K)]
+     #                     id = w_list.index(min(w_list))
+     #                     # weight stays the same; replace the mean with the current value and set a high variance
+     #                     self.mu[i][j][id] = np.array(img[i][j]).reshape(1, 3)
+     #                     self.sigma[i][j][id] = np.array(init_sigma)
+     #         print('img:{}'.format(img[100][100]))
+     #         print('weight:{}'.format(self.weight[100][100]))
+     #         self.reorder()
+     #         for i in range(self.K):
+     #             print('u:{}'.format(self.mu[100][100][i]))
+
+     def train(self, K=4):
+         '''
+         train model with GPU acceleration
+         '''
+         self.K = K
+         device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+         print(f"Using device: {device}")
+
+         file_list = []
+         for i in range(self.train_num):
+             file_name = os.path.join(self.data_dir, 'b%05d' % i + '.bmp')
+             file_list.append(file_name)
+
+         # Initialize with the first image
+         img_init = cv.imread(file_list[0])
+         img_shape = img_init.shape
+         self.img_shape = img_shape
+         height, width, channels = img_shape
+
+         # Initialize model parameters on the GPU
+         self.weight = torch.full((height, width, K), 1.0 / K,
+                                  dtype=torch.float32, device=device)
+         self.mu = torch.zeros(height, width, K, 3,
+                               dtype=torch.float32, device=device)
+         self.sigma = torch.zeros(height, width, K, 3, 3,
+                                  dtype=torch.float32, device=device)
+         self.B = torch.ones((height, width),
+                             dtype=torch.int32, device=device)
+
+         # Initialize mu with the first image's pixel values
+         img_tensor = torch.from_numpy(img_init).float().to(device)
+         for k in range(K):
+             self.mu[:, :, k, :] = img_tensor
+
+         # Initialize sigma with identity matrix * 225
+         self.sigma[:] = torch.eye(3, device=device) * 225
+
+         # Training loop
+         for file in file_list:
+             print('training:{}'.format(file))
+             img = cv.imread(file)
+             img_tensor = torch.from_numpy(img).float().to(device)  # (H, W, 3)
+
+             # Mahalanobis distance of each pixel to all K components at once
+             delta = img_tensor.unsqueeze(2) - self.mu          # (H, W, K, 3)
+             sigma_inv = torch.linalg.inv(self.sigma)           # (H, W, K, 3, 3)
+             # (x - mu)^T Sigma^-1 (x - mu), computed with two einsums
+             temp = torch.einsum('hwki,hwkij->hwkj', delta, sigma_inv)
+             mahalanobis = torch.sqrt(torch.einsum('hwki,hwki->hwk', temp, delta))
+
+             # Assign each pixel to the first component within 2.5 std devs
+             matches = torch.full((height, width), -1, dtype=torch.long, device=device)
+             for k in range(K):
+                 match_mask = (mahalanobis[:, :, k] < 2.5) & (matches == -1)
+                 matches[match_mask] = k
+
+             # Process matched pixels
+             for k in range(K):
+                 # Mask of pixels matched to component k
+                 mask = matches == k
+                 if mask.any():
+                     matched_pixels = img_tensor[mask]                # (N, 3)
+                     matched_mu = self.mu[:, :, k, :][mask]           # (N, 3)
+                     matched_sigma = self.sigma[:, :, k, :, :][mask]  # (N, 3, 3)
+
+                     try:
+                         # Learning rate rho = alpha * N(x | mu, sigma)
+                         mvn = MultivariateNormal(matched_mu,
+                                                  covariance_matrix=matched_sigma)
+                         rho = self.alpha * torch.exp(mvn.log_prob(matched_pixels))
+
+                         # Update weights
+                         self.weight[:, :, k][mask] = (1 - self.alpha) * self.weight[:, :, k][mask] + self.alpha
+
+                         # Update mu
+                         delta = matched_pixels - matched_mu
+                         self.mu[:, :, k, :][mask] += rho.unsqueeze(1) * delta
+
+                         # Update sigma with the outer product (x - mu)(x - mu)^T
+                         delta_outer = torch.einsum('bi,bj->bij', delta, delta)
+                         sigma_update = rho.unsqueeze(1).unsqueeze(2) * (delta_outer - matched_sigma)
+                         self.sigma[:, :, k, :, :][mask] += sigma_update
+
+                     except RuntimeError as e:
+                         print(f"Error updating distribution {k}: {e}")
+                         continue
+
+             # Process non-matched pixels
+             non_matched = matches == -1
+             if non_matched.any():
+                 # Find the least probable component for each non-matched pixel
+                 weight_non_matched = self.weight[non_matched]             # (N, K)
+                 min_weight_idx = torch.argmin(weight_non_matched, dim=1)  # (N,)
+
+                 # (row, col) indices of the non-matched pixels
+                 non_matched_indices = non_matched.nonzero(as_tuple=False)  # (N, 2)
+
+                 for k in range(K):
+                     # Positions whose least probable component is k
+                     k_mask = (min_weight_idx == k)
+                     if k_mask.any():
+                         selected_indices = non_matched_indices[k_mask]  # (M, 2)
+                         y_idx = selected_indices[:, 0]
+                         x_idx = selected_indices[:, 1]
+
+                         # Replace the mean with the current value and reset to a high variance
+                         self.mu[y_idx, x_idx, k, :] = img_tensor[y_idx, x_idx]
+                         self.sigma[y_idx, x_idx, k, :, :] = torch.eye(3, device=device) * 225
+
+             # Convert to numpy for reordering and debug prints
+             weight_np = self.weight.cpu().numpy()
+             mu_np = self.mu.cpu().numpy()
+             sigma_np = self.sigma.cpu().numpy()
+             B_np = self.B.cpu().numpy()
+
+             print('img:{}'.format(img[100][100]))
+             print('weight:{}'.format(weight_np[100][100]))
+
+             # Hand the numpy arrays to reorder()
+             self.weight = weight_np
+             self.mu = mu_np
+             self.sigma = sigma_np
+             self.B = B_np
+
+             self.reorder()
+             for i in range(self.K):
+                 print('u:{}'.format(self.mu[100][100][i]))
+
+             # Move back to the GPU for the next iteration
+             self.weight = torch.from_numpy(self.weight).to(device)
+             self.mu = torch.from_numpy(self.mu).to(device)
+             self.sigma = torch.from_numpy(self.sigma).to(device)
+             self.B = torch.from_numpy(self.B).to(device)
+
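A hedged usage sketch for the trainer above. The directory name and frame count are placeholders; as the file_list construction shows, train() expects frames named b00000.bmp, b00001.bmp, ... inside data_dir:

```python
from GMM import GMM

# Hypothetical frame directory; files must follow the b%05d.bmp naming scheme
model = GMM(data_dir='frames/', train_num=50)
model.train(K=4)                      # fit 4 Gaussians per pixel
model.save_model('gmm_model.joblib')  # persist weights, means, covariances
```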
+     def save_model(self, file_path):
+         """
+         Save the trained model to a file
+         """
+         # Only create directories if the path actually contains one
+         dir_name = os.path.dirname(file_path)
+         if dir_name:
+             os.makedirs(dir_name, exist_ok=True)
+
+         joblib.dump({
+             'weight': self.weight,
+             'mu': self.mu,
+             'sigma': self.sigma,
+             'K': self.K,
+             'B': self.B,
+             'img_shape': self.img_shape,
+             'alpha': self.alpha,
+             'data_dir': self.data_dir,
+             'train_num': self.train_num
+         }, file_path)
+
+         print(f"Model saved to {file_path}")
+
+     @classmethod
+     def load_model(cls, file_path):
+         """
+         Load a trained model from file
+         """
+         data = joblib.load(file_path)
+
+         # Create a new instance
+         gmm = cls(data['data_dir'], data['train_num'], data['alpha'])
+
+         # Restore all attributes
+         gmm.weight = data['weight']
+         gmm.mu = data['mu']
+         gmm.sigma = data['sigma']
+         gmm.K = data['K']
+         gmm.B = data['B']
+         gmm.img_shape = data['img_shape']
+
+         print(f"Model loaded from {file_path}")
+         return gmm
+
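Because load_model is a classmethod, restoring the model shipped in this commit is a one-liner (assuming gmm_model.joblib sits in the working directory):

```python
from GMM import GMM

model = GMM.load_model('gmm_model.joblib')
print(model.K, model.img_shape)  # number of components and the training frame shape
```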
309
+ '''
310
+ Reorder the estimated components based on the ratio pi / the norm of standard deviation.
311
+ The first B components are chosen as background components.
312
+ The default threshold is 0.90.
313
+ '''
314
+ epsilon = 1e-6 # to prevent divide-by-zero
315
+
316
+ for i in range(self.img_shape[0]):
317
+ for j in range(self.img_shape[1]):
318
+ k_weight = self.weight[i][j]
319
+ k_norm = []
320
+
321
+ for k in range(self.K):
322
+ cov = self.sigma[i][j][k]
323
+ try:
324
+ if np.all(np.linalg.eigvals(cov) >= 0):
325
+ stddev = np.sqrt(cov)
326
+ k_norm.append(norm(stddev))
327
+ else:
328
+ k_norm.append(epsilon)
329
+ except:
330
+ k_norm.append(epsilon)
331
+
332
+ k_norm = np.array(k_norm)
333
+ ratio = k_weight / (k_norm + epsilon)
334
+ descending_order = np.argsort(-ratio)
335
+
336
+ self.weight[i][j] = self.weight[i][j][descending_order]
337
+ self.mu[i][j] = self.mu[i][j][descending_order]
338
+ self.sigma[i][j] = self.sigma[i][j][descending_order]
339
+
340
+ cum_weight = 0
341
+ for index, order in enumerate(descending_order):
342
+ cum_weight += self.weight[i][j][index]
343
+ if cum_weight > T:
344
+ self.B[i][j] = index + 1
345
+ break
346
+
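For intuition, a toy version of the ranking rule used in reorder(): components with high weight and low spread sort first, and B is the smallest prefix whose cumulative weight exceeds T. All numbers below are invented:

```python
import numpy as np

weight = np.array([0.10, 0.60, 0.25, 0.05])    # mixture weights, sum to 1
sigma_norm = np.array([20.0, 5.0, 8.0, 30.0])  # norm of each std-dev matrix

ratio = weight / sigma_norm         # weight / spread, as in reorder()
order = np.argsort(-ratio)          # -> [1, 2, 0, 3]
cum = np.cumsum(weight[order])      # -> [0.60, 0.85, 0.95, 1.00]
B = int(np.argmax(cum > 0.90)) + 1  # -> 3: the top three components cover 95%
```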
+     # def infer(self, img, heatmap=None, alpha=0.1):
+     #     '''
+     #     Perform inference with a persistent heatmap that intensifies with movement.
+     #     '''
+     #     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+     #     img_tensor = torch.from_numpy(img).float().to(device)  # (H, W, 3)
+
+     #     H, W, _ = img.shape
+
+     #     # Initialize heatmap on the first frame
+     #     if heatmap is None:
+     #         heatmap = torch.zeros((H, W), dtype=torch.float32, device=device)
+     #     # No need for an 'else' that converts from numpy,
+     #     # as we will pass the tensor back in subsequent calls.
+
+     #     # --- Your existing foreground detection logic remains the same ---
+     #     detection_mask = torch.ones((H, W), dtype=torch.bool, device=device)
+     #     for k in range(self.K):
+     #         B_mask = (self.B >= (k + 1)).to(device)
+     #         mu_k = self.mu[:, :, k, :].to(device)
+     #         sigma_k = self.sigma[:, :, k, :, :].to(device)
+     #         delta = (img_tensor - mu_k).unsqueeze(-1)
+     #         sigma_inv = torch.linalg.inv(sigma_k)
+     #         temp = torch.matmul(sigma_inv, delta)
+     #         dist_sq = torch.matmul(delta.transpose(-2, -1), temp).squeeze(-1).squeeze(-1)
+     #         dist = torch.sqrt(dist_sq + 1e-5)
+     #         match_mask = (dist < 9.5) & B_mask
+     #         detection_mask[match_mask] = False
+     #         img_tensor[match_mask] = mu_k[match_mask]  # Optional: for visualization
+
+     #     foreground_mask = detection_mask & (img_tensor.abs().sum(dim=-1) > 0)
+     #     heatmap[foreground_mask] = torch.clamp(heatmap[foreground_mask] + alpha, 0, 1)
+
+     #     # Convert the heatmap tensor to a numpy array for visualization
+     #     heatmap_np = heatmap.cpu().numpy()
+
+     #     # Apply the colormap (0 -> Blue, 1 -> Red)
+     #     heatmap_viz = cv.applyColorMap((heatmap_np * 255).astype(np.uint8), cv.COLORMAP_JET)
+
+     #     # Blend the heatmap with the original image
+     #     result = cv.addWeighted(img, 0.7, heatmap_viz, 0.5, 0)
+
+     #     # Return the blended image and the heatmap tensor for the next frame
+     #     return result, heatmap
+     # --------------------------------------------------------------------
+
+     # def infer(self, img, heatmap=None, decay_factor=0.95, alpha=0.1):
+     #     '''
+     #     Perform inference with an improved heatmap reflecting the persistence of foreground objects.
+     #     Default areas remain unchanged (no bluish tone); only heatmap areas are colored.
+     #     '''
+     #     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+     #     img_tensor = torch.from_numpy(img).float().to(device)  # (H, W, 3)
+
+     #     H, W, _ = img.shape
+
+     #     # Initialize the heatmap, or move it onto the device as a tensor
+     #     if heatmap is None:
+     #         heatmap = torch.zeros((H, W), dtype=torch.float32, device=device)
+     #     else:
+     #         heatmap = torch.from_numpy(heatmap).float().to(device)
+
+     #     # Detection mask initialized to 1 (foreground); 0 means background
+     #     detection_mask = torch.ones((H, W), dtype=torch.bool, device=device)
+
+     #     for k in range(self.K):
+     #         B_mask = (self.B >= (k + 1)).to(device)
+
+     #         mu_k = self.mu[:, :, k, :].to(device)
+     #         sigma_k = self.sigma[:, :, k, :, :].to(device)
+
+     #         delta = img_tensor - mu_k
+     #         delta = delta.unsqueeze(-1)
+
+     #         sigma_inv = torch.linalg.inv(sigma_k)
+
+     #         temp = torch.matmul(sigma_inv, delta)
+     #         dist_sq = torch.matmul(delta.transpose(-2, -1), temp).squeeze(-1).squeeze(-1)
+     #         dist = torch.sqrt(dist_sq + 1e-5)
+
+     #         match_mask = (dist < 9.5) & B_mask
+
+     #         # Mark matched pixels as background
+     #         detection_mask[match_mask] = False
+
+     #         img_tensor[match_mask] = mu_k[match_mask]
+
+     #     # Foreground mask (boolean tensor)
+     #     foreground_mask = detection_mask & (img_tensor.abs().sum(dim=-1) > 0)
+
+     #     # Update the heatmap: accumulate on foreground, decay elsewhere
+     #     heatmap[foreground_mask] = torch.clamp(heatmap[foreground_mask] + alpha, 0, 1)
+     #     heatmap[~foreground_mask] *= decay_factor
+
+     #     # Convert the heatmap to numpy for visualization
+     #     heatmap_np = heatmap.cpu().numpy()
+
+     #     # Create the heatmap visualization
+     #     heatmap_viz = cv.applyColorMap((heatmap_np * 255).astype(np.uint8), cv.COLORMAP_JET)
+
+     #     # Mask of significant heatmap areas (adjust the threshold as needed)
+     #     significant_heat = (heatmap_np > 0.1)
+
+     #     # Initialize the result with the original image
+     #     result = img.copy()
+
+     #     # Only process if there are significant heat areas
+     #     if np.any(significant_heat):
+     #         # Ensure we have valid regions to blend
+     #         img_region = img[significant_heat]
+     #         heat_region = heatmap_viz[significant_heat]
+
+     #         # Only blend if we have valid regions
+     #         if img_region.size > 0 and heat_region.size > 0:
+     #             blended = cv.addWeighted(
+     #                 img_region, 0.7,
+     #                 heat_region, 0.3,
+     #                 0
+     #             )
+     #             result[significant_heat] = blended
+
+     #     return result, heatmap_np
+     # ____________________________________ Decay-factor version; working well
+
+     # def infer(self, img, heatmap=None, decay_factor=0.95, alpha=0.1):
+     #     '''
+     #     Perform inference with a binary red mask (no intensity variation) and dilation.
+     #     Returns:
+     #         - result: Image with a solid red overlay on detections (same dtype as input)
+     #         - heatmap_np: Heatmap array
+     #     '''
+     #     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+     #     # Ensure the input is a numpy array and remember its original dtype
+     #     original_dtype = img.dtype
+     #     img = np.asarray(img).astype(np.float32)
+     #     H, W, C = img.shape
+
+     #     # Initialize tensors
+     #     img_tensor = torch.from_numpy(img).float().to(device)
+
+     #     # Initialize heatmap
+     #     if heatmap is None:
+     #         heatmap = torch.zeros((H, W), dtype=torch.float32, device=device)
+     #     else:
+     #         heatmap = torch.from_numpy(heatmap).float().to(device)
+
+     #     # Detection processing (same matching logic as above)
+     #     detection_mask = torch.ones((H, W), dtype=torch.bool, device=device)
+
+     #     for k in range(self.K):
+     #         B_mask = (self.B >= (k + 1)).to(device)
+     #         mu_k = self.mu[:, :, k, :].to(device)
+     #         sigma_k = self.sigma[:, :, k, :, :].to(device)
+
+     #         delta = img_tensor - mu_k
+     #         delta = delta.unsqueeze(-1)
+     #         sigma_inv = torch.linalg.inv(sigma_k)
+     #         temp = torch.matmul(sigma_inv, delta)
+     #         dist_sq = torch.matmul(delta.transpose(-2, -1), temp).squeeze(-1).squeeze(-1)
+     #         dist = torch.sqrt(dist_sq + 1e-5)
+     #         match_mask = (dist < 9.5) & B_mask
+     #         detection_mask[match_mask] = False
+     #         img_tensor[match_mask] = mu_k[match_mask]
+
+     #     # Update heatmap
+     #     foreground_mask = detection_mask & (img_tensor.abs().sum(dim=-1) > 0)
+     #     heatmap[foreground_mask] = torch.clamp(heatmap[foreground_mask] + alpha, 0, 1)
+     #     heatmap[~foreground_mask] *= decay_factor
+     #     heatmap_np = heatmap.cpu().numpy()
+
+     #     # Create a binary mask and dilate it
+     #     binary_mask = (heatmap_np > 0.1).astype(np.uint8)
+     #     kernel = np.ones((5, 5), np.uint8)
+     #     dilated_mask = cv.dilate(binary_mask, kernel, iterations=1)
+
+     #     # Create a solid red overlay (BGR)
+     #     red_overlay = np.zeros_like(img)
+     #     red_overlay[..., 2] = 200  # Red channel
+
+     #     # Apply the overlay using np.where instead of boolean indexing
+     #     result = np.where(
+     #         dilated_mask[..., np.newaxis].astype(bool),
+     #         cv.addWeighted(img, 0.7, red_overlay, 0.3, 0),
+     #         img
+     #     )
+
+     #     # Convert back to the original dtype
+     #     if original_dtype != np.float32:
+     #         result = np.clip(result, 0, 255).astype(original_dtype)
+
+     #     return result, heatmap_np
+     # ____________________________________________________________________
+
+     # def infer(self, img, heatmap=None, alpha=0.1):
+     #     '''
+     #     Perform inference with a binary red mask (no intensity variation) and dilation.
+     #     The heatmap is fully recalculated every frame; no temporal decay or retention.
+     #
+     #     Returns:
+     #         - result: Image with a solid red overlay on detections
+     #         - heatmap_np: Binary heatmap array
+     #     '''
+     #     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+     #     # Ensure the input is a numpy array and remember its original dtype
+     #     original_dtype = img.dtype
+     #     img = np.asarray(img).astype(np.float32)
+     #     H, W, C = img.shape
+
+     #     # Initialize tensors
+     #     img_tensor = torch.from_numpy(img).float().to(device)
+
+     #     # Detection processing
+     #     detection_mask = torch.ones((H, W), dtype=torch.bool, device=device)
+
+     #     for k in range(self.K):
+     #         B_mask = (self.B >= (k + 1)).to(device)
+     #         mu_k = self.mu[:, :, k, :].to(device)
+     #         sigma_k = self.sigma[:, :, k, :, :].to(device)
+
+     #         delta = img_tensor - mu_k
+     #         delta = delta.unsqueeze(-1)
+     #         sigma_inv = torch.linalg.inv(sigma_k)
+     #         temp = torch.matmul(sigma_inv, delta)
+     #         dist_sq = torch.matmul(delta.transpose(-2, -1), temp).squeeze(-1).squeeze(-1)
+     #         dist = torch.sqrt(dist_sq + 1e-5)
+     #         match_mask = (dist < 9.5) & B_mask
+     #         detection_mask[match_mask] = False
+     #         img_tensor[match_mask] = mu_k[match_mask]
+
+     #     # Generate a binary heatmap (no decay, no accumulation)
+     #     foreground_mask = detection_mask & (img_tensor.abs().sum(dim=-1) > 0)
+     #     heatmap = torch.zeros((H, W), dtype=torch.float32, device=device)
+     #     heatmap[foreground_mask] = alpha
+     #     heatmap_np = heatmap.cpu().numpy()
+
+     #     # Create a binary mask and dilate it
+     #     binary_mask = (heatmap_np > 0.05).astype(np.uint8)
+     #     kernel = np.ones((5, 5), np.uint8)
+     #     dilated_mask = cv.dilate(binary_mask, kernel, iterations=1)
+
+     #     # Create a solid red overlay (BGR)
+     #     red_overlay = np.zeros_like(img)
+     #     red_overlay[..., 2] = 200  # Red channel
+
+     #     # Apply the overlay
+     #     result = np.where(
+     #         dilated_mask[..., np.newaxis].astype(bool),
+     #         cv.addWeighted(img, 0.7, red_overlay, 0.3, 0),
+     #         img
+     #     )
+
+     #     # Convert back to the original dtype
+     #     if original_dtype != np.float32:
+     #         result = np.clip(result, 0, 255).astype(original_dtype)
+
+     #     return result, heatmap_np
+
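The commented-out variants above differ mainly in how the heatmap evolves between frames: some accumulate alpha on foreground pixels and multiply everything else by decay_factor. A toy sketch of that accumulate-and-decay rule (array size and frame count are invented):

```python
import numpy as np

heatmap = np.zeros((4, 4), dtype=np.float32)
fg = np.zeros((4, 4), dtype=bool)
fg[1:3, 1:3] = True                      # a persistent 2x2 foreground blob

alpha, decay_factor = 0.1, 0.95
for _ in range(30):                      # 30 frames with the same blob
    heatmap[fg] = np.clip(heatmap[fg] + alpha, 0, 1)
    heatmap[~fg] *= decay_factor

print(heatmap[1, 1], heatmap[0, 0])      # 1.0 (saturated) vs 0.0 (never foreground)
```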
+     def infer(self, img, heatmap=None, alpha=0.1):
+         '''
+         Perform inference with a binary red mask and GPU-based dilation.
+         The heatmap is recalculated each frame (no temporal retention).
+
+         Returns:
+             - result: Image with a red overlay where foreground is detected.
+             - heatmap_np: Numpy array of the binary heatmap.
+         '''
+         device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+         # Convert the image to float32 and move it to the GPU
+         original_dtype = img.dtype
+         img = np.asarray(img).astype(np.float32)
+         H, W, C = img.shape
+         img_tensor = torch.from_numpy(img).float().to(device)
+
+         # Initialize the detection mask as all True (foreground by default)
+         detection_mask = torch.ones((H, W), dtype=torch.bool, device=device)
+
+         for k in range(self.K):
+             # Only the first B components of each pixel count as background
+             B_mask = (self.B >= (k + 1)).to(device)
+             mu_k = self.mu[:, :, k, :].to(device)
+             sigma_k = self.sigma[:, :, k, :, :].to(device)
+
+             delta = img_tensor - mu_k
+             delta = delta.unsqueeze(-1)  # shape: (H, W, 3, 1)
+             sigma_inv = torch.linalg.inv(sigma_k)
+             temp = torch.matmul(sigma_inv, delta)
+             dist_sq = torch.matmul(delta.transpose(-2, -1), temp).squeeze(-1).squeeze(-1)
+             dist = torch.sqrt(dist_sq + 1e-5)
+
+             match_mask = (dist < 9.5) & B_mask
+             detection_mask[match_mask] = False
+             # img_tensor[match_mask] = mu_k[match_mask]
+
+         # Generate the heatmap
+         foreground_mask = detection_mask & (img_tensor.abs().sum(dim=-1) > 0)
+         heatmap_tensor = torch.zeros((H, W), dtype=torch.float32, device=device)
+         heatmap_tensor[foreground_mask] = alpha
+
+         # Convert the heatmap to a binary mask and dilate it on the GPU
+         binary_mask = (heatmap_tensor > 0.05).float().unsqueeze(0).unsqueeze(0)  # (1, 1, H, W)
+         kernel = torch.ones((1, 1, 5, 5), dtype=torch.float32, device=device)
+         dilated = F.conv2d(binary_mask, kernel, padding=2)
+         dilated_mask = (dilated > 0).squeeze().to(torch.bool)
+
+         # Create a red overlay (on the GPU)
+         red_overlay = torch.zeros_like(img_tensor)
+         red_overlay[..., 2] = 200  # Red channel (BGR order)
+
+         # Blend the red overlay onto detected regions
+         result_tensor = torch.where(
+             dilated_mask.unsqueeze(-1),
+             0.7 * img_tensor + 0.3 * red_overlay,
+             img_tensor
+         )
+
+         # Convert back to NumPy and the original dtype
+         result = result_tensor.clamp(0, 255).cpu().numpy()
+         if original_dtype != np.float32:
+             result = result.astype(original_dtype)
+
+         heatmap_np = (heatmap_tensor > 0.05).float().cpu().numpy()
+
+         return result, heatmap_np
+
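A sketch of running the trained model frame by frame (the video path is a placeholder; the active infer() rebuilds its heatmap every frame, so the returned array is simply passed back in). Note the design choice in the dilation step above: F.conv2d with an all-ones 5x5 kernel and padding=2 flags any pixel whose neighborhood contains a detection, matching cv.dilate with the same kernel while staying on the GPU.

```python
import cv2 as cv
from GMM import GMM

model = GMM.load_model('gmm_model.joblib')

cap = cv.VideoCapture('kitchen.mp4')  # hypothetical input video
heatmap = None
while True:
    ok, frame = cap.read()
    if not ok:
        break
    overlay, heatmap = model.infer(frame, heatmap)  # red overlay + binary heatmap
    cv.imshow('heatmap', overlay)
    if cv.waitKey(1) == 27:  # Esc quits
        break
cap.release()
```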
README.md CHANGED
@@ -1,12 +1,12 @@
- ---
- title: Uvscan Kitchen Heatmap
- emoji: 🌍
- colorFrom: yellow
- colorTo: indigo
- sdk: gradio
- sdk_version: 5.33.2
- app_file: app.py
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ ---
+ title: Uvscan Kitchenheatmap
+ emoji: 📉
+ colorFrom: yellow
+ colorTo: blue
+ sdk: gradio
+ sdk_version: 5.33.2
+ app_file: app.py
+ pinned: false
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,115 @@
+ import gradio as gr
+ import numpy as np
+ from PIL import Image
+ import cv2  # OpenCV for video processing
+ from processors.extract_frames import video_to_keyframes
+ from processors.apply_mask import apply_mask_and_crop
+ from processors.run_gmm import run_gmm_inference
+ from processors.compose_video import compose_final_video
+ # import the processing functions from the original app
+ # from heatmap_module import video_to_keyframes, apply_mask_and_crop, run_gmm_inference, compose_final_video
+
+ # Helper to extract the first frame for mask drawing
+ def get_first_frame(video_path):
+     cap = cv2.VideoCapture(video_path)
+     success, frame = cap.read()
+     cap.release()
+     if success:
+         # Convert BGR to RGB color for PIL/Gradio
+         frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+         return Image.fromarray(frame)
+     else:
+         return None
+
+ # Helper to get a mask from the drawn data
+ def extract_mask_from_drawn(composite_image, background_image):
+     # Convert to numpy arrays for comparison
+     comp = np.array(composite_image)
+     bg = np.array(background_image)
+     if comp.shape != bg.shape:
+         # If the background is not the same shape as the composite, just threshold the composite
+         gray = comp if comp.ndim == 2 else comp[..., :3].mean(axis=-1)
+         mask = (gray > 10).astype(np.uint8)  # simple threshold
+     else:
+         # Mark pixels where the composite differs from the background (assuming the draw color != background)
+         diff = np.any(comp != bg, axis=-1)
+         mask = diff.astype(np.uint8)
+     return mask * 255  # return as a binary mask image (255 inside the mask)
+
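A quick sanity check of extract_mask_from_drawn on synthetic arrays, with a painted square standing in for a brush stroke (run with the function above in scope):

```python
import numpy as np

bg = np.zeros((8, 8, 3), dtype=np.uint8)  # blank background frame
comp = bg.copy()
comp[2:5, 2:5] = (255, 0, 0)              # simulated red brush stroke

mask = extract_mask_from_drawn(comp, bg)
print(mask[3, 3], mask[0, 0])             # 255 inside the stroke, 0 elsewhere
```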
+ def process_video(video_file, mask_image, drawn_editor, progress=gr.Progress()):
+     # video_file: path to the uploaded video
+     # mask_image: numpy array (HxW or HxWx3) if uploaded, or None
+     # drawn_editor: dict with 'background', 'composite' from ImageEditor, or None
+
+     # Decide the mask source
+     mask = None
+     if mask_image is not None:
+         # Ensure the mask is binary (if the user uploaded a colored mask, convert to gray)
+         mask = mask_image
+         if mask.ndim == 3:
+             mask = cv2.cvtColor(mask, cv2.COLOR_RGB2GRAY)
+         _, mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
+     elif drawn_editor is not None:
+         comp = drawn_editor["composite"]
+         bg = drawn_editor["background"]
+         mask = extract_mask_from_drawn(comp, bg)
+     else:
+         raise gr.Error("Please provide a mask (upload or draw).")
+
+     progress(0, desc="Extracting keyframes...")
+     frames = video_to_keyframes(video_file)
+     progress(0.3, desc="Applying mask and cropping...")
+     cropped_frames = apply_mask_and_crop(frames, mask)
+     progress(0.6, desc="Running inference on frames...")
+     output_frames = run_gmm_inference(cropped_frames)
+     progress(0.85, desc="Composing final video...")
+     result_path = compose_final_video(output_frames, "heatmap_output.mp4")
+     progress(1.0, desc="Done")
+
+     return "✅ Heatmap video generated!", result_path
+
+ # Define the Gradio app layout
+ custom_css = """
+ .gradio-container {background: url('/gradio_api/file=background.jpg') center/cover no-repeat !important;
+                    background-color: #000 !important;}
+ .panel {max-width: 800px; margin: 2rem auto; padding: 2rem; background: rgba(30,30,30, 0.8); border-radius: 8px;}
+ """
+ with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css, title="Heatmap Generator") as demo:
+     gr.Markdown("## 🎥 Heatmap Generator", elem_classes="panel")
+     with gr.Row(elem_classes="panel"):
+         video_input = gr.Video(label="Upload Video", format="mp4")
+     with gr.Tabs(elem_classes="panel"):
+         with gr.Tab("Upload Mask"):
+             mask_upload = gr.Image(label="Upload Mask Image", type="numpy")
+         with gr.Tab("Draw Mask"):
+             draw_info = gr.Markdown("*Draw mask on the frame:* Use the brush to highlight the region of interest.")
+             # ImageEditor in Gradio 5 has no 'tool' kwarg; the brush is available by default
+             mask_draw = gr.ImageEditor(label="Draw Mask", type="pil")  # we'll get PIL images
+     # Buttons
+     with gr.Row(elem_classes="panel"):
+         generate_btn = gr.Button("🔥 Generate Heatmap", variant="primary")
+         reset_btn = gr.Button("Reset")
+         download_btn = gr.DownloadButton("Download Video", file_name="heatmap_output.mp4")
+     # Status and output
+     with gr.Row(elem_classes="panel"):
+         status_text = gr.Markdown("")  # shows status or the final message
+     with gr.Row(elem_classes="panel"):
+         output_video = gr.Video(label="Output Video")
+     # Event handlers
+     # When a video is uploaded, extract a frame and set it in the draw component
+     def prep_frame_for_drawing(video_file):
+         if video_file is None:
+             return None
+         frame = get_first_frame(video_file)
+         return {'background': frame, 'layers': [], 'composite': frame}  # initial EditorValue
+     video_input.change(fn=prep_frame_for_drawing, inputs=video_input, outputs=mask_draw)
+     # The Generate button triggers processing
+     generate_btn.click(fn=process_video, inputs=[video_input, mask_upload, mask_draw], outputs=[status_text, output_video])
+     # After the video is generated, enable download (bind the file path from the output)
+     # (Gradio may automatically handle download if output_video has a file source)
+     generate_btn.click(fn=lambda vid: vid, inputs=output_video, outputs=download_btn)
+     # The Reset button clears everything
+     reset_btn.click(fn=lambda: (None, None, None, "", None), inputs=[],
+                     outputs=[video_input, mask_upload, mask_draw, status_text, output_video])
+
+ # Launch (if running locally; on HF Spaces this is handled automatically)
+ if __name__ == "__main__":
+     demo.launch()
gmm_model.joblib ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ee1c6601c803d73838183b47e7a40ea79f8746135581af3d9a93b6a7151c16ba
+ size 15263359
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ kivy>=2.3.0
+ kivymd>=1.2.0
+ matplotlib>=3.5
+ opencv-python>=4.8
+ numpy>=1.23
+ scipy>=1.10
+ joblib>=1.3
+ torch>=2.0
+ Pillow>=9.5
+ gradio==5.33.2