Spaces:

mlbench123
/

uvscan_kitchen_heatmap

Sleeping

App Files Files Community

mlbench123 committed on Jun 12

Commit

06927d3

verified ·

1 Parent(s): 51f3b1c

Upload 7 files

Browse files

Files changed (7) hide show

.gitattributes +35 -35
.gitignore +1 -0
GMM.py +672 -0
README.md +12 -12
app.py +115 -0
gmm_model.joblib +3 -0
requirements.txt +10 -0

.gitattributes CHANGED Viewed

@@ -1,35 +1,35 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ venv/

GMM.py ADDED Viewed

	@@ -0,0 +1,672 @@

+import numpy as np
+import cv2 as cv
+import os
+from numpy.linalg import norm, inv
+from scipy.stats import multivariate_normal as mv_norm
+import joblib  # or import pickle
+import os
+import torch
+from torch.distributions import MultivariateNormal
+import torch.nn.functional as F
+init_weight = [0.7, 0.11, 0.1, 0.09]
+init_u = np.zeros(3)
+# initial Covariance matrix
+init_sigma = 225*np.eye(3)
+init_alpha = 0.05
+class GMM():
+    def __init__(self, data_dir, train_num, alpha=init_alpha):
+        self.data_dir = data_dir
+        self.train_num = train_num
+        self.alpha = alpha
+        self.img_shape = None
+        self.weight = None
+        self.mu = None
+        self.sigma = None
+        self.K = None
+        self.B = None
+    def check(self, pixel, mu, sigma):
+        '''
+        Check whether a pixel matches a Gaussian distribution.
+        Matching means the Mahalanobis distance is less than 2.5.
+        '''
+        # Convert to torch tensors on same device
+        if isinstance(mu, np.ndarray):
+            mu = torch.from_numpy(mu).float()
+        if isinstance(sigma, np.ndarray):
+            sigma = torch.from_numpy(sigma).float()
+        if isinstance(pixel, np.ndarray):
+            pixel = torch.from_numpy(pixel).float()
+        # Ensure all are on the same device
+        device = mu.device
+        pixel = pixel.to(device)
+        sigma = sigma.to(device)
+        # Compute Mahalanobis distance
+        delta = pixel - mu
+        sigma_inv = torch.linalg.inv(sigma)
+        d_squared = delta @ sigma_inv @ delta
+        d = torch.sqrt(d_squared + 1e-5)
+        return d.item() < 0.1
+    # def train(self, K=4):
+    #     '''
+    #     train model
+    #     '''
+    #     self.K = K
+    #     file_list = []
+    #     # file numbers are from 1 to train_number
+    #     for i in range(self.train_num):
+    #         file_name = os.path.join(self.data_dir, 'b%05d' % i + '.bmp')
+    #         file_list.append(file_name)
+    #     img_init = cv.imread(file_list[0])
+    #     img_shape = img_init.shape
+    #     self.img_shape = img_shape
+    #     self.weight = np.array([[init_weight for j in range(self.img_shape[1])] for i in range(self.img_shape[0])])
+    #     self.mu = np.array([[[init_u for k in range(self.K)] for j in range(img_shape[1])]
+    #                          for i in range(img_shape[0])])
+    #     self.sigma = np.array([[[init_sigma for k in range(self.K)] for j in range(img_shape[1])]
+    #                          for i in range(img_shape[0])])
+    #     self.B = np.ones(self.img_shape[0:2], dtype=int)
+    #     for i in range(img_shape[0]):
+    #         for j in range(img_shape[1]):
+    #             for k in range(self.K):
+    #                 self.mu[i][j][k] = np.array(img_init[i][j]).reshape(1,3)
+    #     for i in range(self.K):
+    #         print('u:{}'.format(self.mu[100][100][i]))
+    #     # update process
+    #     for file in file_list:
+    #         print('training:{}'.format(file))
+    #         img=cv.imread(file)
+    #         for i in range(img.shape[0]):
+    #             for j in range(img.shape[1]):
+    #                 # Check whether match the existing K Gaussian distributions
+    #                 match = -1
+    #                 for k in range(K):
+    #                     if self.check(img[i][j], self.mu[i][j][k], self.sigma[i][j][k]):
+    #                         match = k
+    #                         break
+    #                 # a match found
+    #                 if match != -1:
+    #                     mu = self.mu[i][j][k]
+    #                     sigma = self.sigma[i][j][k]
+    #                     x = img[i][j].astype(float)
+    #                     delta = x - mu
+    #                     rho = self.alpha * mv_norm.pdf(img[i][j], mu, sigma)
+    #                     self.weight[i][j] = (1 - self.alpha) * self.weight[i][j]
+    #                     self.weight[i][j][match] += self.alpha
+    #                     # self.weight[i][j][k] = self.weight[i][j][k] + self.alpha*(m - self.weight[i][j][k])
+    #                     self.mu[i][j][k] = mu + rho * delta
+    #                     self.sigma[i][j][k] = sigma + rho * (np.matmul(delta, delta.T) - sigma)
+    #                 # if none of the K distributions match the current value
+    #                 # the least probable distribution is replaced with a distribution
+    #                 # with current value as its mean, an initially high variance and low prior weight
+    #                 if match == -1:
+    #                     w_list = [self.weight[i][j][k] for k in range(K)]
+    #                     id = w_list.index(min(w_list))
+    #                     # weight keep same, replace mean with current value and set high variance
+    #                     self.mu[i][j][id] = np.array(img[i][j]).reshape(1, 3)
+    #                     self.sigma[i][j][id] = np.array(init_sigma)
+    #         print('img:{}'.format(img[100][100]))
+    #         print('weight:{}'.format(self.weight[100][100]))
+    #         self.reorder()
+    #         for i in range(self.K):
+    #             print('u:{}'.format(self.mu[100][100][i]))
+    def train(self, K=4):
+        '''
+        train model with GPU acceleration
+        '''
+        self.K = K
+        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        print(f"Using device: {device}")
+        file_list = []
+        for i in range(self.train_num):
+            file_name = os.path.join(self.data_dir, 'b%05d' % i + '.bmp')
+            file_list.append(file_name)
+        # Initialize with first image
+        img_init = cv.imread(file_list[0])
+        img_shape = img_shape = img_init.shape
+        self.img_shape = img_shape
+        height, width, channels = img_shape
+        # Initialize model parameters on GPU
+        self.weight = torch.full((height, width, K), 1.0/K,
+                            dtype=torch.float32, device=device)
+        self.mu = torch.zeros(height, width, K, 3,
+                        dtype=torch.float32, device=device)
+        self.sigma = torch.zeros(height, width, K, 3, 3,
+                            dtype=torch.float32, device=device)
+        self.B = torch.ones((height, width),
+                        dtype=torch.int32, device=device)
+        # Initialize mu with first image values
+        img_tensor = torch.from_numpy(img_init).float().to(device)
+        for k in range(K):
+            self.mu[:, :, k, :] = img_tensor
+        # Initialize sigma with identity matrix * 225
+        self.sigma[:] = torch.eye(3, device=device) * 225
+        # Training loop
+        for file in file_list:
+            print('training:{}'.format(file))
+            img = cv.imread(file)
+            img_tensor = torch.from_numpy(img).float().to(device)  # (H,W,3)
+            # Check matches for all pixels
+            matches = torch.full((height, width), -1, dtype=torch.long, device=device)
+            for k in range(K):
+                # Calculate Mahalanobis distance for each distribution
+                delta = img_tensor.unsqueeze(2) - self.mu  # (H,W,K,3)
+                sigma_inv = torch.linalg.inv(self.sigma)  # (H,W,K,3,3)
+                # Compute (x-μ)T Σ^-1 (x-μ)
+                temp = torch.einsum('hwki,hwkij->hwkj', delta, sigma_inv)
+                mahalanobis = torch.sqrt(torch.einsum('hwki,hwki->hwk', temp, delta))
+                # Update matches where distance < 2.5 and not already matched
+                match_mask = (mahalanobis[:,:,k] < 2.5) & (matches == -1)
+                matches[match_mask] = k
+            # Process matched pixels
+            for k in range(K):
+                # Get mask for current distribution matches
+                mask = matches == k
+                if mask.any():
+                    # Get matched pixels
+                    matched_pixels = img_tensor[mask]  # (N,3)
+                    matched_mu = self.mu[:,:,k,:][mask]  # (N,3)
+                    matched_sigma = self.sigma[:,:,k,:,:][mask]  # (N,3,3)
+                    try:
+                        # Create multivariate normal distribution
+                        mvn = MultivariateNormal(matched_mu,
+                                            covariance_matrix=matched_sigma)
+                        # Calculate rho
+                        rho = self.alpha * torch.exp(mvn.log_prob(matched_pixels))
+                        # Update weights
+                        self.weight[:,:,k][mask] = (1 - self.alpha) * self.weight[:,:,k][mask] + self.alpha
+                        # Update mu
+                        delta = matched_pixels - matched_mu
+                        self.mu[:,:,k,:][mask] += rho.unsqueeze(1) * delta
+                        # Update sigma
+                        delta_outer = torch.einsum('bi,bj->bij', delta, delta)
+                        sigma_update = rho.unsqueeze(1).unsqueeze(2) * (delta_outer - matched_sigma)
+                        self.sigma[:,:,k,:,:][mask] += sigma_update
+                    except RuntimeError as e:
+                        print(f"Error updating distribution {k}: {e}")
+                        continue
+            # Process non-matched pixels
+            non_matched = matches == -1
+            if non_matched.any():
+                # Find least probable distribution for each non-matched pixel
+                weight_non_matched = self.weight[non_matched]  # shape: (N, K)
+                min_weight_idx = torch.argmin(weight_non_matched, dim=1)  # shape: (N,)
+                # Create flat indices of non-matched pixels
+                non_matched_indices = non_matched.nonzero(as_tuple=False)  # shape: (N, 2)
+                for k in range(K):
+                    # Find positions where min_weight_idx == k
+                    k_mask = (min_weight_idx == k)
+                    if k_mask.any():
+                        selected_indices = non_matched_indices[k_mask]  # shape: (M, 2)
+                        y_idx = selected_indices[:, 0]
+                        x_idx = selected_indices[:, 1]
+                        # Update mu and sigma
+                        self.mu[y_idx, x_idx, k, :] = img_tensor[y_idx, x_idx]
+                        self.sigma[y_idx, x_idx, k, :, :] = torch.eye(3, device=device) * 225
+            # Convert to numpy for reordering and debug prints
+            weight_np = self.weight.cpu().numpy()
+            mu_np = self.mu.cpu().numpy()
+            sigma_np = self.sigma.cpu().numpy()
+            B_np = self.B.cpu().numpy()
+            print('img:{}'.format(img[100][100]))
+            print('weight:{}'.format(weight_np[100][100]))
+        # Update numpy arrays for reorder
+        self.weight = weight_np
+        self.mu = mu_np
+        self.sigma = sigma_np
+        self.B = B_np
+        self.reorder()
+        for i in range(self.K):
+            print('u:{}'.format(self.mu[100][100][i]))
+        # Move back to GPU for next iteration
+        self.weight = torch.from_numpy(self.weight).to(device)
+        self.mu = torch.from_numpy(self.mu).to(device)
+        self.sigma = torch.from_numpy(self.sigma).to(device)
+        self.B = torch.from_numpy(self.B).to(device)
+    def save_model(self, file_path):
+        """
+        Save the trained model to a file
+        """
+        # Only make directories if there is a directory in the path
+        dir_name = os.path.dirname(file_path)
+        if dir_name:
+            os.makedirs(dir_name, exist_ok=True)
+        joblib.dump({
+            'weight': self.weight,
+            'mu': self.mu,
+            'sigma': self.sigma,
+            'K': self.K,
+            'B': self.B,
+            'img_shape': self.img_shape,
+            'alpha': self.alpha,
+            'data_dir': self.data_dir,
+            'train_num': self.train_num
+        }, file_path)
+        print(f"Model saved to {file_path}")
+    @classmethod
+    def load_model(cls, file_path):
+        """
+        Load a trained model from file
+        """
+        data = joblib.load(file_path)
+        # Create new instance
+        gmm = cls(data['data_dir'], data['train_num'], data['alpha'])
+        # Restore all attributes
+        gmm.weight = data['weight']
+        gmm.mu = data['mu']
+        gmm.sigma = data['sigma']
+        gmm.K = data['K']
+        gmm.B = data['B']
+        gmm.img_shape = data['img_shape']
+        gmm.image_shape = data['img_shape']
+        print(f"Model loaded from {file_path}")
+        return gmm
+    def reorder(self, T=0.90):
+        '''
+        Reorder the estimated components based on the ratio pi / the norm of standard deviation.
+        The first B components are chosen as background components.
+        The default threshold is 0.90.
+        '''
+        epsilon = 1e-6  # to prevent divide-by-zero
+        for i in range(self.img_shape[0]):
+            for j in range(self.img_shape[1]):
+                k_weight = self.weight[i][j]
+                k_norm = []
+                for k in range(self.K):
+                    cov = self.sigma[i][j][k]
+                    try:
+                        if np.all(np.linalg.eigvals(cov) >= 0):
+                            stddev = np.sqrt(cov)
+                            k_norm.append(norm(stddev))
+                        else:
+                            k_norm.append(epsilon)
+                    except:
+                        k_norm.append(epsilon)
+                k_norm = np.array(k_norm)
+                ratio = k_weight / (k_norm + epsilon)
+                descending_order = np.argsort(-ratio)
+                self.weight[i][j] = self.weight[i][j][descending_order]
+                self.mu[i][j] = self.mu[i][j][descending_order]
+                self.sigma[i][j] = self.sigma[i][j][descending_order]
+                cum_weight = 0
+                for index, order in enumerate(descending_order):
+                    cum_weight += self.weight[i][j][index]
+                    if cum_weight > T:
+                        self.B[i][j] = index + 1
+                        break
+    # def infer(self, img, heatmap=None, alpha=0.1):
+    #     '''
+    #     Perform inference with a persistent heatmap that intensifies with movement.
+    #     '''
+    #     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    #     img_tensor = torch.from_numpy(img).float().to(device)  # (H, W, 3)
+    #     H, W, _ = img.shape
+    #     # Initialize heatmap on the first frame
+    #     if heatmap is None:
+    #         heatmap = torch.zeros((H, W), dtype=torch.float32, device=device)
+    #     # No need for an 'else' that converts from numpy,
+    #     # as we will pass the tensor back in subsequent calls.
+    #     # --- Your existing foreground detection logic remains the same ---
+    #     detection_mask = torch.ones((H, W), dtype=torch.bool, device=device)
+    #     for k in range(self.K):
+    #         B_mask = (self.B >= (k + 1)).to(device)
+    #         mu_k = self.mu[:, :, k, :].to(device)
+    #         sigma_k = self.sigma[:, :, k, :, :].to(device)
+    #         delta = (img_tensor - mu_k).unsqueeze(-1)
+    #         sigma_inv = torch.linalg.inv(sigma_k)
+    #         temp = torch.matmul(sigma_inv, delta)
+    #         dist_sq = torch.matmul(delta.transpose(-2, -1), temp).squeeze(-1).squeeze(-1)
+    #         dist = torch.sqrt(dist_sq + 1e-5)
+    #         match_mask = (dist < 9.5) & B_mask
+    #         detection_mask[match_mask] = False
+    #         img_tensor[match_mask] = mu_k[match_mask] # Optional: for visualization
+    #     foreground_mask = detection_mask & (img_tensor.abs().sum(dim=-1) > 0)
+    #     heatmap[foreground_mask] = torch.clamp(heatmap[foreground_mask] + alpha, 0, 1)
+    #     # Convert heatmap tensor to a numpy array for visualization
+    #     heatmap_np = heatmap.cpu().numpy()
+    #     # Apply the colormap (0 -> Blue, 1 -> Red)
+    #     heatmap_viz = cv.applyColorMap((heatmap_np * 255).astype(np.uint8), cv.COLORMAP_JET)
+    #     # Blend the heatmap with the original image
+    #     result = cv.addWeighted(img, 0.7, heatmap_viz, 0.5, 0)
+    #     # Return the blended image and the heatmap tensor for the next frame
+    #     return result, heatmap
+    #--------------------------------------------------------------------------------------------
+    # def infer(self, img, heatmap=None, decay_factor=0.95, alpha=0.1):
+    #     '''
+    #     Perform inference with improved heatmap reflecting persistence of foreground objects.
+    #     Default areas remain unchanged (no bluish tone), only heatmap areas are colored.
+    #     '''
+    #     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    #     img_tensor = torch.from_numpy(img).float().to(device)  # (H, W, 3)
+    #     H, W, _ = img.shape
+    #     # Initialize or move heatmap to tensor on device
+    #     if heatmap is None:
+    #         heatmap = torch.zeros((H, W), dtype=torch.float32, device=device)
+    #     else:
+    #         heatmap = torch.from_numpy(heatmap).float().to(device)
+    #     # Detection mask initialized to 1 (foreground), 0 means background
+    #     detection_mask = torch.ones((H, W), dtype=torch.bool, device=device)
+    #     for k in range(self.K):
+    #         B_mask = (self.B >= (k + 1)).to(device)
+    #         mu_k = self.mu[:, :, k, :].to(device)
+    #         sigma_k = self.sigma[:, :, k, :, :].to(device)
+    #         delta = img_tensor - mu_k
+    #         delta = delta.unsqueeze(-1)
+    #         sigma_inv = torch.linalg.inv(sigma_k)
+    #         temp = torch.matmul(sigma_inv, delta)
+    #         dist_sq = torch.matmul(delta.transpose(-2, -1), temp).squeeze(-1).squeeze(-1)
+    #         dist = torch.sqrt(dist_sq + 1e-5)
+    #         match_mask = (dist < 9.5) & B_mask
+    #         # Mark matched pixels as background
+    #         detection_mask[match_mask] = False
+    #         img_tensor[match_mask] = mu_k[match_mask]
+    #     # Foreground mask (boolean tensor)
+    #     foreground_mask = detection_mask & (img_tensor.abs().sum(dim=-1) > 0)
+    #     # Update heatmap:
+    #     heatmap[foreground_mask] = torch.clamp(heatmap[foreground_mask] + alpha, 0, 1)
+    #     heatmap[~foreground_mask] *= decay_factor
+    #     # Convert heatmap to numpy for visualization
+    #     heatmap_np = heatmap.cpu().numpy()
+    #     # Create heatmap visualization
+    #     heatmap_viz = cv.applyColorMap((heatmap_np * 255).astype(np.uint8), cv.COLORMAP_JET)
+    #     # Create mask of significant heatmap areas (adjust threshold as needed)
+    #     significant_heat = (heatmap_np > 0.1)
+    #     # Initialize result with original image
+    #     result = img.copy()
+    #     # Only process if there are significant heat areas
+    #     if np.any(significant_heat):
+    #         # Ensure we have valid regions to blend
+    #         img_region = img[significant_heat]
+    #         heat_region = heatmap_viz[significant_heat]
+    #         # Only blend if we have valid regions
+    #         if img_region.size > 0 and heat_region.size > 0:
+    #             blended = cv.addWeighted(
+    #                 img_region, 0.7,
+    #                 heat_region, 0.3,
+    #                 0
+    #             )
+    #             result[significant_heat] = blended
+    #     return result, heatmap_np
+    #_____________________________________________________________________________________Decay factor and working good
+    # def infer(self, img, heatmap=None, decay_factor=0.95, alpha=0.1):
+    #     '''
+    #     Perform inference with binary red mask (no intensity variation) and dilation.
+    #     Returns:
+    #         - result: Image with solid red overlay on detections (same dtype as input)
+    #         - heatmap_np: Heatmap array
+    #     '''
+    #     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    #     # Ensure input is numpy array and get original dtype
+    #     original_dtype = img.dtype
+    #     img = np.asarray(img).astype(np.float32)
+    #     H, W, C = img.shape
+    #     # Initialize tensors
+    #     img_tensor = torch.from_numpy(img).float().to(device)
+    #     # Initialize heatmap
+    #     if heatmap is None:
+    #         heatmap = torch.zeros((H, W), dtype=torch.float32, device=device)
+    #     else:
+    #         heatmap = torch.from_numpy(heatmap).float().to(device)
+    #     # Detection processing (your original code)
+    #     detection_mask = torch.ones((H, W), dtype=torch.bool, device=device)
+    #     for k in range(self.K):
+    #         B_mask = (self.B >= (k + 1)).to(device)
+    #         mu_k = self.mu[:, :, k, :].to(device)
+    #         sigma_k = self.sigma[:, :, k, :, :].to(device)
+    #         delta = img_tensor - mu_k
+    #         delta = delta.unsqueeze(-1)
+    #         sigma_inv = torch.linalg.inv(sigma_k)
+    #         temp = torch.matmul(sigma_inv, delta)
+    #         dist_sq = torch.matmul(delta.transpose(-2, -1), temp).squeeze(-1).squeeze(-1)
+    #         dist = torch.sqrt(dist_sq + 1e-5)
+    #         match_mask = (dist < 9.5) & B_mask
+    #         detection_mask[match_mask] = False
+    #         img_tensor[match_mask] = mu_k[match_mask]
+    #     # Update heatmap
+    #     foreground_mask = detection_mask & (img_tensor.abs().sum(dim=-1) > 0)
+    #     heatmap[foreground_mask] = torch.clamp(heatmap[foreground_mask] + alpha, 0, 1)
+    #     heatmap[~foreground_mask] *= decay_factor
+    #     heatmap_np = heatmap.cpu().numpy()
+    #     # Create binary mask and dilate
+    #     binary_mask = (heatmap_np > 0.1).astype(np.uint8)
+    #     kernel = np.ones((5,5), np.uint8)
+    #     dilated_mask = cv.dilate(binary_mask, kernel, iterations=1)
+    #     # Create solid red overlay (BGR)
+    #     red_overlay = np.zeros_like(img)
+    #     red_overlay[..., 2] = 200  # Red channel
+    #     # Apply overlay using where instead of boolean indexing
+    #     result = np.where(
+    #         dilated_mask[..., np.newaxis].astype(bool),
+    #         cv.addWeighted(img, 0.7, red_overlay, 0.3, 0),
+    #         img
+    #     )
+    #     # Convert back to original dtype
+    #     if original_dtype != np.float32:
+    #         result = np.clip(result, 0, 255).astype(original_dtype)
+    #     return result, heatmap_np
+    #________________________________________________________________________________________________
+    # def infer(self, img, heatmap=None, alpha=0.1):
+    #     '''
+    #     Perform inference with binary red mask (no intensity variation) and dilation.
+    #     Heatmap is fully recalculated every frame — no temporal decay or retention.
+    #     Returns:
+    #         - result: Image with solid red overlay on detections
+    #         - heatmap_np: Binary heatmap array
+    #     '''
+    #     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    #     # Ensure input is numpy array and get original dtype
+    #     original_dtype = img.dtype
+    #     img = np.asarray(img).astype(np.float32)
+    #     H, W, C = img.shape
+    #     # Initialize tensors
+    #     img_tensor = torch.from_numpy(img).float().to(device)
+    #     # Detection processing
+    #     detection_mask = torch.ones((H, W), dtype=torch.bool, device=device)
+    #     for k in range(self.K):
+    #         B_mask = (self.B >= (k + 1)).to(device)
+    #         mu_k = self.mu[:, :, k, :].to(device)
+    #         sigma_k = self.sigma[:, :, k, :, :].to(device)
+    #         delta = img_tensor - mu_k
+    #         delta = delta.unsqueeze(-1)
+    #         sigma_inv = torch.linalg.inv(sigma_k)
+    #         temp = torch.matmul(sigma_inv, delta)
+    #         dist_sq = torch.matmul(delta.transpose(-2, -1), temp).squeeze(-1).squeeze(-1)
+    #         dist = torch.sqrt(dist_sq + 1e-5)
+    #         match_mask = (dist < 9.5) & B_mask
+    #         detection_mask[match_mask] = False
+    #         img_tensor[match_mask] = mu_k[match_mask]
+    #     # Generate a binary heatmap (no decay, no accumulation)
+    #     foreground_mask = detection_mask & (img_tensor.abs().sum(dim=-1) > 0)
+    #     heatmap = torch.zeros((H, W), dtype=torch.float32, device=device)
+    #     heatmap[foreground_mask] = alpha
+    #     heatmap_np = heatmap.cpu().numpy()
+    #     # Create binary mask and dilate
+    #     binary_mask = (heatmap_np > 0.05).astype(np.uint8)
+    #     kernel = np.ones((5, 5), np.uint8)
+    #     dilated_mask = cv.dilate(binary_mask, kernel, iterations=1)
+    #     # Create solid red overlay (BGR)
+    #     red_overlay = np.zeros_like(img)
+    #     red_overlay[..., 2] = 200  # Red channel
+    #     # Apply overlay
+    #     result = np.where(
+    #         dilated_mask[..., np.newaxis].astype(bool),
+    #         cv.addWeighted(img, 0.7, red_overlay, 0.3, 0),
+    #         img
+    #     )
+    #     # Convert back to original dtype
+    #     if original_dtype != np.float32:
+    #         result = np.clip(result, 0, 255).astype(original_dtype)
+    #     return result, heatmap_np
+    def infer(self, img, heatmap=None, alpha=0.1):
+        '''
+        Perform inference with binary red mask and GPU-based dilation.
+        Heatmap is recalculated each frame (no temporal retention).
+        Returns:
+            - result: Image with red overlay where foreground is detected.
+            - heatmap_np: Numpy array of binary heatmap.
+        '''
+        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        # Convert image to float32 and move to GPU
+        original_dtype = img.dtype
+        img = np.asarray(img).astype(np.float32)
+        H, W, C = img.shape
+        img_tensor = torch.from_numpy(img).float().to(device)
+        # Initialize detection mask as all True (foreground by default)
+        detection_mask = torch.ones((H, W), dtype=torch.bool, device=device)
+        for k in range(self.K):
+            B_mask = (self.B >= (k + 1)).to(device)
+            mu_k = self.mu[:, :, k, :].to(device)
+            sigma_k = self.sigma[:, :, k, :, :].to(device)
+            delta = img_tensor - mu_k
+            delta = delta.unsqueeze(-1)  # shape: (H, W, 3, 1)
+            sigma_inv = torch.linalg.inv(sigma_k)
+            temp = torch.matmul(sigma_inv, delta)
+            dist_sq = torch.matmul(delta.transpose(-2, -1), temp).squeeze(-1).squeeze(-1)
+            dist = torch.sqrt(dist_sq + 1e-5)
+            match_mask = (dist < 9.5) & B_mask
+            detection_mask[match_mask] = False
+            # img_tensor[match_mask] = mu_k[match_mask]
+        # Generate heatmap
+        foreground_mask = detection_mask & (img_tensor.abs().sum(dim=-1) > 0)
+        heatmap_tensor = torch.zeros((H, W), dtype=torch.float32, device=device)
+        heatmap_tensor[foreground_mask] = alpha
+        # Convert heatmap to binary mask and apply dilation (GPU-based)
+        binary_mask = (heatmap_tensor > 0.05).float().unsqueeze(0).unsqueeze(0)  # shape: (1, 1, H, W)
+        kernel = torch.ones((1, 1, 5, 5), dtype=torch.float32, device=device)
+        dilated = F.conv2d(binary_mask, kernel, padding=2)
+        dilated_mask = (dilated > 0).squeeze().to(torch.bool)
+        # Create red overlay (on GPU)
+        red_overlay = torch.zeros_like(img_tensor)
+        red_overlay[..., 2] = 200  # Red channel
+        # Blend red overlay on detected regions
+        result_tensor = torch.where(
+            dilated_mask.unsqueeze(-1),
+            0.7 * img_tensor + 0.3 * red_overlay,
+            img_tensor
+        )
+        # Convert back to NumPy and original dtype
+        result = result_tensor.clamp(0, 255).cpu().numpy()
+        if original_dtype != np.float32:
+            result = result.astype(original_dtype)
+        heatmap_np = (heatmap_tensor > 0.05).float().cpu().numpy()
+        return result, heatmap_np

README.md CHANGED Viewed

@@ -1,12 +1,12 @@
----
-title: Uvscan Kitchen Heatmap
-emoji: 🌍
-colorFrom: yellow
-colorTo: indigo
-sdk: gradio
-sdk_version: 5.33.2
-app_file: app.py
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: Uvscan Kitchenheatmap
+emoji: 📉
+colorFrom: yellow
+colorTo: blue
+sdk: gradio
+sdk_version: 5.33.2
+app_file: app.py
+pinned: false
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,115 @@

+import gradio as gr
+import numpy as np
+from PIL import Image
+import cv2  # OpenCV for video processing (if used)
+from processors.extract_frames import video_to_keyframes
+from processors.apply_mask import apply_mask_and_crop
+from processors.run_gmm import run_gmm_inference
+from processors.compose_video import compose_final_video
+# import the processing functions from original app
+# from heatmap_module import video_to_keyframes, apply_mask_and_crop, run_gmm_inference, compose_final_video
+# Helper to extract first frame for mask drawing
+def get_first_frame(video_path):
+    """Return the first frame of the video at ``video_path`` as an RGB PIL image.
+
+    Returns ``None`` when OpenCV reports that no frame could be read.
+    """
+    capture = cv2.VideoCapture(video_path)
+    ok, first = capture.read()
+    capture.release()
+    if not ok:
+        return None
+    # The frame is converted from BGR to RGB before being handed to PIL/Gradio.
+    return Image.fromarray(cv2.cvtColor(first, cv2.COLOR_BGR2RGB))
+# Helper to get mask from drawn data
+def extract_mask_from_drawn(composite_image, background_image):
+    """Build a binary mask (uint8, values 0 or 255) from an editor drawing.
+
+    Args:
+        composite_image: image containing the drawing (anything ``np.array`` accepts).
+        background_image: the undrawn image the composite is compared against.
+
+    Returns:
+        A 2-D ``uint8`` array with 255 where a pixel is considered drawn, else 0.
+        When both images share a shape, a pixel is "drawn" if it differs from the
+        background; otherwise the composite alone is thresholded at brightness > 10.
+    """
+    comp = np.array(composite_image)
+    bg = np.array(background_image)
+    if comp.shape != bg.shape:
+        # No comparable background: fall back to a simple brightness threshold
+        # on the composite (mean of the first three channels for colour input).
+        gray = comp if comp.ndim == 2 else comp[..., :3].mean(axis=-1)
+        drawn = gray > 10
+    elif comp.ndim == 2:
+        # Single-channel images have no channel axis; reducing over axis -1
+        # here would collapse the width and produce a 1-D mask.
+        drawn = comp != bg
+    else:
+        # Colour images: a pixel is drawn if any of its channels changed.
+        drawn = np.any(comp != bg, axis=-1)
+    return drawn.astype(np.uint8) * 255
+def process_video(video_file, mask_image, drawn_editor, progress=gr.Progress()):
+    """Run the heatmap pipeline on a video and return ``(status_message, result_path)``.
+
+    Args:
+        video_file: path to the uploaded video, or None if nothing was uploaded.
+        mask_image: uploaded mask as a numpy array (HxW or HxWx3), or None.
+        drawn_editor: dict with 'background' and 'composite' from the ImageEditor, or None.
+        progress: Gradio progress tracker used to report pipeline stages.
+
+    Returns:
+        A status string and whatever ``compose_final_video`` returns for
+        "heatmap_output.mp4" (presumably the output file path - confirm in
+        processors.compose_video).
+
+    Raises:
+        gr.Error: if no video was provided, or no usable mask was uploaded or drawn.
+    """
+    if video_file is None:
+        # Fail with a readable UI message instead of an error inside the frame extractor.
+        raise gr.Error("Please upload a video first.")
+    # Decide mask source: an uploaded mask takes precedence over a drawn one.
+    if mask_image is not None:
+        # Ensure mask is binary (if user uploaded a colored mask, convert to gray)
+        mask = mask_image
+        if mask.ndim == 3:
+            mask = cv2.cvtColor(mask, cv2.COLOR_RGB2GRAY)
+        _, mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
+    elif drawn_editor is not None and drawn_editor.get("composite") is not None:
+        # An editor value without a composite carries no drawing to derive a mask from,
+        # so it is treated the same as "no mask provided" below.
+        mask = extract_mask_from_drawn(drawn_editor["composite"], drawn_editor.get("background"))
+    else:
+        raise gr.Error("Please provide a mask (upload or draw).")
+    progress(0, desc="Extracting keyframes...")
+    frames = video_to_keyframes(video_file)
+    progress(0.3, desc="Applying mask and cropping...")
+    cropped_frames = apply_mask_and_crop(frames, mask)
+    progress(0.6, desc="Running inference on frames...")
+    output_frames = run_gmm_inference(cropped_frames)
+    progress(0.85, desc="Composing final video...")
+    result_path = compose_final_video(output_frames, "heatmap_output.mp4")
+    progress(1.0, desc="Done")
+    return "✅ Heatmap video generated!", result_path
+# Define the Gradio app layout
+custom_css = """
+.gradio-container {background: url('/gradio_api/file=background.jpg') center/cover no-repeat !important;
+                   background-color: #000 !important;}
+.panel {max-width: 800px; margin: 2rem auto; padding: 2rem; background: rgba(30,30,30, 0.8); border-radius: 8px;}
+"""
+with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css, title="Heatmap Generator") as demo:
+    gr.Markdown("## 🎥 Heatmap Generator", elem_classes="panel")
+    with gr.Row(elem_classes="panel"):
+        video_input = gr.Video(label="Upload Video", format="mp4")
+    with gr.Tabs(elem_classes="panel"):
+        with gr.Tab("Upload Mask"):
+            mask_upload = gr.Image(label="Upload Mask Image", type="numpy")
+        with gr.Tab("Draw Mask"):
+            draw_info = gr.Markdown("*Draw mask on the frame:* Use brush to highlight the region of interest.")
+            # ImageEditor takes its brush configuration through `brush=`; it has no `tool` keyword.
+            mask_draw = gr.ImageEditor(label="Draw Mask", brush=gr.Brush(), type="pil")  # we'll get PIL images
+    # Buttons
+    with gr.Row(elem_classes="panel"):
+        generate_btn = gr.Button("🔥 Generate Heatmap", variant="primary")
+        reset_btn = gr.Button("Reset")
+        download_btn = gr.DownloadButton("Download Video", file_name="heatmap_output.mp4")
+    # Status and output
+    with gr.Row(elem_classes="panel"):
+        status_text = gr.Markdown("")  # to show status or final message
+    with gr.Row(elem_classes="panel"):
+        output_video = gr.Video(label="Output Video")
+    # Event handlers
+    # When video is uploaded, extract a frame and set it in the draw component
+    def prep_frame_for_drawing(video_file):
+        """Return the initial editor value: the video's first frame, or None if unavailable."""
+        if video_file is None:
+            return None
+        frame = get_first_frame(video_file)
+        if frame is None:
+            # Unreadable video: leave the editor empty instead of sending a value with no images.
+            return None
+        # The editor value dict carries 'background', 'layers' and 'composite'; no layers drawn yet.
+        return {'background': frame, 'layers': [], 'composite': frame}
+    video_input.change(fn=prep_frame_for_drawing, inputs=video_input, outputs=mask_draw)
+    # Generate button triggers processing
+    generate_event = generate_btn.click(fn=process_video, inputs=[video_input, mask_upload, mask_draw],
+                                        outputs=[status_text, output_video])
+    # Bind the generated file to the download button only after processing has finished.
+    # A second independent click handler would read output_video before process_video fills it.
+    generate_event.then(fn=lambda vid: vid, inputs=output_video, outputs=download_btn)
+    # Reset button clears all inputs, the status, the output video and the download target
+    reset_btn.click(fn=lambda: (None, None, None, "", None, None), inputs=[],
+                    outputs=[video_input, mask_upload, mask_draw, status_text, output_video, download_btn])
+# Launch (if running locally; on HF Spaces this is handled automatically)
+if __name__ == "__main__":
+    demo.launch()

gmm_model.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ee1c6601c803d73838183b47e7a40ea79f8746135581af3d9a93b6a7151c16ba
+size 15263359

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+kivy>=2.3.0
+kivymd>=1.2.0
+matplotlib>=3.5
+opencv-python>=4.8
+numpy>=1.23
+scipy>=1.10
+joblib>=1.3
+torch>=2.0
+Pillow>=9.5
+gradio==5.33.2