Spaces: Running on Zero

added gpu-based drawing

app.py CHANGED
@@ -4,6 +4,8 @@
 import os
 import sys
 import uuid
+from concurrent.futures import ThreadPoolExecutor
+
 
 import gradio as gr
 import mediapy
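The new import backs the paint_point_track_parallel helper added below. As a minimal sketch of the submit-then-wait pattern that helper relies on (the work function here is a hypothetical stand-in for its per-point draw_point job):

from concurrent.futures import ThreadPoolExecutor

def work(i):
    # hypothetical stand-in for draw_point(image, i, t)
    return i * i

with ThreadPoolExecutor(max_workers=8) as executor:
    futures = [executor.submit(work, i) for i in range(4)]
    results = [f.result() for f in futures]  # block until every job finishes
print(results)  # [0, 1, 4, 9]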
@@ -93,6 +95,211 @@ def get_points_on_a_grid(
     )
     return torch.stack([grid_x, grid_y], dim=-1).reshape(1, -1, 2)
 
+def paint_point_track_gpu_scatter(
+    frames: np.ndarray,
+    point_tracks: np.ndarray,
+    visibles: np.ndarray,
+    colormap: Optional[List[Tuple[int, int, int]]] = None,
+    radius: int = 2,
+    sharpness: float = 0.15,
+) -> np.ndarray:
+    print('starting vis')
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    frames_t = torch.from_numpy(frames).float().permute(0, 3, 1, 2).to(device)  # [T,C,H,W]
+    point_tracks_t = torch.from_numpy(point_tracks).to(device)  # [P,T,2]
+    visibles_t = torch.from_numpy(visibles).to(device)  # [P,T]
+    T, C, H, W = frames_t.shape
+    P = point_tracks.shape[0]
+    if colormap is None:
+        colormap = get_colors(P)
+    colors = torch.tensor(colormap, dtype=torch.float32, device=device)  # [P,3]
+    D = radius * 2 + 1
+    y = torch.arange(D, device=device).float()[:, None] - radius
+    x = torch.arange(D, device=device).float()[None, :] - radius
+    dist2 = x**2 + y**2
+    icon = torch.clamp(1 - (dist2 - (radius**2) / 2.0) / (radius * 2 * sharpness), 0, 1)  # [D,D]
+    icon = icon.view(1, D, D)
+    dx = torch.arange(-radius, radius + 1, device=device)
+    dy = torch.arange(-radius, radius + 1, device=device)
+    disp_y, disp_x = torch.meshgrid(dy, dx, indexing="ij")  # [D,D]
+    for t in range(T):
+        mask = visibles_t[:, t]  # [P]
+        if mask.sum() == 0:
+            continue
+        xy = point_tracks_t[mask, t] + 0.5  # [N,2]
+        xy[:, 0] = xy[:, 0].clamp(0, W - 1)
+        xy[:, 1] = xy[:, 1].clamp(0, H - 1)
+        colors_now = colors[mask]  # [N,3]
+        N = xy.shape[0]
+        cx = xy[:, 0].long()  # [N]
+        cy = xy[:, 1].long()
+        x_grid = cx[:, None, None] + disp_x  # [N,D,D]
+        y_grid = cy[:, None, None] + disp_y  # [N,D,D]
+        valid = (x_grid >= 0) & (x_grid < W) & (y_grid >= 0) & (y_grid < H)
+        x_valid = x_grid[valid]  # [K]
+        y_valid = y_grid[valid]
+        icon_weights = icon.expand(N, D, D)[valid]  # [K]
+        colors_valid = colors_now[:, :, None, None].expand(N, 3, D, D).permute(1, 0, 2, 3)[
+            :, valid
+        ]  # [3, K]
+        idx_flat = (y_valid * W + x_valid).long()  # [K]
+
+        accum = torch.zeros_like(frames_t[t])  # [3, H, W]
+        weight = torch.zeros(1, H * W, device=device)  # [1, H*W]
+        img_flat = accum.view(C, -1)  # [3, H*W]
+        weighted_colors = colors_valid * icon_weights  # [3, K]
+        img_flat.scatter_add_(1, idx_flat.unsqueeze(0).expand(C, -1), weighted_colors)
+        weight.scatter_add_(1, idx_flat.unsqueeze(0), icon_weights.unsqueeze(0))
+        weight = weight.view(1, H, W)
+        # accum = accum / (weight + 1e-6) # avoid division by 0
+        # frames_t[t] = torch.where(weight > 0, accum, frames_t[t])
+        # frames_t[t] = frames_t[t] * (1 - weight) + accum
+
+        # alpha = weight.clamp(0, 1)
+        alpha = weight.clamp(0, 1) * 0.75  # transparency
+        accum = accum / (weight + 1e-6)  # [3, H, W]
+        frames_t[t] = frames_t[t] * (1 - alpha) + accum * alpha
+
+        # img_flat = frames_t[t].view(C, -1) # [3, H*W]
+        # weighted_colors = colors_valid * icon_weights # [3, K]
+        # img_flat.scatter_add_(1, idx_flat.unsqueeze(0).expand(C, -1), weighted_colors)
+    print('done vis')
+    return frames_t.clamp(0, 255).byte().permute(0, 2, 3, 1).cpu().numpy()
+
+def paint_point_track_gpu(
+    frames: np.ndarray,
+    point_tracks: np.ndarray,
+    visibles: np.ndarray,
+    colormap: Optional[List[Tuple[int, int, int]]] = None,
+    radius: int = 2,
+    sharpness: float = 0.15,
+) -> np.ndarray:
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    # Setup
+    frames_t = torch.from_numpy(frames).float().permute(0, 3, 1, 2).to(device)  # [T,C,H,W]
+    point_tracks_t = torch.from_numpy(point_tracks).to(device)  # [P,T,2]
+    visibles_t = torch.from_numpy(visibles).to(device)  # [P,T]
+    T, C, H, W = frames_t.shape
+    P = point_tracks.shape[0]
+
+    # Colors
+    if colormap is None:
+        colormap = get_colors(P)  # or any fixed list of RGB
+    colors = torch.tensor(colormap, dtype=torch.float32, device=device)  # [P,3]
+
+    # Icon kernel [K,K]
+    D = radius * 2 + 1
+    y = torch.arange(D, device=device).float()[:, None] - radius - 1
+    x = torch.arange(D, device=device).float()[None, :] - radius - 1
+    dist2 = x**2 + y**2
+    icon = torch.clamp(1 - (dist2 - (radius**2) / 2.0) / (radius * 2 * sharpness), 0, 1)  # [D,D]
+    icon = icon.unsqueeze(0)  # [1,D,D] for broadcasting
+
+    # Create coordinate grids
+    for t in range(T):
+        image = frames_t[t]
+        # Select visible points
+        visible_mask = visibles_t[:, t]
+        pt_xy = point_tracks_t[visible_mask, t]  # [N,2]
+        colors_t = colors[visible_mask]  # [N,3]
+        N = pt_xy.shape[0]
+        if N == 0:
+            continue
+
+        # Integer centers
+        pt_xy = pt_xy + 0.5  # correct center offset
+        pt_xy[:, 0] = pt_xy[:, 0].clamp(0, W - 1)
+        pt_xy[:, 1] = pt_xy[:, 1].clamp(0, H - 1)
+        ix = pt_xy[:, 0].long()  # [N]
+        iy = pt_xy[:, 1].long()
+
+        # Build grid of indices for patch around each point
+        dx = torch.arange(-radius, radius + 1, device=device)
+        dy = torch.arange(-radius, radius + 1, device=device)
+        dx_grid, dy_grid = torch.meshgrid(dx, dy, indexing='ij')
+        dx_flat = dx_grid.reshape(-1)
+        dy_flat = dy_grid.reshape(-1)
+        patch_x = ix[:, None] + dx_flat[None, :]  # [N,K*K]
+        patch_y = iy[:, None] + dy_flat[None, :]  # [N,K*K]
+
+        # Mask out-of-bounds
+        valid = (patch_x >= 0) & (patch_x < W) & (patch_y >= 0) & (patch_y < H)
+        flat_idx = (patch_y * W + patch_x).long()  # [N,K*K]
+
+        # Flatten icon and colors
+        icon_flat = icon.view(1, -1)  # [1, K*K]
+        color_patches = colors_t[:, :, None] * icon_flat[:, None, :]  # [N,3,K*K]
+
+        # Flatten to write into 1D image
+        img_flat = image.view(C, -1)  # [3, H*W]
+        for i in range(N):
+            valid_mask = valid[i]
+            idxs = flat_idx[i][valid_mask]
+            vals = color_patches[i][:, valid_mask]  # [3, valid_count]
+            img_flat[:, idxs] += vals
+
+    out_frames = frames_t.clamp(0, 255).byte().permute(0, 2, 3, 1).cpu().numpy()
+    return out_frames
+
+
+def paint_point_track_parallel(
+    frames: np.ndarray,
+    point_tracks: np.ndarray,
+    visibles: np.ndarray,
+    colormap: Optional[List[Tuple[int, int, int]]] = None,
+    max_workers: int = 8,
+) -> np.ndarray:
+    num_points, num_frames = point_tracks.shape[:2]
+    if colormap is None:
+        colormap = get_colors(num_colors=num_points)
+    height, width = frames.shape[1:3]
+    radius = 2
+    print('radius', radius)
+    diam = radius * 2 + 1
+    # Precompute the icon and its bilinear components
+    quadratic_y = np.square(np.arange(diam)[:, np.newaxis] - radius - 1)
+    quadratic_x = np.square(np.arange(diam)[np.newaxis, :] - radius - 1)
+    icon = (quadratic_y + quadratic_x) - (radius**2) / 2.0
+    sharpness = 0.15
+    icon = np.clip(icon / (radius * 2 * sharpness), 0, 1)
+    icon = 1 - icon[:, :, np.newaxis]
+    icon1 = np.pad(icon, [(0, 1), (0, 1), (0, 0)])
+    icon2 = np.pad(icon, [(1, 0), (0, 1), (0, 0)])
+    icon3 = np.pad(icon, [(0, 1), (1, 0), (0, 0)])
+    icon4 = np.pad(icon, [(1, 0), (1, 0), (0, 0)])
+
+    def draw_point(image, i, t):
+        if not visibles[i, t]:
+            return
+        x, y = point_tracks[i, t, :] + 0.5
+        x = min(max(x, 0.0), width)
+        y = min(max(y, 0.0), height)
+        x1, y1 = np.floor(x).astype(np.int32), np.floor(y).astype(np.int32)
+        x2, y2 = x1 + 1, y1 + 1
+        patch = (
+            icon1 * (x2 - x) * (y2 - y)
+            + icon2 * (x2 - x) * (y - y1)
+            + icon3 * (x - x1) * (y2 - y)
+            + icon4 * (x - x1) * (y - y1)
+        )
+        x_ub = x1 + 2 * radius + 2
+        y_ub = y1 + 2 * radius + 2
+        image[y1:y_ub, x1:x_ub, :] = (1 - patch) * image[y1:y_ub, x1:x_ub, :] + patch * np.array(colormap[i])[np.newaxis, np.newaxis, :]
+
+    video = frames.copy()
+    for t in range(num_frames):
+        image = np.pad(
+            video[t],
+            [(radius + 1, radius + 1), (radius + 1, radius + 1), (0, 0)],
+        )
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            futures = [executor.submit(draw_point, image, i, t) for i in range(num_points)]
+            _ = [f.result() for f in futures]  # wait for all threads
+        video[t] = image[radius + 1 : -radius - 1, radius + 1 : -radius - 1].astype(np.uint8)
+
+    return video
+
+
 def paint_point_track(
     frames: np.ndarray,
     point_tracks: np.ndarray,
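A minimal sketch of how the new scatter-based painter can be exercised on dummy data, assuming it runs inside app.py where get_colors is in scope; shapes follow the comments above (frames [T,H,W,3] uint8, tracks [P,T,2] float, visibility [P,T] bool):

import numpy as np

T, H, W, P = 8, 64, 96, 5  # frames, height, width, points
frames = np.zeros((T, H, W, 3), dtype=np.uint8)
rng = np.random.default_rng(0)
point_tracks = rng.uniform([0, 0], [W - 1, H - 1], size=(P, T, 2)).astype(np.float32)
visibles = np.ones((P, T), dtype=bool)

painted = paint_point_track_gpu_scatter(frames, point_tracks, visibles)
assert painted.shape == (T, H, W, 3) and painted.dtype == np.uint8

The same positional arguments work for paint_point_track_gpu and paint_point_track_parallel; only the scatter version is actually called by track below.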
@@ -330,7 +537,8 @@ def preprocess_video_input(video_path):
 def track(
     video_preview,
     video_input,
-    video_fps,
+    video_fps,
+    query_frame,
     query_points,
     query_points_color,
     query_count,
@@ -338,6 +546,10 @@ def track(
     # tracking_mode = 'selected'
     # if query_count == 0:
     #     tracking_mode = 'grid'
+
+    # print('query_frames', query_frames)
+    # query_frame = int(query_frames[0])
+    # # query_frame = 0
 
     device = "cuda" if torch.cuda.is_available() else "cpu"
     dtype = torch.float if device == "cuda" else torch.float
# add_support_grid=True
|
620 |
|
621 |
|
622 |
+
# query_frame = 0
|
623 |
|
624 |
torch.cuda.empty_cache()
|
625 |
|
|
|
@@ -444,11 +656,17 @@ def track(
     # colors.extend(frame_colors)
     # colors = np.array(colors)
 
-    traj_maps_e = traj_maps_e[:,:,:,::4,::4] # subsample
-    visconf_maps_e = visconf_maps_e[:,:,:,::4,::4] # subsample
+    # traj_maps_e = traj_maps_e[:,:,:,::4,::4] # subsample
+    # visconf_maps_e = visconf_maps_e[:,:,:,::4,::4] # subsample
+    traj_maps_e = traj_maps_e[:,:,:,::2,::2] # subsample
+    visconf_maps_e = visconf_maps_e[:,:,:,::2,::2] # subsample
 
     tracks = traj_maps_e.permute(0,3,4,1,2).reshape(-1,T,2).numpy()
-    visibs = visconf_maps_e.permute(0,3,4,1,2).reshape(-1,T,2)[:,:,0].numpy()
+    visibs = visconf_maps_e.permute(0,3,4,1,2).reshape(-1,T,2)[:,:,0].numpy()
+    confs = visconf_maps_e.permute(0,3,4,1,2).reshape(-1,T,2)[:,:,1].numpy()
+
+    visibs = (visibs * confs) > 0.9 # N,T
+
 
     # sc = (np.array([video_preview.shape[2], video_preview.shape[1]]) / np.array([VIDEO_INPUT_RESO[1], VIDEO_INPUT_RESO[0]])).reshape(1,1,2)
     # print('sc', sc)
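The gating above is stricter than it may look, since visibility and confidence are multiplied before thresholding. A hedged illustration with made-up values:

import numpy as np

vis  = np.array([[0.99, 0.95, 0.40]])  # one track over three frames
conf = np.array([[0.98, 0.90, 0.99]])
print((vis * conf) > 0.9)  # [[ True False False]] -- 0.95 * 0.90 = 0.855 misses the cut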
@@ -467,8 +685,15 @@ def track(
     for frame_colors in query_points_color:
         colors.extend(frame_colors)
     colors = np.array(colors)
+
+    inds = np.sum(visibs * 1.0, axis=1) >= min(T//4,3)
+    tracks = tracks[inds]
+    visibs = visibs[inds]
+    colors = colors[inds]
 
-    painted_video = paint_point_track(video_preview,tracks,visibs,colors)
+    # painted_video = paint_point_track_parallel(video_preview,tracks,visibs,colors)
+    # painted_video = paint_point_track_gpu(video_preview,tracks,visibs,colors)
+    painted_video = paint_point_track_gpu_scatter(video_preview,tracks,visibs,colors)
     print("7 torch.cuda.memory_allocated: %.1fGB"%(torch.cuda.memory_allocated(0)/1024/1024/1024))
 
     # save video
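A small illustration of the new filter with made-up data: a track must be visible in at least min(T // 4, 3) frames to survive to the painting stage, which keeps the painter from spending work on points that barely appear:

import numpy as np

T = 16  # threshold is min(16 // 4, 3) = 3 frames
visibs = np.array([
    [1, 1, 1, 1] + [0] * 12,  # visible in 4 frames -> kept
    [1, 0, 0, 0] + [0] * 12,  # visible in 1 frame  -> dropped
], dtype=bool)
keep = np.sum(visibs * 1.0, axis=1) >= min(T // 4, 3)
print(keep)  # [ True False]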
@@ -546,7 +771,8 @@ with gr.Blocks() as demo:
 
         with gr.Row():
             current_frame = gr.Image(
-                label="Click to add query points",
+                # label="Click to add query points",
+                label="Query frame",
                 type="numpy",
                 interactive=False
             )
@@ -679,6 +905,7 @@ with gr.Blocks() as demo:
             video_preview,
             video_input,
             video_fps,
+            query_frames,
             query_points,
             query_points_color,
             query_count,