aharley committed
Commit 5426cac · Parent(s): 574fdd2

made fancy

Files changed (1):
  1. app.py (+25 −14)
app.py CHANGED
@@ -103,12 +103,13 @@ def paint_point_track_gpu_scatter(
     point_tracks: np.ndarray,
     visibles: np.ndarray,
     colormap: Optional[List[Tuple[int, int, int]]] = None,
-    radius: int = 2,
+    radius: int = 1,
     sharpness: float = 0.15,
 ) -> np.ndarray:
     print('starting vis')
     device = "cuda" if torch.cuda.is_available() else "cpu"
     frames_t = torch.from_numpy(frames).float().permute(0, 3, 1, 2).to(device) # [T,C,H,W]
+    frames_t = frames_t * 0.5 # darken, to see the point tracks better
     point_tracks_t = torch.from_numpy(point_tracks).to(device) # [P,T,2]
     visibles_t = torch.from_numpy(visibles).to(device) # [P,T]
     T, C, H, W = frames_t.shape
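
A quick self-contained sketch of the setup this hunk touches (toy data, not the real video): frames arrive as [T,H,W,C] uint8 numpy, become a [T,C,H,W] float tensor, and are now halved in brightness so the colored tracks stand out.

    import numpy as np
    import torch

    frames = (np.random.rand(2, 8, 8, 3) * 255).astype(np.uint8)  # [T,H,W,C]
    device = "cuda" if torch.cuda.is_available() else "cpu"
    frames_t = torch.from_numpy(frames).float().permute(0, 3, 1, 2).to(device)  # [T,C,H,W]
    frames_t = frames_t * 0.5  # darken, so overlaid tracks read clearly
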
@@ -159,7 +160,8 @@ def paint_point_track_gpu_scatter(
         # frames_t[t] = frames_t[t] * (1 - weight) + accum
 
         # alpha = weight.clamp(0, 1)
-        alpha = weight.clamp(0, 1) * 0.75 # transparency
+        # alpha = weight.clamp(0, 1) * 0.75 # transparency
+        alpha = weight.clamp(0, 1) # transparency
         accum = accum / (weight + 1e-6) # [3, H, W]
         frames_t[t] = frames_t[t] * (1 - alpha) + accum * alpha
 
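
This change replaces the fixed 0.75 transparency with full opacity wherever the splat weight saturates. A minimal sketch, assuming `accum` is the per-pixel sum of color times Gaussian weight and `weight` the per-pixel sum of weights (as built earlier in this function); shapes are toy values:

    import torch

    H, W = 8, 8
    accum = torch.rand(3, H, W)         # sum over points of color * weight
    weight = torch.rand(1, H, W) * 2.0  # sum over points of weight
    frame = torch.rand(3, H, W)         # darkened background frame

    alpha = weight.clamp(0, 1)                   # opacity 1 where weight >= 1
    color = accum / (weight + 1e-6)              # weighted-average track color
    frame = frame * (1 - alpha) + color * alpha  # alpha-composite onto the frame
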
@@ -256,7 +258,7 @@ def paint_point_track_parallel(
     if colormap is None:
         colormap = get_colors(num_colors=num_points)
     height, width = frames.shape[1:3]
-    radius = 2
+    radius = 1
     print('radius', radius)
     diam = radius * 2 + 1
     # Precompute the icon and its bilinear components
@@ -499,15 +501,15 @@ def preprocess_video_input(video_path):
         video_arr = video_arr[:FRAME_LIMIT]
         num_frames = FRAME_LIMIT
 
-    # Resize to preview size for faster processing, width = PREVIEW_WIDTH
     height, width = video_arr.shape[1:3]
     if height > width:
         new_height, new_width = PREVIEW_HEIGHT, int(PREVIEW_WIDTH * width / height)
     else:
         new_height, new_width = int(PREVIEW_WIDTH * height / width), PREVIEW_WIDTH
-    if height*width > 768*768:
+    if height*width > 768*1024:
         new_height = new_height*3//4
         new_width = new_width*3//4
+    new_height, new_width = new_height//8 * 8, new_width//8 * 8 # make it divisible by 8, partly to satisfy ffmpeg
 
 
     preview_video = mediapy.resize_video(video_arr, (new_height, new_width))
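
To see the effect of the raised threshold and the new divisible-by-8 rounding, here is the sizing logic as a standalone function. PREVIEW_HEIGHT and PREVIEW_WIDTH are assumed to be 768 for illustration only; the real constants are defined elsewhere in app.py.

    PREVIEW_HEIGHT = PREVIEW_WIDTH = 768  # assumed values, for illustration

    def preview_size(height, width):
        if height > width:
            new_h, new_w = PREVIEW_HEIGHT, int(PREVIEW_WIDTH * width / height)
        else:
            new_h, new_w = int(PREVIEW_WIDTH * height / width), PREVIEW_WIDTH
        if height * width > 768 * 1024:        # shrink very large inputs
            new_h, new_w = new_h * 3 // 4, new_w * 3 // 4
        return new_h // 8 * 8, new_w // 8 * 8  # multiples of 8, for ffmpeg

    print(preview_size(1080, 1920))  # -> (320, 576)
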
@@ -693,6 +695,7 @@ def track(
 
     # traj_maps_e = traj_maps_e[:,:,:,::4,::4] # subsample
     # visconf_maps_e = visconf_maps_e[:,:,:,::4,::4] # subsample
+
     traj_maps_e = traj_maps_e[:,:,:,::2,::2] # subsample
     visconf_maps_e = visconf_maps_e[:,:,:,::2,::2] # subsample
 
@@ -722,7 +725,9 @@
         colors.extend(frame_colors)
     colors = np.array(colors)
 
-    inds = np.sum(visibs * 1.0, axis=1) >= min(T//4,3)
+    visibs_ = visibs * 1.0
+    visibs_ = visibs_[:,1:] * visibs_[:,:-1]
+    inds = np.sum(visibs_, axis=1) >= min(T//4,8)
     tracks = tracks[inds]
     visibs = visibs[inds]
     colors = colors[inds]
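
The filter now counts consecutive-frame visibility rather than total visible frames: a track must be visible at both t and t+1 in at least min(T//4, 8) adjacent frame pairs to survive. A toy-shaped sketch:

    import numpy as np

    P, T = 5, 16
    visibs = np.random.rand(P, T) > 0.5         # [P,T] per-frame visibility

    visibs_ = visibs * 1.0
    visibs_ = visibs_[:, 1:] * visibs_[:, :-1]  # 1 where visible at t and t+1
    inds = np.sum(visibs_, axis=1) >= min(T // 4, 8)

    tracks_kept = inds.sum()                    # number of surviving tracks
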
@@ -779,8 +784,7 @@ with gr.Blocks() as demo:
 
     gr.Markdown("# ⚡ AllTracker: Efficient Dense Point Tracking at High Resolution")
     gr.Markdown("<div style='text-align: left;'> \
-        <p>Welcome to <a href='https://alltracker.github.io/' target='_blank'>AllTracker</a>! This space demonstrates point (pixel) tracking in videos. \
-        The model tracks all pixels in a frame that you select. </p> \
+        <p>Welcome to <a href='https://alltracker.github.io/' target='_blank'>AllTracker</a>! This space demonstrates all-pixel tracking in videos.</p> \
         <p>To get started, simply upload your <b>.mp4</b> video, or click on one of the example videos. The shorter the video, the faster the processing. We recommend submitting videos under 20 seconds long.</p> \
         <p>After picking a video, click \"Submit\" to load the frames into the app, and optionally choose a frame (using the slider), and then click \"Track\".</p> \
         <p>For full info on how this works, check out our <a href='https://github.com/aharley/alltracker/' target='_blank'>GitHub Repo</a>!</p> \
@@ -819,11 +823,11 @@
     # with gr.Column():
     #     gr.Markdown("Choose a video or upload one of your own.")
 
-    gr.Markdown("## Step 2: Select a frame, and click \"Track\"")
+    gr.Markdown("## Step 2: Select a frame, and click \"Track\".")
     with gr.Row():
         with gr.Column():
             with gr.Row():
-                query_frames = gr.Slider(
+                query_frame_slider = gr.Slider(
                     minimum=0, maximum=100, value=0, step=1, label="Choose Frame", interactive=False)
     # with gr.Row():
     #     undo = gr.Button("Undo", interactive=False)
@@ -842,6 +846,10 @@
             track_button = gr.Button("Track", interactive=False)
 
         with gr.Column():
+            # with gr.Row():
+            #     rate_slider = gr.Slider(
+            #         minimum=1, maximum=16, value=1, step=1, label="Choose subsampling rate", interactive=False)
+            # with gr.Row():
             output_video = gr.Video(
                 label="Output Video",
                 interactive=False,
@@ -862,7 +870,7 @@
             video_fps,
             # video_in_drawer,
             current_frame,
-            query_frames,
+            query_frame_slider,
             query_points,
             query_points_color,
             is_tracked_query,
@@ -875,9 +883,9 @@
         queue = False
     )
 
-    query_frames.change(
+    query_frame_slider.change(
         fn = choose_frame,
-        inputs = [query_frames, video_queried_preview],
+        inputs = [query_frame_slider, video_queried_preview],
         outputs = [
             current_frame,
         ],
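
The rename from query_frames to query_frame_slider runs through this and the following hunks. For context, a minimal Gradio pattern of the slider-to-frame wiring used here (toy callback and state; the real choose_frame lives elsewhere in app.py):

    import gradio as gr

    def choose_frame(frame_idx, frames):
        # return the preview frame at the slider index
        return frames[int(frame_idx)]

    with gr.Blocks() as demo:
        query_frame_slider = gr.Slider(minimum=0, maximum=100, value=0, step=1,
                                       label="Choose Frame")
        current_frame = gr.Image(label="Current Frame")
        video_queried_preview = gr.State([])  # per-frame previews, filled on Submit
        query_frame_slider.change(fn=choose_frame,
                                  inputs=[query_frame_slider, video_queried_preview],
                                  outputs=[current_frame])
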
@@ -959,6 +967,7 @@
     #     queue = False
     # )
 
+    # output_video = None
 
     track_button.click(
         fn = track,
@@ -966,7 +975,7 @@
             video_preview,
             video_input,
             video_fps,
-            query_frames,
+            query_frame_slider,
             query_points,
             query_points_color,
             query_count,
@@ -978,5 +987,7 @@
     )
 
 
+
+
 # demo.launch(show_api=False, show_error=True, debug=False, share=False)
 demo.launch(show_api=False, show_error=True, debug=False, share=True)
 