aharley committed on
Commit
c7646a0
·
1 Parent(s): 6cf1a23

added overlay option; fixed bugs

Browse files
Files changed (2) hide show
  1. app.py +48 -22
  2. utils/improc.py +1 -0
app.py CHANGED
@@ -20,6 +20,7 @@ import random
20
  from typing import List, Optional, Sequence, Tuple
21
  import spaces
22
  import numpy as np
 
23
  import utils.basic
24
  import utils.improc
25
 
@@ -105,12 +106,16 @@ def paint_point_track_gpu_scatter(
105
  visibles: np.ndarray,
106
  colormap: Optional[List[Tuple[int, int, int]]] = None,
107
  rate: int = 1,
 
108
  # sharpness: float = 0.1,
109
  ) -> np.ndarray:
110
  print('starting vis')
111
  device = "cuda" if torch.cuda.is_available() else "cpu"
112
  frames_t = torch.from_numpy(frames).float().permute(0, 3, 1, 2).to(device) # [T,C,H,W]
113
- frames_t = frames_t * 0.5 # darken, to see the point tracks better
 
 
 
114
  point_tracks_t = torch.from_numpy(point_tracks).to(device) # [P,T,2]
115
  visibles_t = torch.from_numpy(visibles).to(device) # [P,T]
116
  T, C, H, W = frames_t.shape
@@ -517,14 +522,14 @@ def choose_rate8(video_preview, video_fps, tracks, visibs):
517
  # def choose_rate16(video_preview, video_fps, tracks, visibs):
518
  # return choose_rate(16, video_preview, video_fps, tracks, visibs)
519
 
520
- def update_vis(rate, cmap, video_preview, query_frame, video_fps, tracks, visibs):
521
  print('rate', rate)
522
  print('cmap', cmap)
523
  print('video_preview', video_preview.shape)
524
  T, H, W,_ = video_preview.shape
525
  tracks_ = tracks.reshape(H,W,T,2)[::rate,::rate].reshape(-1,T,2)
526
  visibs_ = visibs.reshape(H,W,T)[::rate,::rate].reshape(-1,T)
527
- return paint_video(video_preview, query_frame, video_fps, tracks_, visibs_, rate=rate, cmap=cmap)
528
  # return video_preview_array[int(frame_num)]
529
 
530
  def preprocess_video_input(video_path):
@@ -570,15 +575,19 @@ def preprocess_video_input(video_path):
570
  )
571
 
572
 
573
- def paint_video(video_preview, query_frame, video_fps, tracks, visibs, rate=1, cmap="gist_rainbow"):
574
  print('video_preview', video_preview.shape)
575
  print('tracks', tracks.shape)
576
  T, H, W, _ = video_preview.shape
577
  query_count = tracks.shape[0]
578
  print('cmap', cmap)
579
-
580
  if cmap=="bremm":
 
581
  xy0 = tracks[:,query_frame] # N,2
 
 
 
582
  colors = utils.improc.get_2d_colors(xy0, H, W)
583
  else:
584
  cmap_ = matplotlib.colormaps.get_cmap(cmap)
@@ -594,7 +603,7 @@ def paint_video(video_preview, query_frame, video_fps, tracks, visibs, rate=1, c
594
  colors.extend(frame_colors)
595
  colors = np.array(colors)
596
 
597
- painted_video = paint_point_track_gpu_scatter(video_preview,tracks,visibs,colors,rate=rate)#=max(rate//2,1))
598
  # save video
599
  video_file_name = uuid.uuid4().hex + ".mp4"
600
  video_path = os.path.join(os.path.dirname(__file__), "tmp")
@@ -609,7 +618,7 @@ def paint_video(video_preview, query_frame, video_fps, tracks, visibs, rate=1, c
609
  im = PIL.Image.fromarray(painted_video[ti])
610
  # im.save(temp_out_f, "PNG", subsampling=0, quality=80)
611
  im.save(temp_out_f)
612
- print('saved', temp_out_f)
613
  # os.system('/usr/bin/ffmpeg -y -hide_banner -loglevel error -f image2 -framerate %d -pattern_type glob -i "%s/*.png" -c:v libx264 -crf 20 -pix_fmt yuv420p %s' % (video_fps, video_path, video_file_path))
614
  os.system('/usr/bin/ffmpeg -y -hide_banner -loglevel error -f image2 -framerate %d -pattern_type glob -i "%s/*.jpg" -c:v libx264 -crf 20 -pix_fmt yuv420p %s' % (video_fps, video_path, video_file_path))
615
  print('saved', video_file_path)
@@ -617,16 +626,19 @@ def paint_video(video_preview, query_frame, video_fps, tracks, visibs, rate=1, c
617
  # temp_out_f = '%s/%03d.png' % (video_path, ti)
618
  temp_out_f = '%s/%03d.jpg' % (video_path, ti)
619
  os.remove(temp_out_f)
620
- print('deleted', temp_out_f)
621
  return video_file_path
622
 
623
 
624
  @spaces.GPU
625
  def track(
626
- video_preview,
627
- video_input,
628
- video_fps,
629
- query_frame,
 
 
 
630
  ):
631
  # tracking_mode = 'selected'
632
  # if query_count == 0:
@@ -774,7 +786,8 @@ def track(
774
  tracks = traj_maps_e.permute(0,3,4,1,2).reshape(-1,T,2).numpy()
775
  visibs = visconf_maps_e.permute(0,3,4,1,2).reshape(-1,T,2)[:,:,0].numpy()
776
  confs = visconf_maps_e.permute(0,3,4,1,2).reshape(-1,T,2)[:,:,0].numpy()
777
- visibs = (visibs * confs) > 0.3 # N,T
 
778
  # visibs = (confs) > 0.1 # N,T
779
 
780
 
@@ -782,7 +795,7 @@ def track(
782
  # print('sc', sc)
783
  # tracks = tracks * sc
784
 
785
- return paint_video(video_preview, query_frame, video_fps, tracks, visibs), tracks, visibs, gr.update(interactive=True), gr.update(interactive=True)
786
  # gr.update(interactive=True),
787
  # gr.update(interactive=True),
788
  # gr.update(interactive=True),
@@ -863,7 +876,7 @@ with gr.Blocks() as demo:
863
 
864
  gr.Markdown("# ⚡ AllTracker: Efficient Dense Point Tracking at High Resolution")
865
  gr.Markdown("<div style='text-align: left;'> \
866
- <p>Welcome to <a href='https://alltracker.github.io/' target='_blank'>AllTracker</a>! This demo runs our model to perform all-pixel tracking in a video of your choice.</p> \
867
  <p>To get started, simply upload an mp4, or select one of the example videos. The shorter the video, the faster the processing. We recommend submitting videos under 20 seconds long.</p> \
868
  <p>After picking a video, click \"Submit\" to load the frames into the app, and optionally choose a query frame (using the slider), and then click \"Track\".</p> \
869
  <p>For full info on how this works, check out our <a href='https://github.com/aharley/alltracker/' target='_blank'>GitHub repo</a>, or <a href='https://arxiv.org/abs/2506.07310' target='_blank'>paper</a>.</p> \
@@ -909,7 +922,7 @@ with gr.Blocks() as demo:
909
  with gr.Column():
910
  with gr.Row():
911
  query_frame_slider = gr.Slider(
912
- minimum=0, maximum=100, value=0, step=1, label="Choose frame", interactive=False)
913
  # with gr.Row():
914
  # undo = gr.Button("Undo", interactive=False)
915
  # clear_frame = gr.Button("Clear Frame", interactive=False)
@@ -937,11 +950,12 @@ with gr.Blocks() as demo:
937
  with gr.Row():
938
  # rate_slider = gr.Slider(
939
  # minimum=1, maximum=16, value=1, step=1, label="Choose subsampling rate", interactive=False)
940
- rate_radio = gr.Radio([1, 2, 4, 8, 16], value=1, label="Choose visualization subsampling", interactive=False)
941
-
942
  with gr.Row():
943
- cmap_radio = gr.Radio(["gist_rainbow", "rainbow", "jet", "turbo", "bremm"], value="gist_rainbow", label="Choose colormap", interactive=False)
944
-
 
 
945
  with gr.Row():
946
  output_video = gr.Video(
947
  label="Output video",
@@ -1066,12 +1080,16 @@ with gr.Blocks() as demo:
1066
  video_input,
1067
  video_fps,
1068
  query_frame_slider,
 
 
 
1069
  ],
1070
  outputs = [
1071
  output_video,
1072
  tracks,
1073
  visibs,
1074
  rate_radio,
 
1075
  cmap_radio,
1076
  # rate1_button,
1077
  # rate2_button,
@@ -1092,7 +1110,7 @@ with gr.Blocks() as demo:
1092
  # )
1093
  rate_radio.change(
1094
  fn = update_vis,
1095
- inputs = [rate_radio, cmap_radio, video_preview, query_frame_slider, video_fps, tracks, visibs],
1096
  outputs = [
1097
  output_video,
1098
  ],
@@ -1100,7 +1118,15 @@ with gr.Blocks() as demo:
1100
  )
1101
  cmap_radio.change(
1102
  fn = update_vis,
1103
- inputs = [rate_radio, cmap_radio, video_preview, query_frame_slider, video_fps, tracks, visibs],
 
 
 
 
 
 
 
 
1104
  outputs = [
1105
  output_video,
1106
  ],
 
20
  from typing import List, Optional, Sequence, Tuple
21
  import spaces
22
  import numpy as np
23
+ import utils.py
24
  import utils.basic
25
  import utils.improc
26
 
 
106
  visibles: np.ndarray,
107
  colormap: Optional[List[Tuple[int, int, int]]] = None,
108
  rate: int = 1,
109
+ show_bkg=True,
110
  # sharpness: float = 0.1,
111
  ) -> np.ndarray:
112
  print('starting vis')
113
  device = "cuda" if torch.cuda.is_available() else "cpu"
114
  frames_t = torch.from_numpy(frames).float().permute(0, 3, 1, 2).to(device) # [T,C,H,W]
115
+ if show_bkg:
116
+ frames_t = frames_t * 0.5 # darken, to see the point tracks better
117
+ else:
118
+ frames_t = frames_t * 0.0 # black out
119
  point_tracks_t = torch.from_numpy(point_tracks).to(device) # [P,T,2]
120
  visibles_t = torch.from_numpy(visibles).to(device) # [P,T]
121
  T, C, H, W = frames_t.shape
 
522
  # def choose_rate16(video_preview, video_fps, tracks, visibs):
523
  # return choose_rate(16, video_preview, video_fps, tracks, visibs)
524
 
525
+ def update_vis(rate, show_bkg, cmap, video_preview, query_frame, video_fps, tracks, visibs):
526
  print('rate', rate)
527
  print('cmap', cmap)
528
  print('video_preview', video_preview.shape)
529
  T, H, W,_ = video_preview.shape
530
  tracks_ = tracks.reshape(H,W,T,2)[::rate,::rate].reshape(-1,T,2)
531
  visibs_ = visibs.reshape(H,W,T)[::rate,::rate].reshape(-1,T)
532
+ return paint_video(video_preview, query_frame, video_fps, tracks_, visibs_, rate=rate, show_bkg=show_bkg, cmap=cmap)
533
  # return video_preview_array[int(frame_num)]
534
 
535
  def preprocess_video_input(video_path):
 
575
  )
576
 
577
 
578
+ def paint_video(video_preview, query_frame, video_fps, tracks, visibs, rate=1, show_bkg=True, cmap="gist_rainbow"):
579
  print('video_preview', video_preview.shape)
580
  print('tracks', tracks.shape)
581
  T, H, W, _ = video_preview.shape
582
  query_count = tracks.shape[0]
583
  print('cmap', cmap)
584
+ print('query_frame', query_frame)
585
  if cmap=="bremm":
586
+ # xy0 = tracks
587
  xy0 = tracks[:,query_frame] # N,2
588
+ # print('xyQ', xy0[:10])
589
+ # print('xy0', tracks[:10,0])
590
+ # print('xy1', tracks[:10,1])
591
  colors = utils.improc.get_2d_colors(xy0, H, W)
592
  else:
593
  cmap_ = matplotlib.colormaps.get_cmap(cmap)
 
603
  colors.extend(frame_colors)
604
  colors = np.array(colors)
605
 
606
+ painted_video = paint_point_track_gpu_scatter(video_preview,tracks,visibs,colors,rate=rate,show_bkg=show_bkg)#=max(rate//2,1))
607
  # save video
608
  video_file_name = uuid.uuid4().hex + ".mp4"
609
  video_path = os.path.join(os.path.dirname(__file__), "tmp")
 
618
  im = PIL.Image.fromarray(painted_video[ti])
619
  # im.save(temp_out_f, "PNG", subsampling=0, quality=80)
620
  im.save(temp_out_f)
621
+ # print('saved', temp_out_f)
622
  # os.system('/usr/bin/ffmpeg -y -hide_banner -loglevel error -f image2 -framerate %d -pattern_type glob -i "%s/*.png" -c:v libx264 -crf 20 -pix_fmt yuv420p %s' % (video_fps, video_path, video_file_path))
623
  os.system('/usr/bin/ffmpeg -y -hide_banner -loglevel error -f image2 -framerate %d -pattern_type glob -i "%s/*.jpg" -c:v libx264 -crf 20 -pix_fmt yuv420p %s' % (video_fps, video_path, video_file_path))
624
  print('saved', video_file_path)
 
626
  # temp_out_f = '%s/%03d.png' % (video_path, ti)
627
  temp_out_f = '%s/%03d.jpg' % (video_path, ti)
628
  os.remove(temp_out_f)
629
+ # print('deleted', temp_out_f)
630
  return video_file_path
631
 
632
 
633
  @spaces.GPU
634
  def track(
635
+ video_preview,
636
+ video_input,
637
+ video_fps,
638
+ query_frame,
639
+ rate,
640
+ show_bkg,
641
+ cmap,
642
  ):
643
  # tracking_mode = 'selected'
644
  # if query_count == 0:
 
786
  tracks = traj_maps_e.permute(0,3,4,1,2).reshape(-1,T,2).numpy()
787
  visibs = visconf_maps_e.permute(0,3,4,1,2).reshape(-1,T,2)[:,:,0].numpy()
788
  confs = visconf_maps_e.permute(0,3,4,1,2).reshape(-1,T,2)[:,:,0].numpy()
789
+ # visibs = (visibs * confs) > 0.2 # N,T
790
+ visibs = (confs) > 0.1 # N,T
791
  # visibs = (confs) > 0.1 # N,T
792
 
793
 
 
795
  # print('sc', sc)
796
  # tracks = tracks * sc
797
 
798
+ return update_vis(rate, show_bkg, cmap, video_preview, query_frame, video_fps, tracks, visibs), tracks, visibs, gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True)
799
  # gr.update(interactive=True),
800
  # gr.update(interactive=True),
801
  # gr.update(interactive=True),
 
876
 
877
  gr.Markdown("# ⚡ AllTracker: Efficient Dense Point Tracking at High Resolution")
878
  gr.Markdown("<div style='text-align: left;'> \
879
+ <p>This demo runs <a href='https://alltracker.github.io/' target='_blank'>AllTracker</a> to perform all-pixel tracking in a video of your choice.</p> \
880
  <p>To get started, simply upload an mp4, or select one of the example videos. The shorter the video, the faster the processing. We recommend submitting videos under 20 seconds long.</p> \
881
  <p>After picking a video, click \"Submit\" to load the frames into the app, and optionally choose a query frame (using the slider), and then click \"Track\".</p> \
882
  <p>For full info on how this works, check out our <a href='https://github.com/aharley/alltracker/' target='_blank'>GitHub repo</a>, or <a href='https://arxiv.org/abs/2506.07310' target='_blank'>paper</a>.</p> \
 
922
  with gr.Column():
923
  with gr.Row():
924
  query_frame_slider = gr.Slider(
925
+ minimum=0, maximum=100, value=0, step=1, label="Query frame", interactive=False)
926
  # with gr.Row():
927
  # undo = gr.Button("Undo", interactive=False)
928
  # clear_frame = gr.Button("Clear Frame", interactive=False)
 
950
  with gr.Row():
951
  # rate_slider = gr.Slider(
952
  # minimum=1, maximum=16, value=1, step=1, label="Choose subsampling rate", interactive=False)
953
+ rate_radio = gr.Radio([1, 2, 4, 8, 16], value=1, label="Subsampling rate", interactive=False)
 
954
  with gr.Row():
955
+ cmap_radio = gr.Radio(["gist_rainbow", "rainbow", "jet", "turbo", "bremm"], value="gist_rainbow", label="Colormap", interactive=False)
956
+ with gr.Row():
957
+ bkg_check = gr.Checkbox(value=True, label="Overlay tracks on video", interactive=False)
958
+
959
  with gr.Row():
960
  output_video = gr.Video(
961
  label="Output video",
 
1080
  video_input,
1081
  video_fps,
1082
  query_frame_slider,
1083
+ rate_radio,
1084
+ bkg_check,
1085
+ cmap_radio,
1086
  ],
1087
  outputs = [
1088
  output_video,
1089
  tracks,
1090
  visibs,
1091
  rate_radio,
1092
+ bkg_check,
1093
  cmap_radio,
1094
  # rate1_button,
1095
  # rate2_button,
 
1110
  # )
1111
  rate_radio.change(
1112
  fn = update_vis,
1113
+ inputs = [rate_radio, bkg_check, cmap_radio, video_preview, query_frame_slider, video_fps, tracks, visibs],
1114
  outputs = [
1115
  output_video,
1116
  ],
 
1118
  )
1119
  cmap_radio.change(
1120
  fn = update_vis,
1121
+ inputs = [rate_radio, bkg_check, cmap_radio, video_preview, query_frame_slider, video_fps, tracks, visibs],
1122
+ outputs = [
1123
+ output_video,
1124
+ ],
1125
+ queue = False
1126
+ )
1127
+ bkg_check.change(
1128
+ fn = update_vis,
1129
+ inputs = [rate_radio, bkg_check, cmap_radio, video_preview, query_frame_slider, video_fps, tracks, visibs],
1130
  outputs = [
1131
  output_video,
1132
  ],
utils/improc.py CHANGED
@@ -81,6 +81,7 @@ class ColorMap2d:
81
 
82
  def get_2d_colors(xys, H, W):
83
  N,D = xys.shape
 
84
  assert(D==2)
85
  bremm = ColorMap2d()
86
  xys[:,0] /= float(W-1)
 
81
 
82
  def get_2d_colors(xys, H, W):
83
  N,D = xys.shape
84
+ xys = xys.copy()
85
  assert(D==2)
86
  bremm = ColorMap2d()
87
  xys[:,0] /= float(W-1)