Commit a8c8616 · Parent: 899c526
chore: Update dependencies and remove unused files

Files changed:
- .gitignore +4 -3
- README.md +1 -1
- mini_dpvo/api/inference.py +182 -40
- mini_dpvo/dpvo.py +1 -0
- mini_dpvo/stream.py +31 -32
- packages.txt +0 -1
- pixi.lock +0 -0
- pixi.toml +37 -11
- pixi_app.py +0 -14
- tools/app.py +202 -101
- tools/demo.py +9 -11

.gitignore CHANGED
@@ -164,6 +164,7 @@ cython_debug/
 # pixi environments
 .pixi
 *.egg-info
-
-
-
+checkpoints/
+media/
+data/
+build/

README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🔥
 colorFrom: purple
 colorTo: pink
 sdk: gradio
-sdk_version: 4.
+sdk_version: 4.36.1
 app_file: app.py
 pinned: false
 ---

mini_dpvo/api/inference.py CHANGED
@@ -16,6 +16,12 @@ from dataclasses import dataclass
 
 from timeit import default_timer as timer
 
+import cv2
+import mmcv
+from tqdm import tqdm
+from mini_dust3r.api import OptimizedResult, inferece_dust3r
+from mini_dust3r.model import AsymmetricCroCo3DStereo
+
 
 @dataclass
 class DPVOPrediction:
@@ -27,14 +33,20 @@ class DPVOPrediction:
 
 def log_trajectory(
     parent_log_path: Path,
-    poses: Float32[torch.Tensor, "buffer_size 7"],
-    points: Float32[torch.Tensor, "buffer_size*num_patches 3"],
-    colors: UInt8[torch.Tensor, "buffer_size num_patches 3"],
+    poses: Float32[torch.Tensor, "buffer_size 7"],  # noqa: F722
+    points: Float32[torch.Tensor, "buffer_size*num_patches 3"],  # noqa: F722
+    colors: UInt8[torch.Tensor, "buffer_size num_patches 3"],  # noqa: F722
     intri_np: Float64[np.ndarray, "4"],
-    bgr_hw3: UInt8[np.ndarray, "h w 3"],
+    bgr_hw3: UInt8[np.ndarray, "h w 3"],  # noqa: F722
+    path_list: list,
+    jpg_quality: int = 90,
 ):
     cam_log_path = f"{parent_log_path}/camera"
-
+    rgb_hw3 = mmcv.bgr2rgb(bgr_hw3)
+    rr.log(
+        f"{cam_log_path}/pinhole/image",
+        rr.Image(rgb_hw3).compress(jpeg_quality=jpg_quality),
+    )
     rr.log(
         f"{cam_log_path}/pinhole",
         rr.Pinhole(
@@ -54,18 +66,45 @@ def log_trajectory(
     last_index = nonzero_poses.shape[0] - 1
     # get last non-zero pose, and the index of the last non-zero pose
     quat_pose = nonzero_poses[last_index].numpy(force=True)
-    trans_quat = quat_pose[:3]
+    trans_quat: Float32[np.ndarray, "3"] = quat_pose[:3]
     rotation_quat = Rotation.from_quat(quat_pose[3:])
 
-
+    cam_R_world: Float64[np.ndarray, "3 3"] = rotation_quat.as_matrix()
+
+    cam_T_world = np.eye(4)
+    cam_T_world[:3, :3] = cam_R_world
+    cam_T_world[0:3, 3] = trans_quat
+
+    world_T_cam = np.linalg.inv(cam_T_world)
+
+    path_list.append(world_T_cam[:3, 3].copy().tolist())
+
     rr.log(
         f"{cam_log_path}",
-        rr.Transform3D(
+        rr.Transform3D(
+            translation=world_T_cam[:3, 3],
+            mat3x3=world_T_cam[:3, :3],
+            from_parent=False,
+        ),
+    )
+
+    # log path using linestrip
+    rr.log(
+        f"{parent_log_path}/path",
+        rr.LineStrips3D(
+            strips=[
+                path_list,
+            ],
+            colors=[255, 0, 0],
+        ),
     )
 
     # outlier removal
     trajectory_center = np.median(nonzero_poses[:, :3].numpy(force=True), axis=0)
-
+
+    def radii(a):
+        return np.linalg.norm(a - trajectory_center, axis=1)
+
     points_np = nonzero_points.view(-1, 3).numpy(force=True)
     colors_np = colors.view(-1, 3)[points_mask].numpy(force=True)
     inlier_mask = (
@@ -82,6 +121,7 @@ def log_trajectory(
             colors=colors_filtered,
         ),
     )
+    return path_list
 
 
 def log_final(
@@ -102,7 +142,7 @@ def log_final(
 
 
 def create_reader(
-    imagedir: str, calib: str, stride: int, skip: int, queue: Queue
+    imagedir: str, calib: str | None, stride: int, skip: int, queue: Queue
 ) -> Process:
     if os.path.isdir(imagedir):
         reader = Process(
@@ -116,56 +156,158 @@ def create_reader(
     return reader
 
 
+def calculate_num_frames(video_or_image_dir: str, stride: int, skip: int) -> int:
+    # Determine the total number of frames
+    total_frames = 0
+    if os.path.isdir(video_or_image_dir):
+        total_frames = len(
+            [
+                name
+                for name in os.listdir(video_or_image_dir)
+                if os.path.isfile(os.path.join(video_or_image_dir, name))
+            ]
+        )
+    else:
+        cap = cv2.VideoCapture(video_or_image_dir)
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        cap.release()
+
+    total_frames = (total_frames - skip) // stride
+    return total_frames
+
+
+def calib_from_dust3r(
+    bgr_hw3: UInt8[np.ndarray, "height width 3"],
+    model: AsymmetricCroCo3DStereo,
+    device: str,
+) -> Float64[np.ndarray, "3 3"]:
+    """
+    Calculates the calibration matrix from mini-dust3r.
+
+    Args:
+        bgr_hw3: The input image in BGR format with shape (height, width, 3).
+        model: The Dust3D-R model used for inference.
+        device: The device to run the inference on.
+
+    Returns:
+        The calibration matrix with shape (3, 3).
+
+    Raises:
+        None.
+    """
+    tmp_path = Path("/tmp/dpvo/tmp.png")
+    # save image
+    mmcv.imwrite(bgr_hw3, str(tmp_path))
+    optimized_results: OptimizedResult = inferece_dust3r(
+        image_dir_or_list=tmp_path.parent,
+        model=model,
+        device=device,
+        batch_size=1,
+    )
+    # DELETE tmp file
+    tmp_path.unlink()
+
+    # get predicted intrinsics in original image size
+    downscaled_h, downscaled_w, _ = optimized_results.rgb_hw3_list[0].shape
+    orig_h, orig_w, _ = bgr_hw3.shape
+
+    # Scaling factors
+    scaling_factor_x = orig_w / downscaled_w
+    scaling_factor_y = orig_h / downscaled_h
+
+    # Scale the intrinsic matrix to the original image size
+    K_33_original = optimized_results.K_b33[0].copy()
+    K_33_original[0, 0] *= scaling_factor_x  # fx
+    K_33_original[1, 1] *= scaling_factor_y  # fy
+    K_33_original[0, 2] *= scaling_factor_x  # cx
+    K_33_original[1, 2] *= scaling_factor_y  # cy
+
+    return K_33_original
+
+
 @torch.no_grad()
-def 
+def inference_dpvo(
     cfg: CfgNode,
     network_path: str,
     imagedir: str,
     calib: str,
     stride: int = 1,
     skip: int = 0,
-    vis_during: bool = True,
     timeit: bool = False,
 ) -> tuple[DPVOPrediction, float]:
     slam = None
     queue = Queue(maxsize=8)
+
     reader: Process = create_reader(imagedir, calib, stride, skip, queue)
     reader.start()
 
-
-
-    rr.log(f"{parent_log_path}", rr.ViewCoordinates.RDF, timeless=True)
+    parent_log_path = Path("world")
+    rr.log(f"{parent_log_path}", rr.ViewCoordinates.RDF, timeless=True)
 
     start = timer()
-
-
-
-
-
-
-
-
-
-    if vis_during:
-        rr.set_time_sequence(timeline="timestep", sequence=t)
-
-    bgr_3hw: UInt8[torch.Tensor, "h w 3"] = (
-        torch.from_numpy(bgr_hw3).permute(2, 0, 1).cuda()
-    )
-
-
-
-    slam(t, bgr_3hw, intri_torch)
-
-
-
-
-
+    total_frames = calculate_num_frames(imagedir, stride, skip)
+
+    # estimate camera intrinsics if not provided
+    if calib is None:
+        dust3r_device = (
+            "mps"
+            if torch.backends.mps.is_available()
+            else "cuda"
+            if torch.cuda.is_available()
+            else "cpu"
+        )
+        dust3r_model = AsymmetricCroCo3DStereo.from_pretrained(
+            "naver/DUSt3R_ViTLarge_BaseDecoder_512_dpt"
+        ).to(dust3r_device)
+        _, bgr_hw3, _ = queue.get()
+        K_33_pred = calib_from_dust3r(bgr_hw3, dust3r_model, dust3r_device)
+        intri_np_dust3r = np.array(
+            [K_33_pred[0, 0], K_33_pred[1, 1], K_33_pred[0, 2], K_33_pred[1, 2]]
+        )
+
+    # path list for visualizing the trajectory
+    path_list = []
+
+    with tqdm(total=total_frames, desc="Processing Frames") as pbar:
+        while True:
+            t: int
+            bgr_hw3: UInt8[np.ndarray, "h w 3"]
+            intri_np: Float64[np.ndarray, "4"]
+            (t, bgr_hw3, intri_np_calib) = queue.get()
+            intri_np = intri_np_calib if calib is not None else intri_np_dust3r
+            # queue will have a (-1, image, intrinsics) tuple when the reader is done
+            if t < 0:
+                break
+
+            rr.set_time_sequence(timeline="timestep", sequence=t)
+
+            bgr_3hw: UInt8[torch.Tensor, "h w 3"] = (
+                torch.from_numpy(bgr_hw3).permute(2, 0, 1).cuda()
+            )
+            intri_torch: Float64[torch.Tensor, "4"] = torch.from_numpy(intri_np).cuda()
+
+            if slam is None:
+                slam = DPVO(cfg, network_path, ht=bgr_3hw.shape[1], wd=bgr_3hw.shape[2])
+
+            with Timer("SLAM", enabled=timeit):
+                slam(t, bgr_3hw, intri_torch)
+
+            if slam.is_initialized:
+                poses: Float32[torch.Tensor, "buffer_size 7"] = slam.poses_
+                points: Float32[torch.Tensor, "buffer_size*num_patches 3"] = (
+                    slam.points_
+                )
+                colors: UInt8[torch.Tensor, "buffer_size num_patches 3"] = slam.colors_
+                path_list = log_trajectory(
+                    parent_log_path=parent_log_path,
+                    poses=poses,
+                    points=points,
+                    colors=colors,
+                    intri_np=intri_np,
+                    bgr_hw3=bgr_hw3,
+                    path_list=path_list,
+                )
+            pbar.update(1)
 
     for _ in range(12):
         slam.update()
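The core of the new log_trajectory body is the pose handling: DPVO's 7-vector pose (translation plus xyzw quaternion) encodes a camera-from-world transform, so it is expanded into a 4x4 matrix and inverted to get the world-from-camera transform that is logged to Rerun and appended to the trajectory path. A minimal, self-contained sketch of that conversion (the helper name and test pose below are illustrative, not from the repo):

import numpy as np
from scipy.spatial.transform import Rotation


def world_T_cam_from_quat_pose(quat_pose: np.ndarray) -> np.ndarray:
    """Expand a DPVO [tx ty tz qx qy qz qw] pose into a 4x4 world-from-camera matrix."""
    cam_T_world = np.eye(4)
    cam_T_world[:3, :3] = Rotation.from_quat(quat_pose[3:]).as_matrix()  # xyzw order
    cam_T_world[:3, 3] = quat_pose[:3]
    # the stored pose maps world -> camera; invert it to place the camera in the world
    return np.linalg.inv(cam_T_world)


# identity rotation, translation of 2 along +z: the camera sits at z = -2 in the world
pose = np.array([0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 1.0])
print(world_T_cam_from_quat_pose(pose)[:3, 3])  # -> [ 0.  0. -2.]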

mini_dpvo/dpvo.py CHANGED
@@ -156,6 +156,7 @@ class DPVO:
         poses = lietorch.stack(poses, dim=0)
         poses = poses.inv().data.cpu().numpy()
         tstamps = np.array(self.tlist, dtype=np.float64)
+        print("Done!")
 
         return poses, tstamps
 

mini_dpvo/stream.py CHANGED
@@ -3,13 +3,10 @@ import numpy as np
 from pathlib import Path
 from itertools import chain
 from multiprocessing import Queue
+import mmcv
 
 
-def image_stream(
-    queue: Queue, imagedir: str, calib: str, stride: int, skip: int = 0
-) -> None:
-    """image generator"""
-
+def load_calib(calib: str) -> np.ndarray:
     calib = np.loadtxt(calib, delimiter=" ")
     fx, fy, cx, cy = calib[:4]
 
@@ -18,6 +15,17 @@ def image_stream(
     K[0, 2] = cx
     K[1, 1] = fy
     K[1, 2] = cy
+    return K, calib
+
+
+def image_stream(
+    queue: Queue, imagedir: str, calib: str | None, stride: int, skip: int = 0
+) -> None:
+    """image generator"""
+
+    if calib is not None:
+        K, calib = load_calib(calib)
+        fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]
 
     img_exts = ["*.png", "*.jpeg", "*.jpg"]
     image_list = sorted(chain.from_iterable(Path(imagedir).glob(e) for e in img_exts))[
@@ -26,15 +34,11 @@ def image_stream(
 
     for t, imfile in enumerate(image_list):
        image = cv2.imread(str(imfile))
-        if len(calib) > 4:
-            image = cv2.undistort(image, K, calib[4:])
-
-        if 0:
-            image = cv2.resize(image, None, fx=0.5, fy=0.5)
-            intrinsics = np.array([fx / 2, fy / 2, cx / 2, cy / 2])
 
-        intrinsics = np.array([fx, fy, cx, cy])
+        if calib is not None:
+            intrinsics = np.array([fx, fy, cx, cy])
+        else:
+            intrinsics = None
 
         h, w, _ = image.shape
         image = image[: h - h % 16, : w - w % 16]
@@ -45,48 +49,43 @@ def image_stream(
 
 
 def video_stream(
-    queue: Queue, imagedir: str, calib: str, stride: int, skip: int = 0
+    queue: Queue, imagedir: str, calib: str | None, stride: int, skip: int = 0
 ) -> None:
     """video generator"""
-
-    fx, fy, cx, cy = calib[:4]
-
-    K = np.eye(3)
-    K[0, 0] = fx
-    K[0, 2] = cx
-    K[1, 1] = fy
-    K[1, 2] = cy
-
-    cap = cv2.VideoCapture(imagedir)
+    if calib is not None:
+        K, calib = load_calib(calib)
+        fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]
+
+    video_reader = mmcv.VideoReader(imagedir)
 
     t = 0
 
     for _ in range(skip):
-
+        image = video_reader.read()
 
     while True:
         # Capture frame-by-frame
         for _ in range(stride):
-
-
-            if not ret:
+            image = video_reader.read()
+            if image is None:
                 break
 
-        if
+        if image is None:
             break
 
-        if len(calib) > 4:
-
+        # if len(calib) > 4:
+        #     image = cv2.undistort(image, K, calib[4:])
 
         image = cv2.resize(image, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_AREA)
         h, w, _ = image.shape
         image = image[: h - h % 16, : w - w % 16]
 
-
+        if calib is not None:
+            intrinsics = np.array([fx * 0.5, fy * 0.5, cx * 0.5, cy * 0.5])
+        else:
+            intrinsics = None
         queue.put((t, image, intrinsics))
 
         t += 1
 
     queue.put((-1, image, intrinsics))
-    cap.release()
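video_stream halves each frame with cv2.resize before queueing it, and the calibrated intrinsics are multiplied by the same 0.5: pinhole focal lengths and principal point scale linearly with image resolution, which is also the relationship calib_from_dust3r relies on when scaling the estimated K back to the original image size. A small sketch of that rule (the helper name and the numbers are illustrative):

import numpy as np


def scale_intrinsics(fx: float, fy: float, cx: float, cy: float, scale: float) -> np.ndarray:
    """Pinhole intrinsics [fx, fy, cx, cy] scale linearly with image resolution."""
    return np.array([fx * scale, fy * scale, cx * scale, cy * scale])


# a 1920x1080 camera resized by 0.5 behaves like a 960x540 camera
print(scale_intrinsics(1000.0, 1000.0, 960.0, 540.0, 0.5))  # [500. 500. 480. 270.]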

packages.txt DELETED
@@ -1 +0,0 @@
-lsof

pixi.lock CHANGED
The diff for this file is too large to render. See the raw diff.

pixi.toml CHANGED
@@ -1,6 +1,6 @@
 [project]
 name = "mini-dpvo"
-version = "0.1.
+version = "0.1.0"
 description = "Add a short description here"
 authors = ["pablovela5620 <[email protected]>"]
 channels = ["nvidia/label/cuda-11.8.0", "nvidia", "conda-forge", "pytorch", "pyg"]
@@ -15,16 +15,40 @@ download-model = """
 || (
     wget https://www.dropbox.com/s/nap0u8zslspdwm4/models.zip
     && unzip models.zip -d checkpoints
+    && rm -r models.zip
 )
 """
-
-
-
+download-dpvo-data = """
+test -e data/movies/IMG_0492.MOV
+|| (
+    wget https://www.dropbox.com/s/7030y0mdl6efteg/movies.zip -P data/
+    && unzip data/movies.zip -d data/
+    && rm -r data/movies.zip
+)
+"""
+
+download-iphone-data = """
+test -e data/iphone/pool.MOV
+|| (
+    huggingface-cli download pablovela5620/dpvo-example-data pool.MOV --repo-type dataset --local-dir data/iphone/
+)
+"""
+
+post-install = {cmd="python -m pip install -e .", depends_on=["download-model", "download-dpvo-data", "download-iphone-data"], outputs=["cuda_ba.cpython-311-x86_64-linux-gnu.so"]}
+rr-viewer = "rerun --memory-limit 50% --drop-at-latency 500ms"
+
+demo = """
+python tools/demo.py --imagedir data/movies/IMG_0493.MOV --config config/fast.yaml
+"""
+app = {cmd="python tools/app.py", depends_on=["post-install"]}
+
+# Docker tasks
+docker-build = "docker build --no-cache -t mini-dpvo ."
+docker-run = {cmd="docker run --gpus all -it -p 7860:7860 mini-dpvo", depends_on=["docker-build"]}
 
 [dependencies]
 python = "3.11.*"
-pip = ">=
+pip = ">=24.0,<25"
 cuda = {version = "*", channel="nvidia/label/cuda-11.8.0"}
 pytorch-cuda = {version = "11.8.*", channel="pytorch"}
 pytorch = {version = ">=2.2.0,<2.3", channel="pytorch"}
@@ -35,14 +59,16 @@ matplotlib = ">=3.8.4,<3.9"
 yacs = ">=0.1.8,<0.2"
 jaxtyping = ">=0.2.28,<0.3"
 icecream = ">=2.1.3,<2.2"
-rerun-sdk = "0.15.*"
-gradio = "4.31.2.*"
 eigen = ">=3.4.0,<3.5"
+rerun-sdk = ">=0.16.1"
+tyro = ">=0.8.4,<0.9"
+unzip = ">=6.0,<7"
 
 [pypi-dependencies]
-mini-dust3r = "*"
-spaces = "==0.28.3"
 opencv-python = ">=4.9.0.80"
 evo = ">=1.28.0"
-
-
+mini-dust3r = "*"
+gradio-rerun = ">=0.0.3"
+mmcv = "*"
+yt-dlp = "*"
+gradio = ">=4.36.0"

pixi_app.py DELETED
@@ -1,14 +0,0 @@
-import gradio as gr
-import spaces
-import torch
-
-tensor = torch.zeros(3).cuda()
-print(tensor.device)
-
-@spaces.GPU
-def greet(name):
-    print(tensor.device)
-    return "Hello pixi pablo " + name + "!!"
-
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

tools/app.py CHANGED
@@ -1,89 +1,140 @@
 import gradio as gr
 
-# import spaces
 from gradio_rerun import Rerun
 import rerun as rr
 import rerun.blueprint as rrb
-from pathlib import Path
-import uuid
 import mmcv
-import 
+from timeit import default_timer as timer
+from typing import Literal
 
-from mini_dpvo.api.inference import run
 from mini_dpvo.config import cfg as base_cfg
-
-base_cfg.BUFFER_SIZE = 2048
-
-        rrb.Horizontal(
-            rrb.Spatial3DView(origin=f"{log_path}"),
-        ),
-        collapse_panels=True,
-    )
-    else:
-        ),
-        collapse_panels=True,
-    )
-    return blueprint
-
-    dpvo_pred, time_taken = run(
-        cfg=base_cfg,
-        network_path="checkpoints/dpvo.pth",
-        imagedir=video_file_path,
-        calib="data/calib/iphone.txt",
-        stride=stride,
-        skip=0,
-        vis_during=True,
-    )
+from mini_dpvo.api.inference import (
+    log_trajectory,
+    calib_from_dust3r,
+    create_reader,
+    calculate_num_frames,
+)
+
+import torch
+import numpy as np
+from pathlib import Path
+from multiprocessing import Process, Queue
+from mini_dpvo.dpvo import DPVO
+from jaxtyping import UInt8, Float64, Float32
+from mini_dust3r.model import AsymmetricCroCo3DStereo
+from tqdm import tqdm
+import tyro
+from dataclasses import dataclass
+
+if gr.NO_RELOAD:
+    NETWORK_PATH = "checkpoints/dpvo.pth"
+    DEVICE = (
+        "mps"
+        if torch.backends.mps.is_available()
+        else "cuda"
+        if torch.cuda.is_available()
+        else "cpu"
+    )
+    MODEL = AsymmetricCroCo3DStereo.from_pretrained(
+        "naver/DUSt3R_ViTLarge_BaseDecoder_512_dpt"
+    ).to(DEVICE)
+
+
+@dataclass
+class GradioDPVOConfig:
+    share: bool = False
+    port: int = 7860
+    server_name: str = "0.0.0.0"
+
+
+@rr.thread_local_stream("mini_dpvo")
+@torch.no_grad()
+def run_dpvo(
+    video_file_path: str,
+    jpg_quality: str,
+    stride: int = 1,
+    skip: int = 0,
+    config_type: Literal["accurate", "fast"] = "accurate",
+    progress=gr.Progress(),
+):
+    # create a stream to send data back to the rerun viewer
+    stream = rr.binary_stream()
+    parent_log_path = Path("world")
+    rr.log(f"{parent_log_path}", rr.ViewCoordinates.RDF, timeless=True)
+
+    blueprint = rrb.Blueprint(
+        collapse_panels=True,
+    )
+
+    rr.send_blueprint(blueprint)
+
+    if config_type == "accurate":
+        base_cfg.merge_from_file("config/default.yaml")
+    elif config_type == "fast":
+        base_cfg.merge_from_file("config/fast.yaml")
+    else:
+        raise ValueError("Invalid config type")
+    base_cfg.BUFFER_SIZE = 2048
+
+    slam = None
+    start_time = timer()
+    queue = Queue(maxsize=8)
+
+    reader: Process = create_reader(video_file_path, None, stride, skip, queue)
+    reader.start()
+
+    # get the first frame
+    progress(progress=0.1, desc="Estimating Camera Intrinsics")
+    _, bgr_hw3, _ = queue.get()
+    K_33_pred = calib_from_dust3r(bgr_hw3, MODEL, DEVICE)
+    intri_np: Float64[np.ndarray, "4"] = np.array(
+        [K_33_pred[0, 0], K_33_pred[1, 1], K_33_pred[0, 2], K_33_pred[1, 2]]
+    )
+
+    num_frames = calculate_num_frames(video_file_path, stride, skip)
+    path_list = []
+
+    with tqdm(total=num_frames, desc="Processing Frames") as pbar:
+        while True:
+            timestep: int
+            bgr_hw3: UInt8[np.ndarray, "h w 3"]
+            intri_np: Float64[np.ndarray, "4"]
+            (timestep, bgr_hw3, _) = queue.get()
+            # queue will have a (-1, image, intrinsics) tuple when the reader is done
+            if timestep < 0:
+                break
+
+            rr.set_time_sequence(timeline="timestep", sequence=timestep)
+
+            bgr_3hw: UInt8[torch.Tensor, "h w 3"] = (
+                torch.from_numpy(bgr_hw3).permute(2, 0, 1).cuda()
+            )
+            intri_torch: Float64[torch.Tensor, "4"] = torch.from_numpy(intri_np).cuda()
+
+            if slam is None:
+                _, h, w = bgr_3hw.shape
+                slam = DPVO(base_cfg, NETWORK_PATH, ht=h, wd=w)
+
+            slam(timestep, bgr_3hw, intri_torch)
+            pbar.update(1)
+
+            if slam.is_initialized:
+                poses: Float32[torch.Tensor, "buffer_size 7"] = slam.poses_
+                points: Float32[torch.Tensor, "buffer_size*num_patches 3"] = (
+                    slam.points_
+                )
+                colors: UInt8[torch.Tensor, "buffer_size num_patches 3"] = slam.colors_
+                path_list = log_trajectory(
+                    parent_log_path,
+                    poses,
+                    points,
+                    colors,
+                    intri_np,
+                    bgr_hw3,
+                    path_list,
+                    jpg_quality,
+                )
+            yield stream.read(), timer() - start_time
 
 
 def on_file_upload(video_file_path: str) -> None:
@@ -96,26 +147,23 @@ def on_file_upload(video_file_path: str) -> None:
     return video_info
 
 
-        '<p style="text-align: center;">Learn more about mini-dpvo here <a href="https://github.com/pablovela5620/mini-dpvo">here</a></p>'
-    )
-    with gr.Tab(label="Video Input"):
+def main(gradio_config: GradioDPVOConfig):
+    with gr.Blocks(
+        css=""".gradio-container {margin: 0 !important; min-width: 100%};""",
+        title="Mini-DPVO Demo",
+    ) as demo:
+        # scene state is save so that you can change conf_thr, cam_size... without rerunning the inference
+        gr.HTML('<h2 style="text-align: center;">Mini-DPVO Demo</h2>')
+        gr.HTML(
+            '<p style="text-align: center;">Unofficial DPVO demo using the mini-dpvo. Learn more about mini-dpvo <a href="https://github.com/pablovela5620/mini-dpvo">here</a>.</p>'
+        )
         with gr.Column():
             with gr.Row():
                 video_input = gr.File(
-                    height=
+                    height=100,
                     file_count="single",
-                    file_types=[".mp4", ".mov"],
-                    label="Video",
+                    file_types=[".mp4", ".mov", ".MOV", ".webm"],
+                    label="Video File",
                 )
             with gr.Column():
                 video_info = gr.Markdown(
@@ -123,26 +171,79 @@ def main(gradio_config: GradioDPVOConfig):
                     **Video Info:**
                     """
                 )
-                time_taken = gr.
+                time_taken = gr.Number(
+                    label="Time Taken (s)", precision=2, interactive=False
+                )
         with gr.Accordion(label="Advanced", open=False):
-
+            with gr.Row():
+                jpg_quality = gr.Radio(
+                    label="JPEG Quality %: Lower quality means faster streaming",
+                    choices=[10, 50, 90],
+                    value=90,
+                    type="value",
+                )
+            stride = gr.Slider(
+                label="Stride: How many frames to sample between each prediction",
+                minimum=1,
+                maximum=5,
+                step=1,
+                value=5,
+            )
+            skip = gr.Number(
+                label="Skip: How many frames to skip at the beginning",
+                value=0,
+                precision=0,
+            )
+            config_type = gr.Dropdown(
+                label="Config Type: Choose between accurate and fast",
+                value="fast",
+                choices=["accurate", "fast"],
+                max_choices=1,
+            )
+        with gr.Row():
+            start_btn = gr.Button("Run")
+            stop_btn = gr.Button("Stop")
+        rr_viewer = Rerun(height=600, streaming=True)
+
+        # Example videos
+        base_example_params = [50, 4, 0, "fast"]
+        example_dpvo_dir = Path("data/movies")
+        example_iphone_dir = Path("data/iphone")
+        example_video_paths = sorted(example_iphone_dir.glob("*.MOV")) + sorted(
+            example_dpvo_dir.glob("*.MOV")
+        )
+        example_video_paths = [str(path) for path in example_video_paths]
+
+        gr.Examples(
+            examples=[[path, *base_example_params] for path in example_video_paths],
+            inputs=[video_input, jpg_quality, stride, skip, config_type],
+            outputs=[rr_viewer],
+            fn=run_dpvo,
+        )
+
+        click_event = start_btn.click(
+            fn=run_dpvo,
+            inputs=[video_input, jpg_quality, stride, skip, config_type],
+            outputs=[rr_viewer, time_taken],
+        )
+
+        stop_btn.click(
+            fn=None,
+            inputs=[],
+            outputs=[],
+            cancels=[click_event],
         )
 
         video_input.upload(
             fn=on_file_upload, inputs=[video_input], outputs=[video_info]
         )
-
+
+    demo.launch(
+        share=gradio_config.share,
+        server_name=gradio_config.server_name,
+        server_port=gradio_config.port,
+    )
+
+
+if __name__ == "__main__":
+    main(tyro.cli(GradioDPVOConfig))
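The rewritten app streams partial SLAM results into the embedded viewer: run_dpvo is a generator decorated with @rr.thread_local_stream that logs into its own recording and yields chunks of rr.binary_stream() to the gradio_rerun Rerun component after every frame. A stripped-down sketch of just that streaming pattern (the logged points stand in for the SLAM output; names here are illustrative):

import gradio as gr
import rerun as rr
from gradio_rerun import Rerun


@rr.thread_local_stream("stream_sketch")
def stream_points(n_steps: float):
    # each yielded chunk is flushed to the viewer, so results appear incrementally
    stream = rr.binary_stream()
    for t in range(int(n_steps)):
        rr.set_time_sequence("timestep", t)
        rr.log("points", rr.Points3D([[float(t), 0.0, 0.0]]))
        yield stream.read()


with gr.Blocks() as demo:
    n_steps = gr.Number(value=10, precision=0, label="Steps")
    start_btn = gr.Button("Run")
    viewer = Rerun(height=400, streaming=True)
    start_btn.click(fn=stream_points, inputs=[n_steps], outputs=[viewer])

if __name__ == "__main__":
    demo.launch()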

tools/demo.py CHANGED
@@ -1,6 +1,6 @@
 from argparse import ArgumentParser
 import rerun as rr
-from mini_dpvo.api.inference import 
+from mini_dpvo.api.inference import inference_dpvo
 from mini_dpvo.config import cfg as base_cfg
 
 
@@ -12,8 +12,7 @@ if __name__ == "__main__":
     parser.add_argument("--stride", type=int, default=2)
     parser.add_argument("--skip", type=int, default=0)
     parser.add_argument("--buffer", type=int, default=2048)
-    parser.add_argument("--config", default="config/
-    parser.add_argument("--vis-during", action="store_true")
+    parser.add_argument("--config", default="config/fast.yaml")
     rr.script_add_args(parser)
     args = parser.parse_args()
     rr.script_setup(args, "mini_dpvo")
@@ -24,13 +23,12 @@ if __name__ == "__main__":
     print("Running with config...")
     print(base_cfg)
 
-    
-        base_cfg,
-        args.network_path,
-        args.imagedir,
-        args.calib,
-        args.stride,
-        args.skip,
-        vis_during=args.vis_during,
+    inference_dpvo(
+        cfg=base_cfg,
+        network_path=args.network_path,
+        imagedir=args.imagedir,
+        calib=args.calib,
+        stride=args.stride,
+        skip=args.skip,
     )
     rr.script_teardown(args)
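With run renamed to inference_dpvo and the vis_during flag gone, the pipeline can also be driven directly from Python instead of through tools/demo.py. A hedged sketch (the paths mirror the pixi demo task; passing calib=None exercises the new dust3r-based intrinsics estimation):

import rerun as rr
from mini_dpvo.api.inference import inference_dpvo
from mini_dpvo.config import cfg as base_cfg

rr.init("mini_dpvo", spawn=True)  # spawn a local viewer to receive the logs
base_cfg.merge_from_file("config/fast.yaml")

# calib=None -> intrinsics are estimated from the first frame via mini-dust3r
dpvo_pred, time_taken = inference_dpvo(
    cfg=base_cfg,
    network_path="checkpoints/dpvo.pth",
    imagedir="data/movies/IMG_0493.MOV",
    calib=None,
    stride=4,
    skip=0,
)
print(f"finished in {time_taken:.1f}s")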