xizaoqu
commited on
Commit
·
594fef7
1
Parent(s):
0d5deae
update precision
Browse files- algorithms/worldmem/df_video.py +0 -7
- algorithms/worldmem/models/dit.py +0 -4
- app.py +6 -1
algorithms/worldmem/df_video.py
CHANGED
@@ -829,8 +829,6 @@ class WorldMemMinecraft(DiffusionForcingBase):
|
|
829 |
|
830 |
|
831 |
for ai in range(len(new_actions)):
|
832 |
-
from time import time
|
833 |
-
start_time = time()
|
834 |
|
835 |
last_frame = xs_pred[-1].clone()
|
836 |
curr_actions = new_actions[ai]
|
@@ -886,7 +884,6 @@ class WorldMemMinecraft(DiffusionForcingBase):
|
|
886 |
image_width=first_frame.shape[-1], image_height=first_frame.shape[-2]
|
887 |
)
|
888 |
|
889 |
-
mid_time = time()
|
890 |
# Perform sampling for each step in the scheduling matrix
|
891 |
for m in range(scheduling_matrix.shape[0] - 1):
|
892 |
from_noise_levels, to_noise_levels = self._prepare_noise_levels(
|
@@ -905,10 +902,6 @@ class WorldMemMinecraft(DiffusionForcingBase):
|
|
905 |
frame_idx=frame_idx_list
|
906 |
).cpu()
|
907 |
|
908 |
-
end_time = time()
|
909 |
-
|
910 |
-
print("time:", end_time - start_time, "mid time:", mid_time - start_time)
|
911 |
-
|
912 |
|
913 |
if condition_similar_length:
|
914 |
xs_pred = xs_pred[:-condition_similar_length]
|
|
|
829 |
|
830 |
|
831 |
for ai in range(len(new_actions)):
|
|
|
|
|
832 |
|
833 |
last_frame = xs_pred[-1].clone()
|
834 |
curr_actions = new_actions[ai]
|
|
|
884 |
image_width=first_frame.shape[-1], image_height=first_frame.shape[-2]
|
885 |
)
|
886 |
|
|
|
887 |
# Perform sampling for each step in the scheduling matrix
|
888 |
for m in range(scheduling_matrix.shape[0] - 1):
|
889 |
from_noise_levels, to_noise_levels = self._prepare_noise_levels(
|
|
|
902 |
frame_idx=frame_idx_list
|
903 |
).cpu()
|
904 |
|
|
|
|
|
|
|
|
|
905 |
|
906 |
if condition_similar_length:
|
907 |
xs_pred = xs_pred[:-condition_similar_length]
|
algorithms/worldmem/models/dit.py
CHANGED
@@ -487,8 +487,6 @@ class DiT(nn.Module):
|
|
487 |
t: (B, T,) tensor of diffusion timesteps
|
488 |
"""
|
489 |
|
490 |
-
from time import time
|
491 |
-
start = time()
|
492 |
B, T, C, H, W = x.shape
|
493 |
|
494 |
# add spatial embeddings
|
@@ -552,8 +550,6 @@ class DiT(nn.Module):
|
|
552 |
# print("self.blocks[0].r_adaLN_modulation[1].weight:", self.blocks[0].r_adaLN_modulation[1].weight)
|
553 |
# print("self.blocks[0].t_adaLN_modulation[1].weight:", self.blocks[0].t_adaLN_modulation[1].weight)
|
554 |
|
555 |
-
end_time = time()
|
556 |
-
print("in model time:", end_time - start)
|
557 |
return x
|
558 |
|
559 |
|
|
|
487 |
t: (B, T,) tensor of diffusion timesteps
|
488 |
"""
|
489 |
|
|
|
|
|
490 |
B, T, C, H, W = x.shape
|
491 |
|
492 |
# add spatial embeddings
|
|
|
550 |
# print("self.blocks[0].r_adaLN_modulation[1].weight:", self.blocks[0].r_adaLN_modulation[1].weight)
|
551 |
# print("self.blocks[0].t_adaLN_modulation[1].weight:", self.blocks[0].t_adaLN_modulation[1].weight)
|
552 |
|
|
|
|
|
553 |
return x
|
554 |
|
555 |
|
app.py
CHANGED
@@ -201,7 +201,11 @@ self_memory_c2w = None
|
|
201 |
self_frame_idx = None
|
202 |
|
203 |
|
204 |
-
|
|
|
|
|
|
|
|
|
205 |
def run_interactive(first_frame, action, first_pose, device, self_frames, self_actions,
|
206 |
self_poses, self_memory_c2w, self_frame_idx):
|
207 |
new_frame, self_frames, self_actions, self_poses, self_memory_c2w, self_frame_idx = worldmem.interactive(first_frame,
|
@@ -271,6 +275,7 @@ def reset():
|
|
271 |
|
272 |
self_frames = None
|
273 |
self_poses = None
|
|
|
274 |
self_memory_c2w = None
|
275 |
self_frame_idx = None
|
276 |
memory_frames = load_image_as_tensor(DEFAULT_IMAGE).numpy()[None]
|
|
|
201 |
self_frame_idx = None
|
202 |
|
203 |
|
204 |
+
def get_duration_single_image_to_long_video(first_frame, action, first_pose, device, self_frames, self_actions,
|
205 |
+
self_poses, self_memory_c2w, self_frame_idx):
|
206 |
+
return 5 * len(action) is self_actions is not None else 5
|
207 |
+
|
208 |
+
@spaces.GPU(duration=get_duration_single_image_to_long_video)
|
209 |
def run_interactive(first_frame, action, first_pose, device, self_frames, self_actions,
|
210 |
self_poses, self_memory_c2w, self_frame_idx):
|
211 |
new_frame, self_frames, self_actions, self_poses, self_memory_c2w, self_frame_idx = worldmem.interactive(first_frame,
|
|
|
275 |
|
276 |
self_frames = None
|
277 |
self_poses = None
|
278 |
+
self_actions = None
|
279 |
self_memory_c2w = None
|
280 |
self_frame_idx = None
|
281 |
memory_frames = load_image_as_tensor(DEFAULT_IMAGE).numpy()[None]
|