da03 committed on
Commit
b07d75e
·
1 Parent(s): a9363f4
Files changed (2) hide show
  1. main.py +13 -4
  2. utils.py +6 -4
main.py CHANGED
@@ -258,16 +258,25 @@ def predict_next_frame(previous_frames: List[np.ndarray], previous_actions: List
258
  prompt = " ".join(action_descriptions[-8:])
259
  print(prompt)
260
  #prompt = "N N N N N : N N N N N N N N N N : N N N N N N N N N N : N N N N N N N N N N : N N N N N N N N N N : N N N N N N N N N N : N N N N N N N N N N : N N N N N + 0 3 0 7 : + 0 3 7 5"
261
- x, y, action_type = parse_action_string(action_descriptions[-1])
262
- pos_map, leftclick_map, x_scaled, y_scaled = create_position_and_click_map((x, y), action_type)
263
-
 
 
 
 
 
 
 
 
 
264
 
265
  #prompt = ''
266
  #prompt = "1~1 0~0 0~0 0~0 0~0 0~0 0~0 0~0"
267
  print(prompt)
268
 
269
  # Generate the next frame
270
- new_frame = sample_frame(model, prompt, image_sequence_tensor, pos_map=pos_map, leftclick_map=leftclick_map)
271
 
272
  # Convert the generated frame to the correct format
273
  new_frame = new_frame.transpose(1, 2, 0)
 
258
  prompt = " ".join(action_descriptions[-8:])
259
  print(prompt)
260
  #prompt = "N N N N N : N N N N N N N N N N : N N N N N N N N N N : N N N N N N N N N N : N N N N N N N N N N : N N N N N N N N N N : N N N N N N N N N N : N N N N N + 0 3 0 7 : + 0 3 7 5"
261
+ #x, y, action_type = parse_action_string(action_descriptions[-1])
262
+ #pos_map, leftclick_map, x_scaled, y_scaled = create_position_and_click_map((x, y), action_type)
263
+ leftclick_maps = []
264
+ pos_maps = []
265
+ for j in range(1, 9):
266
+ x, y, action_type = parse_action_string(action_descriptions[-j])
267
+ pos_map_j, leftclick_map_j, x_scaled_j, y_scaled_j = create_position_and_click_map((x, y), action_type)
268
+ leftclick_maps.append(leftclick_map_j)
269
+ pos_maps.append(pos_map_j)
270
+ if j == 1:
271
+ x_scaled = x_scaled_j
272
+ y_scaled = y_scaled_j
273
 
274
  #prompt = ''
275
  #prompt = "1~1 0~0 0~0 0~0 0~0 0~0 0~0 0~0"
276
  print(prompt)
277
 
278
  # Generate the next frame
279
+ new_frame = sample_frame(model, prompt, image_sequence_tensor, pos_maps=pos_maps, leftclick_maps=leftclick_maps)
280
 
281
  # Convert the generated frame to the correct format
282
  new_frame = new_frame.transpose(1, 2, 0)
utils.py CHANGED
@@ -28,7 +28,7 @@ def load_model_from_config(config_path, model_name, device='cuda'):
28
  model.eval()
29
  return model
30
 
31
- def sample_frame(model: LatentDiffusion, prompt: str, image_sequence: torch.Tensor, pos_map=None, leftclick_map=None):
32
  sampler = DDIMSampler(model)
33
 
34
  with torch.no_grad():
@@ -46,9 +46,11 @@ def sample_frame(model: LatentDiffusion, prompt: str, image_sequence: torch.Tens
46
  print (image_sequence.shape, padding_mask.shape, c['c_concat'].shape)
47
  c['c_concat'] = c['c_concat'] * (~padding_mask.unsqueeze(-1).unsqueeze(-1)) # Zero out the corresponding features
48
 
49
- if pos_map is not None:
50
- print (pos_map.shape, c['c_concat'].shape)
51
- c['c_concat'] = torch.cat([c['c_concat'][:, :, :, :], pos_map.to(c['c_concat'].device).unsqueeze(0), leftclick_map.to(c['c_concat'].device).unsqueeze(0)], dim=1)
 
 
52
 
53
  print ('sleeping')
54
  #time.sleep(120)
 
28
  model.eval()
29
  return model
30
 
31
+ def sample_frame(model: LatentDiffusion, prompt: str, image_sequence: torch.Tensor, pos_maps=None, leftclick_maps=None):
32
  sampler = DDIMSampler(model)
33
 
34
  with torch.no_grad():
 
46
  print (image_sequence.shape, padding_mask.shape, c['c_concat'].shape)
47
  c['c_concat'] = c['c_concat'] * (~padding_mask.unsqueeze(-1).unsqueeze(-1)) # Zero out the corresponding features
48
 
49
+ if pos_maps is not None:
50
+ pos_map = pos_maps[0]
51
+ leftclick_map = torch.cat(leftclick_maps, dim=0)
52
+ print (pos_maps[0].shape, c['c_concat'].shape, leftclick_map.shape)
53
+ c['c_concat'] = torch.cat([c['c_concat'][:, :, :, :], pos_maps[0].to(c['c_concat'].device).unsqueeze(0), leftclick_maps[0].to(c['c_concat'].device).unsqueeze(0)], dim=1)
54
 
55
  print ('sleeping')
56
  #time.sleep(120)