neural-os

Runtime error

da03 committed on Feb 17

Commit

6ee36ca

1 Parent(s): 497c0a8

.

Files changed (2) hide show

main.py CHANGED Viewed

@@ -204,16 +204,21 @@ def format_action(action_str, is_padding=False, is_leftclick=False):
     # Format with sign and proper spacing
     return prefix + " " + f"{'+ ' if x >= 0 else '- '}{x_spaced} : {'+ ' if y >= 0 else '- '}{y_spaced}"
 def predict_next_frame(previous_frames, previous_actions: List[Tuple[str, List[int]]]) -> np.ndarray:
-    width, height = 512, 384
     all_click_positions = []
-    initial_images = load_initial_images(width, height)
-    print ('length of previous_frames', len(previous_frames))
-    padding_image = torch.zeros((height//8, width//8, 4)).to(device)
     # Prepare the image sequence for the model
-    assert len(initial_images) == 32
     image_sequence = previous_frames[-32:]  # Take the last 7 frames
     i = 1
     while len(image_sequence) < 32:

     # Format with sign and proper spacing
     return prefix + " " + f"{'+ ' if x >= 0 else '- '}{x_spaced} : {'+ ' if y >= 0 else '- '}{y_spaced}"
+width, height = 512, 384
+padding_image = torch.zeros((height//8, width//8, 4)).to(device)
+data_mean = -0.54
+data_std = 6.78
+data_min = -27.681446075439453
+data_max = 30.854148864746094
+padding_image = (padding_image - data_mean) / data_std
 def predict_next_frame(previous_frames, previous_actions: List[Tuple[str, List[int]]]) -> np.ndarray:
     all_click_positions = []
+    #initial_images = load_initial_images(width, height)
+    #print ('length of previous_frames', len(previous_frames))
     # Prepare the image sequence for the model
+    #assert len(initial_images) == 32
     image_sequence = previous_frames[-32:]  # Take the last 7 frames
     i = 1
     while len(image_sequence) < 32:

utils.py CHANGED Viewed

@@ -57,11 +57,7 @@ def sample_frame(model: LatentDiffusion, prompt: str, image_sequence: torch.Tens
         #padding_mask = padding_mask.repeat(1, 4)  # Repeat mask 4 times for each projected channel
         #print (image_sequence.shape, padding_mask.shape, c['c_concat'].shape)
         #c['c_concat'] = c['c_concat'] * (~padding_mask.unsqueeze(-1).unsqueeze(-1))  # Zero out the corresponding features
-        data_mean = -0.54
-        data_std = 6.78
-        data_min = -27.681446075439453
-        data_max = 30.854148864746094
-        c['c_concat'] = (c['c_concat'] - data_mean) / data_std
         if pos_maps is not None:
             pos_map = pos_maps[0]

         #padding_mask = padding_mask.repeat(1, 4)  # Repeat mask 4 times for each projected channel
         #print (image_sequence.shape, padding_mask.shape, c['c_concat'].shape)
         #c['c_concat'] = c['c_concat'] * (~padding_mask.unsqueeze(-1).unsqueeze(-1))  # Zero out the corresponding features
         if pos_maps is not None:
             pos_map = pos_maps[0]