da03 committed on
Commit
6eec349
·
1 Parent(s): 7e76843
Files changed (2) hide show
  1. main.py +15 -14
  2. utils.py +2 -2
main.py CHANGED
@@ -13,7 +13,7 @@ import os
13
  import time
14
 
15
  DEBUG = False
16
- DEBUG_TEACHER_FORCING = False
17
  app = FastAPI()
18
 
19
  # Mount the static directory to serve HTML, JavaScript, and CSS files
@@ -426,18 +426,18 @@ async def websocket_endpoint(websocket: WebSocket):
426
  if not DEBUG_TEACHER_FORCING:
427
  previous_actions = []
428
 
429
- for t in range(15): # Generate 15 actions
430
  # Random movement
431
- x = np.random.randint(0, 64)
432
- y = np.random.randint(0, 48)
433
- #x = max(0, min(63, x + dx))
434
- #y = max(0, min(47, y + dy))
435
 
436
- # Random click with 20% probability
437
- if np.random.random() < 0.2:
438
- action_type = 'L'
439
- else:
440
- action_type = 'N'
441
 
442
  # Format action string
443
  previous_actions.append((action_type, (x*8, y*8)))
@@ -465,7 +465,7 @@ async def websocket_endpoint(websocket: WebSocket):
465
 
466
 
467
  # Store the actions
468
- if DEBUG:
469
  position = positions[0]
470
  #positions = positions[1:]
471
  #mouse_position = position.split('~')
@@ -498,12 +498,13 @@ async def websocket_endpoint(websocket: WebSocket):
498
  next_frame, next_frame_append = predict_next_frame(previous_frames, previous_actions)
499
  # Load and append the corresponding ground truth image instead of model output
500
  print ('here4', len(previous_frames))
501
- if True and DEBUG_TEACHER_FORCING:
502
  img = Image.open(f"record_10003/image_{117+len(previous_frames)}.png")
503
  previous_frames.append(img)
504
  else:
505
  #assert False
506
- previous_frames.append(next_frame_append)
 
507
  previous_frames = []
508
 
509
  # Convert the numpy array to a base64 encoded image
 
13
  import time
14
 
15
  DEBUG = False
16
+ DEBUG_TEACHER_FORCING = True
17
  app = FastAPI()
18
 
19
  # Mount the static directory to serve HTML, JavaScript, and CSS files
 
426
  if not DEBUG_TEACHER_FORCING:
427
  previous_actions = []
428
 
429
+ for t in range(15): # Generate 15 actions
430
  # Random movement
431
+ x = np.random.randint(0, 64)
432
+ y = np.random.randint(0, 48)
433
+ #x = max(0, min(63, x + dx))
434
+ #y = max(0, min(47, y + dy))
435
 
436
+ # Random click with 20% probability
437
+ if np.random.random() < 0.2:
438
+ action_type = 'L'
439
+ else:
440
+ action_type = 'N'
441
 
442
  # Format action string
443
  previous_actions.append((action_type, (x*8, y*8)))
 
465
 
466
 
467
  # Store the actions
468
+ if False and DEBUG:
469
  position = positions[0]
470
  #positions = positions[1:]
471
  #mouse_position = position.split('~')
 
498
  next_frame, next_frame_append = predict_next_frame(previous_frames, previous_actions)
499
  # Load and append the corresponding ground truth image instead of model output
500
  print ('here4', len(previous_frames))
501
+ if False and DEBUG_TEACHER_FORCING:
502
  img = Image.open(f"record_10003/image_{117+len(previous_frames)}.png")
503
  previous_frames.append(img)
504
  else:
505
  #assert False
506
+ #previous_frames.append(next_frame_append)
507
+ pass
508
  previous_frames = []
509
 
510
  # Convert the numpy array to a base64 encoded image
utils.py CHANGED
@@ -55,7 +55,7 @@ def sample_frame(model: LatentDiffusion, prompt: str, image_sequence: torch.Tens
55
  pos_map = pos_maps[0]
56
  leftclick_map = torch.cat(leftclick_maps, dim=0)
57
  print (pos_maps[0].shape, c['c_concat'].shape, leftclick_map.shape)
58
- if DEBUG:
59
  c['c_concat'] = c['c_concat']*0
60
  c['c_concat'] = torch.cat([c['c_concat'][:, :, :, :], pos_maps[0].to(c['c_concat'].device).unsqueeze(0), leftclick_map.to(c['c_concat'].device).unsqueeze(0)], dim=1)
61
 
@@ -82,7 +82,7 @@ def sample_frame(model: LatentDiffusion, prompt: str, image_sequence: torch.Tens
82
  # unconditional_guidance_scale=5.0,
83
  # unconditional_conditioning=uc,
84
  # eta=0)
85
- if DEBUG:
86
  print ('samples_ddim.shape', samples_ddim.shape)
87
  x_samples_ddim = samples_ddim[:, :3]
88
  # upsample to 512 x 384
 
55
  pos_map = pos_maps[0]
56
  leftclick_map = torch.cat(leftclick_maps, dim=0)
57
  print (pos_maps[0].shape, c['c_concat'].shape, leftclick_map.shape)
58
+ if False and DEBUG:
59
  c['c_concat'] = c['c_concat']*0
60
  c['c_concat'] = torch.cat([c['c_concat'][:, :, :, :], pos_maps[0].to(c['c_concat'].device).unsqueeze(0), leftclick_map.to(c['c_concat'].device).unsqueeze(0)], dim=1)
61
 
 
82
  # unconditional_guidance_scale=5.0,
83
  # unconditional_conditioning=uc,
84
  # eta=0)
85
+ if False and DEBUG:
86
  print ('samples_ddim.shape', samples_ddim.shape)
87
  x_samples_ddim = samples_ddim[:, :3]
88
  # upsample to 512 x 384