neural-os

Runtime error

App Files Files Community

da03 committed on Apr 16

Commit

100405c

1 Parent(s): b8ac450

.

Browse files

Files changed (3) hide show

config_final_model.yaml +104 -0
latent_stats.json +0 -0
main.py +13 -9

config_final_model.yaml ADDED Viewed

	@@ -0,0 +1,104 @@

+save_path: saved_standard_challenging_context32_nocond_cont_cont_all_cont_eval
+model:
+  base_learning_rate: 8.0e-05
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.0015
+    linear_end: 0.0195
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: image
+    cond_stage_key: action_
+    scheduler_sampling_rate: 0.0
+    hybrid_key: c_concat
+    image_size: [64, 48]
+    channels: 3
+    cond_stage_trainable: false
+    conditioning_key: hybrid
+    monitor: val/loss_simple_ema
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        image_size: [64, 48]
+        in_channels: 48
+        out_channels: 16
+        model_channels: 512
+        attention_resolutions: []
+        num_res_blocks: 2
+        channel_mult:
+        - 1
+        - 2
+        num_head_channels: 32
+        use_spatial_transformer: false
+        transformer_depth: 1
+    temporal_encoder_config:
+      target: ldm.modules.encoders.temporal_encoder.TemporalEncoder
+      params:
+        input_channels: 16
+        hidden_size: 4096
+        num_layers: 1
+        dropout: 0.1
+        output_channels: 32
+        output_height: 48
+        output_width: 64
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 16
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 16
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+    cond_stage_config: __is_unconditional__
+data:
+  target: data.data_processing.datasets.DataModule
+  params:
+    batch_size: 8
+    num_workers: 1
+    wrap: false
+    shuffle: True
+    drop_last: True
+    pin_memory: True
+    prefetch_factor: 2
+    persistent_workers: True
+    train:
+      target: data.data_processing.datasets.ActionsData
+      params:
+        data_csv_path: desktop_sequences_filtered_with_desktop_1.5k.challenging.train.target_frames.csv
+        normalization: standard
+        context_length: 32
+        #validation:
+        #  target: data.data_processing.datasets.ActionsData
+        #  params:
+lightning:
+  trainer:
+    benchmark: False
+    max_epochs: 6400
+    limit_val_batches: 0
+    accelerator: gpu
+    gpus: 1
+    accumulate_grad_batches: 999999
+    gradient_clip_val: 1
+    checkpoint_callback: True

latent_stats.json ADDED Viewed

The diff for this file is too large to render. See raw diff

main.py CHANGED Viewed

@@ -17,21 +17,25 @@ import concurrent.futures
 torch.backends.cuda.matmul.allow_tf32 = True
 torch.backends.cudnn.allow_tf32 = True
 SCREEN_WIDTH = 512
 SCREEN_HEIGHT = 384
 NUM_SAMPLING_STEPS = 8
-DATA_NORMALIZATION = {
-    'mean': -0.54,
-    'std': 6.78,
-}
-LATENT_DIMS = (4, SCREEN_HEIGHT // 8, SCREEN_WIDTH // 8)
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 # Initialize the model at the start of your application
 #model = initialize_model("config_csllm.yaml", "yuntian-deng/computer-model")
-model = initialize_model("config_rnn.yaml", "yuntian-deng/computer-model")
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 model = model.to(device)
 #model = torch.compile(model)
@@ -148,7 +152,7 @@ def _process_frame_sync(model, inputs):
     # Decoding
     start = time.perf_counter()
-    sample = sample_latent * DATA_NORMALIZATION['std'] + DATA_NORMALIZATION['mean']
     # Use time.sleep(10) here since it's in a separate thread
     #time.sleep(10)

 torch.backends.cuda.matmul.allow_tf32 = True
 torch.backends.cudnn.allow_tf32 = True
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 SCREEN_WIDTH = 512
 SCREEN_HEIGHT = 384
 NUM_SAMPLING_STEPS = 8
+with open('latent_stats.json', 'r') as f:
+    latent_stats = json.load(f)
+DATA_NORMALIZATION = {'mean': torch.tensor(latent_stats['mean']).to(device), 'std': torch.tensor(latent_stats['std']).to(device)}
+LATENT_DIMS = latent_stats['latent_dims']
 # Initialize the model at the start of your application
 #model = initialize_model("config_csllm.yaml", "yuntian-deng/computer-model")
+#model = initialize_model("config_rnn.yaml", "yuntian-deng/computer-model")
+model = initialize_model("config_final_model.yaml", "yuntian-deng/computer-model")
 model = model.to(device)
 #model = torch.compile(model)
     # Decoding
     start = time.perf_counter()
+    sample = sample_latent * DATA_NORMALIZATION['std'].view(1, -1, 1, 1) + DATA_NORMALIZATION['mean'].view(1, -1, 1, 1)
     # Use time.sleep(10) here since it's in a separate thread
     #time.sleep(10)