da03 committed on
Commit 100405c · 1 Parent(s): b8ac450
Files changed (3)
  1. config_final_model.yaml +104 -0
  2. latent_stats.json +0 -0
  3. main.py +13 -9
config_final_model.yaml ADDED
@@ -0,0 +1,104 @@
+save_path: saved_standard_challenging_context32_nocond_cont_cont_all_cont_eval
+
+model:
+  base_learning_rate: 8.0e-05
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.0015
+    linear_end: 0.0195
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: image
+    cond_stage_key: action_
+    scheduler_sampling_rate: 0.0
+    hybrid_key: c_concat
+    image_size: [64, 48]
+    channels: 3
+    cond_stage_trainable: false
+    conditioning_key: hybrid
+    monitor: val/loss_simple_ema
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        image_size: [64, 48]
+        in_channels: 48
+        out_channels: 16
+        model_channels: 512
+        attention_resolutions: []
+        num_res_blocks: 2
+        channel_mult:
+        - 1
+        - 2
+        num_head_channels: 32
+        use_spatial_transformer: false
+        transformer_depth: 1
+
+    temporal_encoder_config:
+      target: ldm.modules.encoders.temporal_encoder.TemporalEncoder
+      params:
+        input_channels: 16
+        hidden_size: 4096
+        num_layers: 1
+        dropout: 0.1
+        output_channels: 32
+        output_height: 48
+        output_width: 64
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 16
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 16
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+
+    cond_stage_config: __is_unconditional__
+
+data:
+  target: data.data_processing.datasets.DataModule
+  params:
+    batch_size: 8
+    num_workers: 1
+    wrap: false
+    shuffle: True
+    drop_last: True
+    pin_memory: True
+    prefetch_factor: 2
+    persistent_workers: True
+    train:
+      target: data.data_processing.datasets.ActionsData
+      params:
+        data_csv_path: desktop_sequences_filtered_with_desktop_1.5k.challenging.train.target_frames.csv
+        normalization: standard
+        context_length: 32
+    #validation:
+    #  target: data.data_processing.datasets.ActionsData
+    #  params:
+
+lightning:
+  trainer:
+    benchmark: False
+    max_epochs: 6400
+    limit_val_batches: 0
+    accelerator: gpu
+    gpus: 1
+    accumulate_grad_batches: 999999
+    gradient_clip_val: 1
+    checkpoint_callback: True
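For orientation, this is how a config of this shape is typically consumed in CompVis latent-diffusion style code. The snippet below is a minimal sketch under that assumption; instantiate_from_config comes from the upstream ldm package (assumed available in this repo), and the checkpoint filename is hypothetical, not something this commit specifies.

import torch
from omegaconf import OmegaConf
from ldm.util import instantiate_from_config  # assumed helper from the upstream latent-diffusion codebase

config = OmegaConf.load("config_final_model.yaml")
model = instantiate_from_config(config.model)           # builds LatentDiffusion from model.target / model.params
model.learning_rate = config.model.base_learning_rate   # base_learning_rate sits outside params by convention

ckpt = torch.load("model.ckpt", map_location="cpu")     # hypothetical checkpoint path
model.load_state_dict(ckpt["state_dict"], strict=False)
model = model.eval()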
latent_stats.json ADDED
The diff for this file is too large to render. See raw diff
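Although the diff is not rendered, the structure main.py now expects from latent_stats.json can be read off its loading code: per-channel 'mean' and 'std' lists plus a 'latent_dims' entry. The following is a sketch with placeholder numbers, not the real statistics checked in by this commit; the [16, 48, 64] shape is only a plausible guess given embed_dim: 16 and 384x512 frames with an 8x autoencoder.

import json
import torch

example_stats = {
    "mean": [0.0] * 16,           # placeholder per-channel latent means
    "std": [1.0] * 16,            # placeholder per-channel latent stds
    "latent_dims": [16, 48, 64],  # guessed (C, H, W); the real value comes from the JSON file
}

with open("latent_stats.json", "r") as f:   # the file added in this commit
    latent_stats = json.load(f)

mean = torch.tensor(latent_stats["mean"]).view(1, -1, 1, 1)  # broadcast over (B, C, H, W)
std = torch.tensor(latent_stats["std"]).view(1, -1, 1, 1)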
 
main.py CHANGED
@@ -17,21 +17,25 @@ import concurrent.futures
 
 torch.backends.cuda.matmul.allow_tf32 = True
 torch.backends.cudnn.allow_tf32 = True
+
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+
+
 SCREEN_WIDTH = 512
 SCREEN_HEIGHT = 384
 NUM_SAMPLING_STEPS = 8
-DATA_NORMALIZATION = {
-    'mean': -0.54,
-    'std': 6.78,
-}
-LATENT_DIMS = (4, SCREEN_HEIGHT // 8, SCREEN_WIDTH // 8)
 
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+with open('latent_stats.json', 'r') as f:
+    latent_stats = json.load(f)
+DATA_NORMALIZATION = {'mean': torch.tensor(latent_stats['mean']).to(device), 'std': torch.tensor(latent_stats['std']).to(device)}
+LATENT_DIMS = latent_stats['latent_dims']
+
 # Initialize the model at the start of your application
 #model = initialize_model("config_csllm.yaml", "yuntian-deng/computer-model")
-model = initialize_model("config_rnn.yaml", "yuntian-deng/computer-model")
+#model = initialize_model("config_rnn.yaml", "yuntian-deng/computer-model")
+model = initialize_model("config_final_model.yaml", "yuntian-deng/computer-model")
 
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 model = model.to(device)
 #model = torch.compile(model)
 
@@ -148,7 +152,7 @@ def _process_frame_sync(model, inputs):
 
     # Decoding
    start = time.perf_counter()
-    sample = sample_latent * DATA_NORMALIZATION['std'] + DATA_NORMALIZATION['mean']
+    sample = sample_latent * DATA_NORMALIZATION['std'].view(1, -1, 1, 1) + DATA_NORMALIZATION['mean'].view(1, -1, 1, 1)
 
     # Use time.sleep(10) here since it's in a separate thread
     #time.sleep(10)
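The updated decoding line replaces the old scalar statistics with per-channel ones; .view(1, -1, 1, 1) reshapes the mean/std vectors so they broadcast across the batch and spatial dimensions of a (B, C, H, W) latent. A minimal sketch of that operation and its hypothetical inverse (the function names are illustrative, not taken from main.py):

import torch

def denormalize_latent(sample_latent: torch.Tensor, mean: torch.Tensor, std: torch.Tensor) -> torch.Tensor:
    # Undo per-channel standardization before decoding, as in the new hunk above.
    return sample_latent * std.view(1, -1, 1, 1) + mean.view(1, -1, 1, 1)

def normalize_latent(latent: torch.Tensor, mean: torch.Tensor, std: torch.Tensor) -> torch.Tensor:
    # Hypothetical forward direction: standardize encoder latents channel-wise.
    return (latent - mean.view(1, -1, 1, 1)) / std.view(1, -1, 1, 1)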