k-l-lambda committed
Commit 5121da0 · Parent: 438adad

Train folder of the 20240508 commit.

Files changed (28)
  1. 20240418-stage1-dance800/config.yaml +103 -0
  2. 20240422-stage1-ubc+td1/config.yaml +106 -0
  3. 20240423-stage1-ubc+td10/config.yaml +106 -0
  4. 20240425-stage2-openpg/config.yaml +92 -0
  5. 20240428-stage2-6k/config.yaml +97 -0
  6. 20240504-stage1-51k-raw-opg/config.yaml +105 -0
  7. 20240508-stage1-openpg-nopaf/checkpoint-68000/optimizer.bin +3 -0
  8. 20240508-stage1-openpg-nopaf/checkpoint-68000/pytorch_model.bin +3 -0
  9. 20240508-stage1-openpg-nopaf/checkpoint-68000/random_states_0.pkl +3 -0
  10. 20240508-stage1-openpg-nopaf/checkpoint-68000/scaler.pt +3 -0
  11. 20240508-stage1-openpg-nopaf/checkpoint-68000/scheduler.bin +3 -0
  12. 20240508-stage1-openpg-nopaf/checkpoint-69000/optimizer.bin +3 -0
  13. 20240508-stage1-openpg-nopaf/checkpoint-69000/pytorch_model.bin +3 -0
  14. 20240508-stage1-openpg-nopaf/checkpoint-69000/random_states_0.pkl +3 -0
  15. 20240508-stage1-openpg-nopaf/checkpoint-69000/scaler.pt +3 -0
  16. 20240508-stage1-openpg-nopaf/checkpoint-69000/scheduler.bin +3 -0
  17. 20240508-stage1-openpg-nopaf/config.yaml +108 -0
  18. 20240508-stage1-openpg-nopaf/denoising_unet-65381.pth +3 -0
  19. 20240508-stage1-openpg-nopaf/denoising_unet-66980.pth +3 -0
  20. 20240508-stage1-openpg-nopaf/denoising_unet-68579.pth +3 -0
  21. 20240508-stage1-openpg-nopaf/pose_guider-65381.pth +3 -0
  22. 20240508-stage1-openpg-nopaf/pose_guider-66980.pth +3 -0
  23. 20240508-stage1-openpg-nopaf/pose_guider-68579.pth +3 -0
  24. 20240508-stage1-openpg-nopaf/reference_unet-65381.pth +3 -0
  25. 20240508-stage1-openpg-nopaf/reference_unet-66980.pth +3 -0
  26. 20240508-stage1-openpg-nopaf/reference_unet-68579.pth +3 -0
  27. 20240510-stage1-9k/config.yaml +105 -0
  28. 20240513-stage2-9k/config.yaml +99 -0
20240418-stage1-dance800/config.yaml ADDED
@@ -0,0 +1,103 @@
+ base_model_path: ./pretrained_weights/sd-image-variations-diffusers
+ checkpointing_steps: 1000
+ controlnet_openpose_path: ./pretrained_weights/control_v11p_sd15_openpose/diffusion_pytorch_model.bin
+ data:
+   crop_scale:
+   - 0.6
+   - 1
+   do_center_crop: false
+   meta_paths:
+   - /workspace/develop/video/data/tiktok-dance/good-meta.json
+   ref_augment:
+     downsample:
+       min_scale_logit: -1.2
+       p: 0.3
+     pan:
+     - 0.04
+     - 0.02
+     rotate: 8
+     scale:
+     - 0.9
+     - 1.4
+   sample_margin: 30
+   train_bs: 4
+   train_height: 1152
+   train_width: 768
+ enable_zero_snr: true
+ exp_name: stage1-dance800
+ freeze_denoise: false
+ freeze_reference: false
+ image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
+ noise_offset: 0.05
+ noise_scheduler_kwargs:
+   beta_end: 0.012
+   beta_schedule: scaled_linear
+   beta_start: 0.00085
+   clip_sample: false
+   num_train_timesteps: 1000
+   steps_offset: 1
+ openpose_guider:
+   enable: false
+ output_dir: /workspace/camus/train
+ pose_guider_pretrain: true
+ resume_from_checkpoint: latest
+ save_dir: /workspace/camus/train/20240418-stage1-dance800/
+ save_model_epoch_interval: 1
+ seed: 12580
+ snr_gamma: 5.0
+ solver:
+   adam_beta1: 0.9
+   adam_beta2: 0.999
+   adam_epsilon: 1.0e-08
+   adam_weight_decay: 0.01
+   enable_xformers_memory_efficient_attention: true
+   gradient_accumulation_steps: 1
+   gradient_checkpointing: false
+   learning_rate: 1.0e-05
+   lr_scheduler: constant
+   lr_warmup_steps: 1
+   max_grad_norm: 1.0
+   max_train_steps: 30000
+   mixed_precision: fp16
+   scale_lr: false
+   use_8bit_adam: false
+ uncond_ratio: 0.1
+ vae_model_path: ./pretrained_weights/sd-vae-ft-mse
+ val:
+   special_steps:
+   - 24001
+   validation_steps: 1000
+ validation:
+   metric:
+     batch_size: 4
+     generated_frames:
+     - 16
+     - 45
+     - 98
+     - 150
+     - 188
+     - 220
+     - 268
+     - 284
+     guidance_scale: 2.4
+     ref_frame: 16
+     seed: 42
+     size:
+     - 768
+     - 1152
+     steps: 20
+     videos:
+     - configs/inference/metric/o4flk5RPE4D4fgNEUNFRZIbOBjCsEgB9DQQQLA.mp4
+     - configs/inference/metric/ocMJyAEDjQzjwqAuIPABAvmRyofjKiYhBExati.mp4
+     - configs/inference/metric/ocQoBObnUgBnVskCnPe41sYRiBcAFD5f8AN1Rg.mp4
+     - configs/inference/metric/oE2tJFpDWANbMe7cxAg3hoq0QAPNeRnCQefG8F.mp4
+     - configs/inference/metric/oEOgTIKvy7lAQIfZ37E5BFmCVBID3gIQUQfMdv.mp4
+     - configs/inference/metric/oEtwozJ6AoIBJ6oyK6rAAQGAOiEWIQixF2F2fB.mp4
+     - configs/inference/metric/oUGmQqeqoAAega5fGgnAICfOWJRAAFTJCgDibU.mp4
+     - configs/inference/metric/oYQJ5zLiEgG71SAAFEfAeVIEVFROW4ZGQJTfeF.mp4
+   pose_image_paths:
+   - configs/inference/pose_images/A1eEZvfJRUS/frame70.png
+   ref_image_paths:
+   - configs/inference/ref_images/anyone-3.png
+   - configs/inference/ref_images/anyone-11.png
+ weight_dtype: fp16
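These configs are plain YAML dumps with alphabetically sorted keys, which is consistent with an OmegaConf round-trip. As a minimal sketch of inspecting one (assuming OmegaConf is what the training scripts use to consume them; that loader choice is an assumption, not something this commit confirms):

```python
# Minimal sketch: inspecting one of these training configs.
# Assumes OmegaConf; the key names are the ones visible in the diff above.
from omegaconf import OmegaConf

cfg = OmegaConf.load("20240418-stage1-dance800/config.yaml")
print(cfg.exp_name)                                 # -> stage1-dance800
print(cfg.solver.learning_rate)                     # -> 1e-05
print(cfg.data.train_height, cfg.data.train_width)  # -> 1152 768
print(list(cfg.data.meta_paths))                    # dataset metadata JSONs
```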
20240422-stage1-ubc+td1/config.yaml ADDED
@@ -0,0 +1,106 @@
+ base_model_path: ./pretrained_weights/sd-image-variations-diffusers
+ checkpointing_steps: 1000
+ controlnet_openpose_path: ./pretrained_weights/control_v11p_sd15_openpose/diffusion_pytorch_model.bin
+ data:
+   crop_scale:
+   - 0.8
+   - 1.2
+   do_center_crop: false
+   meta_paths:
+   - /workspace/develop/video/data/ubc_tiktok-dropout0.03/ubc-meta.json
+   - /workspace/develop/video/data/tiktok-dance/meta-1per_person.json
+   ref_augment:
+     downsample:
+       min_scale_logit: -1.2
+       p: 0.3
+     pan:
+     - 0.04
+     - 0.02
+     rotate: 8
+     scale:
+     - 0.9
+     - 1.2
+   sample_margin: 30
+   train_bs: 4
+   train_height: 1152
+   train_width: 768
+ enable_zero_snr: true
+ exp_name: stage1-ubc+td1
+ freeze_denoise: false
+ freeze_reference: false
+ image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
+ noise_offset: 0.05
+ noise_scheduler_kwargs:
+   beta_end: 0.012
+   beta_schedule: scaled_linear
+   beta_start: 0.00085
+   clip_sample: false
+   num_train_timesteps: 1000
+   steps_offset: 1
+ openpose_guider:
+   enable: false
+ output_dir: /workspace/camus/train
+ pose_guider_pretrain: true
+ resume_from_checkpoint: latest
+ save_dir: /workspace/camus/train/20240422-stage1-ubc+td1
+ save_model_epoch_interval: 1
+ seed: 12580
+ snr_gamma: 5.0
+ solver:
+   adam_beta1: 0.9
+   adam_beta2: 0.999
+   adam_epsilon: 1.0e-08
+   adam_weight_decay: 0.01
+   enable_xformers_memory_efficient_attention: true
+   gradient_accumulation_steps: 1
+   gradient_checkpointing: false
+   learning_rate: 1.0e-05
+   lr_scheduler: constant
+   lr_warmup_steps: 1
+   max_grad_norm: 1.0
+   max_train_steps: 30000
+   mixed_precision: fp16
+   scale_lr: false
+   use_8bit_adam: false
+ uncond_ratio: 0.1
+ vae_model_path: ./pretrained_weights/sd-vae-ft-mse
+ val:
+   validation_steps: 2000
+ validation:
+   metric:
+     batch_size: 4
+     generated_frames:
+     - 16
+     - 45
+     - 98
+     - 150
+     - 188
+     - 220
+     - 268
+     - 284
+     guidance_scale: 2.4
+     ref_frame: 16
+     seed: 42
+     size:
+     - 768
+     - 1152
+     steps: 20
+     videos:
+     - configs/inference/metric/o4flk5RPE4D4fgNEUNFRZIbOBjCsEgB9DQQQLA.mp4
+     - configs/inference/metric/ocMJyAEDjQzjwqAuIPABAvmRyofjKiYhBExati.mp4
+     - configs/inference/metric/ocQoBObnUgBnVskCnPe41sYRiBcAFD5f8AN1Rg.mp4
+     - configs/inference/metric/oE2tJFpDWANbMe7cxAg3hoq0QAPNeRnCQefG8F.mp4
+     - configs/inference/metric/oEOgTIKvy7lAQIfZ37E5BFmCVBID3gIQUQfMdv.mp4
+     - configs/inference/metric/oEtwozJ6AoIBJ6oyK6rAAQGAOiEWIQixF2F2fB.mp4
+     - configs/inference/metric/oUGmQqeqoAAega5fGgnAICfOWJRAAFTJCgDibU.mp4
+     - configs/inference/metric/oYQJ5zLiEgG71SAAFEfAeVIEVFROW4ZGQJTfeF.mp4
+   pose_image_paths:
+   - configs/inference/pose_images/A1eEZvfJRUS/frame70.png
+   - configs/inference/pose_images/A1eEZvfJRUS/frame150.png
+   - configs/inference/pose_images/A1eEZvfJRUS/frame190.png
+   ref_image_paths:
+   - configs/inference/ref_images/anyone-1.png
+   - configs/inference/ref_images/anyone-2.png
+   - configs/inference/ref_images/anyone-3.png
+   - configs/inference/ref_images/anyone-11.png
+ weight_dtype: fp16
20240423-stage1-ubc+td10/config.yaml ADDED
@@ -0,0 +1,106 @@
+ base_model_path: ./pretrained_weights/sd-image-variations-diffusers
+ checkpointing_steps: 1000
+ controlnet_openpose_path: ./pretrained_weights/control_v11p_sd15_openpose/diffusion_pytorch_model.bin
+ data:
+   crop_scale:
+   - 0.8
+   - 1.2
+   do_center_crop: false
+   meta_paths:
+   - /workspace/develop/video/data/ubc_tiktok-dropout0.03/ubc-meta.json
+   - /workspace/develop/video/data/tiktok-dance/meta-10per_person.json
+   ref_augment:
+     downsample:
+       min_scale_logit: -1.2
+       p: 0.3
+     pan:
+     - 0.04
+     - 0.02
+     rotate: 8
+     scale:
+     - 0.9
+     - 1.2
+   sample_margin: 30
+   train_bs: 4
+   train_height: 1152
+   train_width: 768
+ enable_zero_snr: true
+ exp_name: stage1-ubc+td10
+ freeze_denoise: false
+ freeze_reference: false
+ image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
+ noise_offset: 0.05
+ noise_scheduler_kwargs:
+   beta_end: 0.012
+   beta_schedule: scaled_linear
+   beta_start: 0.00085
+   clip_sample: false
+   num_train_timesteps: 1000
+   steps_offset: 1
+ openpose_guider:
+   enable: false
+ output_dir: /workspace/camus/train
+ pose_guider_pretrain: true
+ resume_from_checkpoint: latest
+ save_dir: /workspace/camus/train/20240423-stage1-ubc+td10
+ save_model_epoch_interval: 1
+ seed: 12580
+ snr_gamma: 5.0
+ solver:
+   adam_beta1: 0.9
+   adam_beta2: 0.999
+   adam_epsilon: 1.0e-08
+   adam_weight_decay: 0.01
+   enable_xformers_memory_efficient_attention: true
+   gradient_accumulation_steps: 1
+   gradient_checkpointing: false
+   learning_rate: 1.0e-05
+   lr_scheduler: constant
+   lr_warmup_steps: 1
+   max_grad_norm: 1.0
+   max_train_steps: 30000
+   mixed_precision: fp16
+   scale_lr: false
+   use_8bit_adam: false
+ uncond_ratio: 0.1
+ vae_model_path: ./pretrained_weights/sd-vae-ft-mse
+ val:
+   validation_steps: 2000
+ validation:
+   metric:
+     batch_size: 4
+     generated_frames:
+     - 16
+     - 45
+     - 98
+     - 150
+     - 188
+     - 220
+     - 268
+     - 284
+     guidance_scale: 2.4
+     ref_frame: 16
+     seed: 42
+     size:
+     - 768
+     - 1152
+     steps: 20
+     videos:
+     - configs/inference/metric/o4flk5RPE4D4fgNEUNFRZIbOBjCsEgB9DQQQLA.mp4
+     - configs/inference/metric/ocMJyAEDjQzjwqAuIPABAvmRyofjKiYhBExati.mp4
+     - configs/inference/metric/ocQoBObnUgBnVskCnPe41sYRiBcAFD5f8AN1Rg.mp4
+     - configs/inference/metric/oE2tJFpDWANbMe7cxAg3hoq0QAPNeRnCQefG8F.mp4
+     - configs/inference/metric/oEOgTIKvy7lAQIfZ37E5BFmCVBID3gIQUQfMdv.mp4
+     - configs/inference/metric/oEtwozJ6AoIBJ6oyK6rAAQGAOiEWIQixF2F2fB.mp4
+     - configs/inference/metric/oUGmQqeqoAAega5fGgnAICfOWJRAAFTJCgDibU.mp4
+     - configs/inference/metric/oYQJ5zLiEgG71SAAFEfAeVIEVFROW4ZGQJTfeF.mp4
+   pose_image_paths:
+   - configs/inference/pose_images/A1eEZvfJRUS/frame70.png
+   - configs/inference/pose_images/A1eEZvfJRUS/frame150.png
+   - configs/inference/pose_images/A1eEZvfJRUS/frame190.png
+   ref_image_paths:
+   - configs/inference/ref_images/anyone-1.png
+   - configs/inference/ref_images/anyone-2.png
+   - configs/inference/ref_images/anyone-3.png
+   - configs/inference/ref_images/anyone-11.png
+ weight_dtype: fp16
20240425-stage2-openpg/config.yaml ADDED
@@ -0,0 +1,92 @@
+ base_model_path: ./pretrained_weights/stable-diffusion-v1-5
+ checkpointing_steps: 2000
+ data:
+   crop_scale:
+   - 1
+   - 1
+   do_center_crop: false
+   meta_paths:
+   - /workspace/develop/video/data/ubc_tiktok-dropout0.03/ubc-meta.json
+   - /workspace/develop/video/data/tiktok-dance/good-meta.json
+   - /workspace/develop/video/data/20240321/meta.json
+   - /workspace/develop/video/data/20240327/meta.json
+   n_sample_frames: 24
+   ref_augment:
+     pan:
+     - 0.04
+     - 0.04
+     rotate: 2
+     scale:
+     - 0.9
+     - 1.0
+   sample_rate: 4
+   train_bs: 1
+   train_height: 960
+   train_width: 640
+ enable_zero_snr: true
+ exp_name: stage2-openpg
+ image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
+ mm_path: ./pretrained_weights/mm_sd_v15_v2.ckpt
+ noise_offset: 0.05
+ noise_scheduler_kwargs:
+   beta_end: 0.012
+   beta_schedule: linear
+   beta_start: 0.00085
+   clip_sample: false
+   num_train_timesteps: 1000
+   steps_offset: 1
+ openpose_guider:
+   block_out_channels:
+   - 96
+   - 192
+   enable: true
+ output_dir: /workspace/camus/train
+ resume_from_checkpoint: latest
+ save_dir: /workspace/camus/train/20240425-stage2-openpg
+ save_model_epoch_interval: 1
+ seed: 12580
+ snr_gamma: 5.0
+ solver:
+   adam_beta1: 0.9
+   adam_beta2: 0.999
+   adam_epsilon: 1.0e-08
+   adam_weight_decay: 0.01
+   enable_xformers_memory_efficient_attention: true
+   gradient_accumulation_steps: 1
+   gradient_checkpointing: true
+   learning_rate: 1.0e-05
+   lr_scheduler: constant
+   lr_warmup_steps: 1
+   max_grad_norm: 1.0
+   max_train_steps: 160000
+   mixed_precision: fp16
+   scale_lr: false
+   use_8bit_adam: true
+ stage1_ckpt_dir: /workspace/camus/train/20240418-stage1-openpg-c96_192
+ stage1_ckpt_step: 86396
+ uncond_ratio: 0.1
+ vae_model_path: ./pretrained_weights/sd-vae-ft-mse
+ val:
+   validation_steps: 1000
+ validation:
+   metric:
+     generate_frame_range:
+     - 50
+     - 74
+     guidance_scale: 2.4
+     ref_frame: 29
+     seed: 42
+     steps: 20
+     videos:
+     - configs/inference/metric/A1ubDo0PbQS.mp4
+     - configs/inference/metric/oEtwozJ6AoIBJ6oyK6rAAQGAOiEWIQixF2F2fB.mp4
+     - configs/inference/metric/oEOgTIKvy7lAQIfZ37E5BFmCVBID3gIQUQfMdv.mp4
+     - configs/inference/metric/ocQoBObnUgBnVskCnPe41sYRiBcAFD5f8AN1Rg.mp4
+   pose_range:
+   - 24
+   - 48
+   test_cases:
+   - - ./configs/inference/ref_images/anyone-2.png
+     - ./configs/inference/metric/91HzMhq7eOS.mp4
+   uniform_along_time: false
+ weight_dtype: fp16
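Each stage-2 config points back at a finished stage-1 run through `stage1_ckpt_dir` and `stage1_ckpt_step`, and layers the temporal pieces (`mm_path`, `n_sample_frames`, `sample_rate`) on top of the stage-1 image model. A hedged sketch of how those stage-1 weights could be located: the `denoising_unet-<step>.pth` / `reference_unet-<step>.pth` / `pose_guider-<step>.pth` naming matches the files later in this commit, but the loading logic below is assumed, not taken from the repo's scripts.

```python
# Sketch: how a stage-2 run might resolve its stage-1 weights.
# The <module>-<step>.pth naming matches the files in this commit;
# the resolution logic itself is an assumption.
import os
import torch

stage1_ckpt_dir = "/workspace/camus/train/20240418-stage1-openpg-c96_192"
stage1_ckpt_step = 86396

for module in ("denoising_unet", "reference_unet", "pose_guider"):
    path = os.path.join(stage1_ckpt_dir, f"{module}-{stage1_ckpt_step}.pth")
    state = torch.load(path, map_location="cpu")  # a plain state_dict
    print(module, len(state), "tensors")
```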
20240428-stage2-6k/config.yaml ADDED
@@ -0,0 +1,97 @@
+ base_model_path: ./pretrained_weights/stable-diffusion-v1-5
+ checkpointing_steps: 2000
+ data:
+   crop_scale:
+   - 1
+   - 1
+   do_center_crop: false
+   meta_paths:
+   - /workspace/develop/video/data/ubc_tiktok-dropout0.03/ubc-meta.json
+   - /workspace/develop/video/data/tiktok-dance/good-meta.json
+   - /workspace/develop/video/data/20240321/meta.json
+   - /workspace/develop/video/data/20240327/meta.json
+   n_sample_frames: 24
+   ref_augment:
+     pan:
+     - 0.04
+     - 0.04
+     rotate: 2
+     scale:
+     - 0.9
+     - 1.2
+   sample_rate: 4
+   train_bs: 1
+   train_height: 960
+   train_width: 640
+ enable_zero_snr: true
+ exp_name: stage2-6k
+ image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
+ mm_path: ./pretrained_weights/mm_sd_v15_v2.ckpt
+ noise_offset: 0.05
+ noise_scheduler_kwargs:
+   beta_end: 0.012
+   beta_schedule: linear
+   beta_start: 0.00085
+   clip_sample: false
+   num_train_timesteps: 1000
+   steps_offset: 1
+ output_dir: /workspace/camus/train
+ resume_from_checkpoint: latest
+ save_dir: /workspace/camus/train/20240428-stage2-6k
+ save_model_epoch_interval: 1
+ seed: 12580
+ snr_gamma: 5.0
+ solver:
+   adam_beta1: 0.9
+   adam_beta2: 0.999
+   adam_epsilon: 1.0e-08
+   adam_weight_decay: 0.01
+   enable_xformers_memory_efficient_attention: true
+   gradient_accumulation_steps: 1
+   gradient_checkpointing: true
+   learning_rate: 1.0e-05
+   lr_scheduler: constant
+   lr_warmup_steps: 1
+   max_grad_norm: 1.0
+   max_train_steps: 160000
+   mixed_precision: fp16
+   scale_lr: false
+   use_8bit_adam: true
+ stage1_ckpt_dir: /workspace/camus/train/20240421-stage1-6k
+ stage1_ckpt_step: 78782
+ uncond_ratio: 0.1
+ vae_model_path: ./pretrained_weights/sd-vae-ft-mse
+ val:
+   validation_steps: 1000
+ validation:
+   metric:
+     generate_frame_range:
+     - 30
+     - 54
+     guidance_scale: 2.8
+     ref_frame: 29
+     seed: 42
+     steps: 30
+     videos:
+     - configs/inference/metric/oATCBbieJIB8u3QAMAUwvMi9ymEOIc1AoDOajA.mp4
+     - configs/inference/metric/oonQq0HjAC7ExkJlRSMBBs1q3EIiQgFveLD7fD.mp4
+     - configs/inference/metric/os0aLDIkagGgAfAFQsfICCWMuoL8jm3IgJ0Wey.mp4
+     - configs/inference/metric/oYflAvAyfAIFRf3yQDrLRDCWcEDoFENF9tBEgg.mp4
+   pose_range:
+   - 0
+   - 24
+   test_cases:
+   - - ./configs/inference/ref_images/anyone-3.png
+     - ./configs/inference/pose_videos/demo18.mp4
+   - - ./configs/inference/ref_images/anyone-3-partial.png
+     - ./configs/inference/pose_videos/demo6.mp4
+   - - ./configs/inference/ref_images/anyone-2.png
+     - ./configs/inference/pose_videos/demo11.mp4
+   - - ./configs/inference/ref_images/anyone-1.png
+     - ./configs/inference/pose_videos/demo11.mp4
+   - - ./configs/inference/ref_images/anyone-5.png
+     - ./configs/inference/pose_videos/demo11.mp4
+   - - ./configs/inference/ref_images/anyone-11.png
+     - ./configs/inference/pose_videos/demo15.mp4
+   uniform_along_time: false
+ weight_dtype: fp16
20240504-stage1-51k-raw-opg/config.yaml ADDED
@@ -0,0 +1,105 @@
+ base_model_path: ./pretrained_weights/sd-image-variations-diffusers
+ checkpointing_steps: 1000
+ controlnet_openpose_path: ./pretrained_weights/control_v11p_sd15_openpose/diffusion_pytorch_model.bin
+ data:
+   crop_scale:
+   - 0.8
+   - 1.2
+   do_center_crop: false
+   meta_paths:
+   - /workspace/develop/video/data/202403raw/meta.json
+   ref_augment:
+     downsample:
+       min_scale_logit: -1.2
+       p: 0.3
+     pan:
+     - 0.04
+     - 0.02
+     rotate: 8
+     scale:
+     - 0.9
+     - 1.2
+   sample_margin: 30
+   train_bs: 4
+   train_height: 1152
+   train_width: 768
+ enable_zero_snr: true
+ exp_name: stage1-51k-raw-opg
+ freeze_denoise: false
+ freeze_reference: false
+ image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
+ noise_offset: 0.05
+ noise_scheduler_kwargs:
+   beta_end: 0.012
+   beta_schedule: scaled_linear
+   beta_start: 0.00085
+   clip_sample: false
+   num_train_timesteps: 1000
+   steps_offset: 1
+ openpose_guider:
+   block_out_channels:
+   - 96
+   - 192
+   enable: true
+   model_path: ./pretrained_weights/body_pose_model.pth
+ output_dir: /workspace/camus/train
+ pose_guider_pretrain: true
+ resume_from_checkpoint: latest
+ save_dir: /workspace/camus/train/20240504-stage1-51k-raw-opg
+ save_model_epoch_interval: 1
+ seed: 12580
+ snr_gamma: 5.0
+ solver:
+   adam_beta1: 0.9
+   adam_beta2: 0.999
+   adam_epsilon: 1.0e-08
+   adam_weight_decay: 0.01
+   enable_xformers_memory_efficient_attention: true
+   gradient_accumulation_steps: 1
+   gradient_checkpointing: false
+   learning_rate: 1.0e-05
+   lr_scheduler: constant
+   lr_warmup_steps: 1
+   max_grad_norm: 1.0
+   max_train_steps: 100000
+   mixed_precision: fp16
+   scale_lr: false
+   use_8bit_adam: false
+ uncond_ratio: 0.1
+ vae_model_path: ./pretrained_weights/sd-vae-ft-mse
+ val:
+   validation_steps: 1000
+ validation:
+   metric:
+     batch_size: 4
+     generated_frames:
+     - 16
+     - 45
+     - 98
+     - 150
+     - 188
+     - 220
+     - 268
+     - 300
+     guidance_scale: 1.9
+     ref_frame: 28
+     seed: 42
+     size:
+     - 640
+     - 960
+     steps: 20
+     videos:
+     - configs/inference/metric/91HzMhq7eOS.mp4
+     - configs/inference/metric/A1T-Ea-FlQS.mp4
+     - configs/inference/metric/A1ubDo0PbQS.mp4
+     - configs/inference/metric/A1YNmKj0sCS.mp4
+   pose_image_paths:
+   - configs/inference/ref_images/91c+SL7Cg7S-98.png
+   - configs/inference/ref_images/91c+SL7Cg7S-150.png
+   - configs/inference/ref_images/91c+SL7Cg7S-220.png
+   ref_image_paths:
+   - configs/inference/ref_images/anyone-1.png
+   - configs/inference/ref_images/anyone-2.png
+   - configs/inference/ref_images/anyone-3.png
+   - configs/inference/ref_images/anyone-11.png
+ weight_dtype: fp16
20240508-stage1-openpg-nopaf/checkpoint-68000/optimizer.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4992207980f1536bd01774c79570964dc5914869eacdbf4e036e5be6d3a08009
+ size 13608730321
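The binary files in this commit are Git LFS pointer stubs, not the weights themselves: three lines giving the LFS spec version, the SHA-256 of the real blob, and its size in bytes (here ~12.7 GiB of optimizer state). The blobs only arrive on `git lfs pull` or an equivalent hub download. A small sketch of reading such a pointer; `parse_lfs_pointer` is an illustrative helper, not something in this repository.

```python
# Minimal sketch: reading a Git LFS pointer file such as
# checkpoint-68000/optimizer.bin above. Each line is "<key> <value>".
def parse_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

ptr = parse_lfs_pointer("20240508-stage1-openpg-nopaf/checkpoint-68000/optimizer.bin")
print(ptr["oid"])                 # sha256:49922079...
print(int(ptr["size"]) / 2**30)   # ~12.7 GiB
```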
20240508-stage1-openpg-nopaf/checkpoint-68000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:10cba949e36100e45347b68dbb7c450f68f22ca6eddd374135e04a167d349939
+ size 7089007643
20240508-stage1-openpg-nopaf/checkpoint-68000/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:006807bad9ad34cbe6cea8f2852e1958e302fc1d48af87193038df01ebdf7f54
+ size 14663
20240508-stage1-openpg-nopaf/checkpoint-68000/scaler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e8044a6001b7773ad8995c7d8eaca15afedbae86f4988d9021814efe220547b7
+ size 557
20240508-stage1-openpg-nopaf/checkpoint-68000/scheduler.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:531528e64a56b148ae89500f53709845c48cc71002664c9dc655272f32520b7a
+ size 563
20240508-stage1-openpg-nopaf/checkpoint-69000/optimizer.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5bf4e2162da621736604cc2fb72622ae9bd06c5f62135da6bec546d792f89da2
+ size 13608730321
20240508-stage1-openpg-nopaf/checkpoint-69000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:74feaf9a2826975a96d2148c6768b6a36493b561fc973bda16d11df5d0cbc471
+ size 7089007643
20240508-stage1-openpg-nopaf/checkpoint-69000/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a2fbfcb40a710a5f4392303e002cc3301bc6a3cb5f094e119b47fb82ff721292
+ size 14727
20240508-stage1-openpg-nopaf/checkpoint-69000/scaler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d27fb664e5da2431936336dcfd332b3f059d007cac2905bf45738234c84c3618
+ size 557
20240508-stage1-openpg-nopaf/checkpoint-69000/scheduler.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0f47d91314bb33995bbc73a56304786baf645f2d50ad3fd39f0b8c360b7e8e85
+ size 563
20240508-stage1-openpg-nopaf/config.yaml ADDED
@@ -0,0 +1,108 @@
+ base_model_path: ./pretrained_weights/sd-image-variations-diffusers
+ checkpointing_steps: 1000
+ data:
+   crop_scale:
+   - 0.8
+   - 1.2
+   do_center_crop: false
+   meta_paths:
+   - /workspace/develop/video/data/ubc_tiktok-dropout0.03/ubc-meta.json
+   - /workspace/develop/video/data/tiktok-dance/good-meta.json
+   - /workspace/develop/video/data/20240321/meta.json
+   - /workspace/develop/video/data/20240327/meta.json
+   ref_augment:
+     downsample:
+       min_scale_logit: -1.2
+       p: 0.3
+     pan:
+     - 0.04
+     - 0.02
+     rotate: 8
+     scale:
+     - 0.9
+     - 1.2
+   sample_margin: 30
+   train_bs: 4
+   train_height: 1152
+   train_width: 768
+ enable_zero_snr: true
+ exp_name: stage1-openpg-nopaf
+ image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
+ noise_offset: 0.05
+ noise_scheduler_kwargs:
+   beta_end: 0.012
+   beta_schedule: scaled_linear
+   beta_start: 0.00085
+   clip_sample: false
+   num_train_timesteps: 1000
+   steps_offset: 1
+ openpose_guider:
+   block_out_channels:
+   - 96
+   - 192
+   enable: true
+   exclude_paf: true
+   model_path: ./pretrained_weights/body_pose_model.pth
+ output_dir: /workspace/camus/train
+ pose_guider_pretrain: false
+ resume_from_checkpoint: latest
+ save_dir: /workspace/camus/train/20240508-stage1-openpg-nopaf
+ save_model_epoch_interval: 1
+ seed: 12580
+ snr_gamma: 5.0
+ solver:
+   adam_beta1: 0.9
+   adam_beta2: 0.999
+   adam_epsilon: 1.0e-08
+   adam_weight_decay: 0.01
+   enable_xformers_memory_efficient_attention: true
+   gradient_accumulation_steps: 1
+   gradient_checkpointing: false
+   learning_rate: 1.0e-05
+   lr_scheduler: constant
+   lr_warmup_steps: 1
+   max_grad_norm: 1.0
+   max_train_steps: 100000
+   mixed_precision: fp16
+   scale_lr: false
+   use_8bit_adam: false
+ uncond_ratio: 0.1
+ vae_model_path: ./pretrained_weights/sd-vae-ft-mse
+ val:
+   special_steps:
+   - 200
+   validation_steps: 1000
+ validation:
+   metric:
+     batch_size: 4
+     generated_frames:
+     - 16
+     - 45
+     - 98
+     - 150
+     - 188
+     - 220
+     - 268
+     - 300
+     guidance_scale: 2.8
+     ref_frame: 28
+     seed: 42
+     size:
+     - 640
+     - 960
+     steps: 20
+     videos:
+     - configs/inference/metric/91HzMhq7eOS.mp4
+     - configs/inference/metric/A1T-Ea-FlQS.mp4
+     - configs/inference/metric/A1ubDo0PbQS.mp4
+     - configs/inference/metric/A1YNmKj0sCS.mp4
+   pose_image_paths:
+   - configs/inference/ref_images/91c+SL7Cg7S-98.png
+   - configs/inference/ref_images/91c+SL7Cg7S-150.png
+   - configs/inference/ref_images/91c+SL7Cg7S-220.png
+   ref_image_paths:
+   - configs/inference/ref_images/anyone-1.png
+   - configs/inference/ref_images/anyone-2.png
+   - configs/inference/ref_images/anyone-3.png
+   - configs/inference/ref_images/anyone-11.png
+ weight_dtype: fp16
20240508-stage1-openpg-nopaf/denoising_unet-65381.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c863d62ce3d4338a23eb0d144e810b4a88eb9c1ce505f574bfdf5601244af887
+ size 3438374293
20240508-stage1-openpg-nopaf/denoising_unet-66980.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:52632f6c8100a3b591211b521af2a55238f3bc8f405e07813fba81d64fd4bee2
+ size 3438374293
20240508-stage1-openpg-nopaf/denoising_unet-68579.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b981a2f1da3c8079614b855d4912b1893dc11ea45d693722c6825c2b4de0b77e
+ size 3438374293
20240508-stage1-openpg-nopaf/pose_guider-65381.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e1f8aca9ca3f12cf87678561cc385c2a2240f6342645212fe4d1eaf6f5d63c65
+ size 212263301
20240508-stage1-openpg-nopaf/pose_guider-66980.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:28e32fd312ab48515f8d6c89987a60075a57400493754999fd7a54dea28c6930
+ size 212263301
20240508-stage1-openpg-nopaf/pose_guider-68579.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2fb06215f0e5c360108d597cdbb069d414c8520909b800f548dd7e1f449fbe21
+ size 212263301
20240508-stage1-openpg-nopaf/reference_unet-65381.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:42ded4e21f6a816dc1520f8d17ab9aebfe6fe274462a3e22d4c9de156da7c78a
+ size 3438323817
20240508-stage1-openpg-nopaf/reference_unet-66980.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c6cb760f41afd9af4aba70e3baecad276883c3047f1ffe435a12b780d7054df5
+ size 3438323817
20240508-stage1-openpg-nopaf/reference_unet-68579.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:64466c7b2fc2d6d3a180e33cd392f33edda3c1e9223b9dc824a324d9f722cd75
+ size 3438323817
20240510-stage1-9k/config.yaml ADDED
@@ -0,0 +1,105 @@
+ base_model_path: ./pretrained_weights/sd-image-variations-diffusers
+ checkpointing_steps: 1000
+ controlnet_openpose_path: ./pretrained_weights/control_v11p_sd15_openpose/diffusion_pytorch_model.bin
+ data:
+   crop_scale:
+   - 0.8
+   - 1.2
+   do_center_crop: false
+   meta_paths:
+   - /workspace/develop/video/data/ubc_tiktok-dropout0.03/ubc-meta.json
+   - /workspace/develop/video/data/tiktok-dance/good-meta.json
+   - /workspace/develop/video/data/20240321/meta.json
+   - /workspace/develop/video/data/20240327/meta.json
+   - /workspace/develop/video/data/20240506/meta.json
+   - /workspace/develop/video/data/20240509/meta.json
+   ref_augment:
+     downsample:
+       min_scale_logit: -1.2
+       p: 0.3
+     pan:
+     - 0.04
+     - 0.02
+     rotate: 8
+     scale:
+     - 0.9
+     - 1.2
+   sample_margin: 30
+   train_bs: 4
+   train_height: 1152
+   train_width: 768
+ enable_zero_snr: true
+ exp_name: stage1-9k
+ freeze_denoise: false
+ freeze_reference: false
+ image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
+ noise_offset: 0.05
+ noise_scheduler_kwargs:
+   beta_end: 0.012
+   beta_schedule: scaled_linear
+   beta_start: 0.00085
+   clip_sample: false
+   num_train_timesteps: 1000
+   steps_offset: 1
+ openpose_guider:
+   enable: false
+ output_dir: /workspace/camus/train
+ pose_guider_pretrain: true
+ resume_from_checkpoint: ''
+ save_model_epoch_interval: 1
+ seed: 12580
+ snr_gamma: 5.0
+ solver:
+   adam_beta1: 0.9
+   adam_beta2: 0.999
+   adam_epsilon: 1.0e-08
+   adam_weight_decay: 0.01
+   enable_xformers_memory_efficient_attention: true
+   gradient_accumulation_steps: 1
+   gradient_checkpointing: false
+   learning_rate: 1.0e-05
+   lr_scheduler: constant
+   lr_warmup_steps: 1
+   max_grad_norm: 1.0
+   max_train_steps: 100000
+   mixed_precision: fp16
+   scale_lr: false
+   use_8bit_adam: false
+ uncond_ratio: 0.1
+ vae_model_path: ./pretrained_weights/sd-vae-ft-mse
+ val:
+   validation_steps: 1000
+ validation:
+   metric:
+     batch_size: 4
+     generated_frames:
+     - 16
+     - 45
+     - 98
+     - 150
+     - 188
+     - 220
+     - 268
+     - 300
+     guidance_scale: 1.9
+     ref_frame: 28
+     seed: 42
+     size:
+     - 640
+     - 960
+     steps: 20
+     videos:
+     - configs/inference/metric/91HzMhq7eOS.mp4
+     - configs/inference/metric/A1T-Ea-FlQS.mp4
+     - configs/inference/metric/A1ubDo0PbQS.mp4
+     - configs/inference/metric/A1YNmKj0sCS.mp4
+   pose_image_paths:
+   - configs/inference/pose_images/A1eEZvfJRUS/frame70.png
+   - configs/inference/pose_images/A1eEZvfJRUS/frame150.png
+   - configs/inference/pose_images/A1eEZvfJRUS/frame190.png
+   ref_image_paths:
+   - configs/inference/ref_images/anyone-1.png
+   - configs/inference/ref_images/anyone-2.png
+   - configs/inference/ref_images/anyone-3.png
+   - configs/inference/ref_images/anyone-11.png
+ weight_dtype: fp16
20240513-stage2-9k/config.yaml ADDED
@@ -0,0 +1,99 @@
+ base_model_path: ./pretrained_weights/stable-diffusion-v1-5
+ checkpointing_steps: 2000
+ data:
+   crop_scale:
+   - 1
+   - 1
+   do_center_crop: false
+   meta_paths:
+   - /workspace/develop/video/data/ubc_tiktok-dropout0.03/ubc-meta.json
+   - /workspace/develop/video/data/tiktok-dance/good-meta.json
+   - /workspace/develop/video/data/20240321/meta.json
+   - /workspace/develop/video/data/20240327/meta.json
+   - /workspace/develop/video/data/20240506/meta.json
+   - /workspace/develop/video/data/20240509/meta.json
+   n_sample_frames: 24
+   ref_augment:
+     pan:
+     - 0.04
+     - 0.04
+     rotate: 2
+     scale:
+     - 0.9
+     - 1.2
+   sample_rate: 4
+   train_bs: 1
+   train_height: 960
+   train_width: 640
+ enable_zero_snr: true
+ exp_name: stage2-9k
+ image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
+ mm_path: ./pretrained_weights/mm_sd_v15_v2.ckpt
+ noise_offset: 0.05
+ noise_scheduler_kwargs:
+   beta_end: 0.012
+   beta_schedule: linear
+   beta_start: 0.00085
+   clip_sample: false
+   num_train_timesteps: 1000
+   steps_offset: 1
+ output_dir: /workspace/camus/train
+ resume_from_checkpoint: ''
+ save_dir: /workspace/camus/train/20240513-stage2-9k
+ save_model_epoch_interval: 1
+ seed: 12580
+ snr_gamma: 5.0
+ solver:
+   adam_beta1: 0.9
+   adam_beta2: 0.999
+   adam_epsilon: 1.0e-08
+   adam_weight_decay: 0.01
+   enable_xformers_memory_efficient_attention: true
+   gradient_accumulation_steps: 1
+   gradient_checkpointing: true
+   learning_rate: 1.0e-05
+   lr_scheduler: constant
+   lr_warmup_steps: 1
+   max_grad_norm: 1.0
+   max_train_steps: 160000
+   mixed_precision: fp16
+   scale_lr: false
+   use_8bit_adam: true
+ stage1_ckpt_dir: /workspace/camus/train/20240510-stage1-9k
+ stage1_ckpt_step: 75392
+ uncond_ratio: 0.1
+ vae_model_path: ./pretrained_weights/sd-vae-ft-mse
+ val:
+   validation_steps: 1000
+ validation:
+   metric:
+     generate_frame_range:
+     - 30
+     - 54
+     guidance_scale: 2.8
+     ref_frame: 29
+     seed: 42
+     steps: 30
+     videos:
+     - configs/inference/metric/oATCBbieJIB8u3QAMAUwvMi9ymEOIc1AoDOajA.mp4
+     - configs/inference/metric/oonQq0HjAC7ExkJlRSMBBs1q3EIiQgFveLD7fD.mp4
+     - configs/inference/metric/os0aLDIkagGgAfAFQsfICCWMuoL8jm3IgJ0Wey.mp4
+     - configs/inference/metric/oYflAvAyfAIFRf3yQDrLRDCWcEDoFENF9tBEgg.mp4
+   pose_range:
+   - 0
+   - 24
+   test_cases:
+   - - ./configs/inference/ref_images/anyone-3.png
+     - ./configs/inference/pose_videos/demo18.mp4
+   - - ./configs/inference/ref_images/anyone-3-partial.png
+     - ./configs/inference/pose_videos/demo6.mp4
+   - - ./configs/inference/ref_images/anyone-2.png
+     - ./configs/inference/pose_videos/demo11.mp4
+   - - ./configs/inference/ref_images/anyone-1.png
+     - ./configs/inference/pose_videos/demo11.mp4
+   - - ./configs/inference/ref_images/anyone-5.png
+     - ./configs/inference/pose_videos/demo11.mp4
+   - - ./configs/inference/ref_images/anyone-11.png
+     - ./configs/inference/pose_videos/demo15.mp4
+   uniform_along_time: false
+ weight_dtype: fp16