Commit 5121da0
Parent(s): 438adad

train folder of 20240508 commit.

Files changed:
- 20240418-stage1-dance800/config.yaml +103 -0
- 20240422-stage1-ubc+td1/config.yaml +106 -0
- 20240423-stage1-ubc+td10/config.yaml +106 -0
- 20240425-stage2-openpg/config.yaml +92 -0
- 20240428-stage2-6k/config.yaml +97 -0
- 20240504-stage1-51k-raw-opg/config.yaml +105 -0
- 20240508-stage1-openpg-nopaf/checkpoint-68000/optimizer.bin +3 -0
- 20240508-stage1-openpg-nopaf/checkpoint-68000/pytorch_model.bin +3 -0
- 20240508-stage1-openpg-nopaf/checkpoint-68000/random_states_0.pkl +3 -0
- 20240508-stage1-openpg-nopaf/checkpoint-68000/scaler.pt +3 -0
- 20240508-stage1-openpg-nopaf/checkpoint-68000/scheduler.bin +3 -0
- 20240508-stage1-openpg-nopaf/checkpoint-69000/optimizer.bin +3 -0
- 20240508-stage1-openpg-nopaf/checkpoint-69000/pytorch_model.bin +3 -0
- 20240508-stage1-openpg-nopaf/checkpoint-69000/random_states_0.pkl +3 -0
- 20240508-stage1-openpg-nopaf/checkpoint-69000/scaler.pt +3 -0
- 20240508-stage1-openpg-nopaf/checkpoint-69000/scheduler.bin +3 -0
- 20240508-stage1-openpg-nopaf/config.yaml +108 -0
- 20240508-stage1-openpg-nopaf/denoising_unet-65381.pth +3 -0
- 20240508-stage1-openpg-nopaf/denoising_unet-66980.pth +3 -0
- 20240508-stage1-openpg-nopaf/denoising_unet-68579.pth +3 -0
- 20240508-stage1-openpg-nopaf/pose_guider-65381.pth +3 -0
- 20240508-stage1-openpg-nopaf/pose_guider-66980.pth +3 -0
- 20240508-stage1-openpg-nopaf/pose_guider-68579.pth +3 -0
- 20240508-stage1-openpg-nopaf/reference_unet-65381.pth +3 -0
- 20240508-stage1-openpg-nopaf/reference_unet-66980.pth +3 -0
- 20240508-stage1-openpg-nopaf/reference_unet-68579.pth +3 -0
- 20240510-stage1-9k/config.yaml +105 -0
- 20240513-stage2-9k/config.yaml +99 -0
20240418-stage1-dance800/config.yaml
ADDED
@@ -0,0 +1,103 @@
base_model_path: ./pretrained_weights/sd-image-variations-diffusers
checkpointing_steps: 1000
controlnet_openpose_path: ./pretrained_weights/control_v11p_sd15_openpose/diffusion_pytorch_model.bin
data:
  crop_scale:
  - 0.6
  - 1
  do_center_crop: false
  meta_paths:
  - /workspace/develop/video/data/tiktok-dance/good-meta.json
  ref_augment:
    downsample:
      min_scale_logit: -1.2
      p: 0.3
    pan:
    - 0.04
    - 0.02
    rotate: 8
    scale:
    - 0.9
    - 1.4
  sample_margin: 30
  train_bs: 4
  train_height: 1152
  train_width: 768
enable_zero_snr: true
exp_name: stage1-dance800
freeze_denoise: false
freeze_reference: false
image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
noise_offset: 0.05
noise_scheduler_kwargs:
  beta_end: 0.012
  beta_schedule: scaled_linear
  beta_start: 0.00085
  clip_sample: false
  num_train_timesteps: 1000
  steps_offset: 1
openpose_guider:
  enable: false
output_dir: /workspace/camus/train
pose_guider_pretrain: true
resume_from_checkpoint: latest
save_dir: /workspace/camus/train/20240418-stage1-dance800/
save_model_epoch_interval: 1
seed: 12580
snr_gamma: 5.0
solver:
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  adam_weight_decay: 0.01
  enable_xformers_memory_efficient_attention: true
  gradient_accumulation_steps: 1
  gradient_checkpointing: false
  learning_rate: 1.0e-05
  lr_scheduler: constant
  lr_warmup_steps: 1
  max_grad_norm: 1.0
  max_train_steps: 30000
  mixed_precision: fp16
  scale_lr: false
  use_8bit_adam: false
uncond_ratio: 0.1
vae_model_path: ./pretrained_weights/sd-vae-ft-mse
val:
  special_steps:
  - 24001
  validation_steps: 1000
validation:
  metric:
    batch_size: 4
    generated_frames:
    - 16
    - 45
    - 98
    - 150
    - 188
    - 220
    - 268
    - 284
    guidance_scale: 2.4
    ref_frame: 16
    seed: 42
    size:
    - 768
    - 1152
    steps: 20
    videos:
    - configs/inference/metric/o4flk5RPE4D4fgNEUNFRZIbOBjCsEgB9DQQQLA.mp4
    - configs/inference/metric/ocMJyAEDjQzjwqAuIPABAvmRyofjKiYhBExati.mp4
    - configs/inference/metric/ocQoBObnUgBnVskCnPe41sYRiBcAFD5f8AN1Rg.mp4
    - configs/inference/metric/oE2tJFpDWANbMe7cxAg3hoq0QAPNeRnCQefG8F.mp4
    - configs/inference/metric/oEOgTIKvy7lAQIfZ37E5BFmCVBID3gIQUQfMdv.mp4
    - configs/inference/metric/oEtwozJ6AoIBJ6oyK6rAAQGAOiEWIQixF2F2fB.mp4
    - configs/inference/metric/oUGmQqeqoAAega5fGgnAICfOWJRAAFTJCgDibU.mp4
    - configs/inference/metric/oYQJ5zLiEgG71SAAFEfAeVIEVFROW4ZGQJTfeF.mp4
  pose_image_paths:
  - configs/inference/pose_images/A1eEZvfJRUS/frame70.png
  ref_image_paths:
  - configs/inference/ref_images/anyone-3.png
  - configs/inference/ref_images/anyone-11.png
weight_dtype: fp16
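A config like the one above loads cleanly with OmegaConf, and noise_scheduler_kwargs maps one-to-one onto a diffusers scheduler constructor. A minimal sketch, assuming a diffusers-style DDIMScheduler (the actual scheduler class the training code uses is not shown in this commit) and assuming enable_zero_snr corresponds to zero-terminal-SNR beta rescaling:

    # Sketch: load a training config and build a noise scheduler from it.
    # DDIMScheduler is a stand-in for whatever the training code uses.
    from omegaconf import OmegaConf
    from diffusers import DDIMScheduler

    cfg = OmegaConf.load("20240418-stage1-dance800/config.yaml")
    sched_kwargs = OmegaConf.to_container(cfg.noise_scheduler_kwargs)
    if cfg.enable_zero_snr:
        # Assumption: enable_zero_snr means zero-terminal-SNR rescaling,
        # which diffusers exposes via the two flags below.
        sched_kwargs["rescale_betas_zero_snr"] = True
        sched_kwargs["timestep_spacing"] = "trailing"

    scheduler = DDIMScheduler(**sched_kwargs)
    print(scheduler.config.beta_schedule)  # scaled_linear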
20240422-stage1-ubc+td1/config.yaml
ADDED
@@ -0,0 +1,106 @@
base_model_path: ./pretrained_weights/sd-image-variations-diffusers
checkpointing_steps: 1000
controlnet_openpose_path: ./pretrained_weights/control_v11p_sd15_openpose/diffusion_pytorch_model.bin
data:
  crop_scale:
  - 0.8
  - 1.2
  do_center_crop: false
  meta_paths:
  - /workspace/develop/video/data/ubc_tiktok-dropout0.03/ubc-meta.json
  - /workspace/develop/video/data/tiktok-dance/meta-1per_person.json
  ref_augment:
    downsample:
      min_scale_logit: -1.2
      p: 0.3
    pan:
    - 0.04
    - 0.02
    rotate: 8
    scale:
    - 0.9
    - 1.2
  sample_margin: 30
  train_bs: 4
  train_height: 1152
  train_width: 768
enable_zero_snr: true
exp_name: stage1-ubc+td1
freeze_denoise: false
freeze_reference: false
image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
noise_offset: 0.05
noise_scheduler_kwargs:
  beta_end: 0.012
  beta_schedule: scaled_linear
  beta_start: 0.00085
  clip_sample: false
  num_train_timesteps: 1000
  steps_offset: 1
openpose_guider:
  enable: false
output_dir: /workspace/camus/train
pose_guider_pretrain: true
resume_from_checkpoint: latest
save_dir: /workspace/camus/train/20240422-stage1-ubc+td1
save_model_epoch_interval: 1
seed: 12580
snr_gamma: 5.0
solver:
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  adam_weight_decay: 0.01
  enable_xformers_memory_efficient_attention: true
  gradient_accumulation_steps: 1
  gradient_checkpointing: false
  learning_rate: 1.0e-05
  lr_scheduler: constant
  lr_warmup_steps: 1
  max_grad_norm: 1.0
  max_train_steps: 30000
  mixed_precision: fp16
  scale_lr: false
  use_8bit_adam: false
uncond_ratio: 0.1
vae_model_path: ./pretrained_weights/sd-vae-ft-mse
val:
  validation_steps: 2000
validation:
  metric:
    batch_size: 4
    generated_frames:
    - 16
    - 45
    - 98
    - 150
    - 188
    - 220
    - 268
    - 284
    guidance_scale: 2.4
    ref_frame: 16
    seed: 42
    size:
    - 768
    - 1152
    steps: 20
    videos:
    - configs/inference/metric/o4flk5RPE4D4fgNEUNFRZIbOBjCsEgB9DQQQLA.mp4
    - configs/inference/metric/ocMJyAEDjQzjwqAuIPABAvmRyofjKiYhBExati.mp4
    - configs/inference/metric/ocQoBObnUgBnVskCnPe41sYRiBcAFD5f8AN1Rg.mp4
    - configs/inference/metric/oE2tJFpDWANbMe7cxAg3hoq0QAPNeRnCQefG8F.mp4
    - configs/inference/metric/oEOgTIKvy7lAQIfZ37E5BFmCVBID3gIQUQfMdv.mp4
    - configs/inference/metric/oEtwozJ6AoIBJ6oyK6rAAQGAOiEWIQixF2F2fB.mp4
    - configs/inference/metric/oUGmQqeqoAAega5fGgnAICfOWJRAAFTJCgDibU.mp4
    - configs/inference/metric/oYQJ5zLiEgG71SAAFEfAeVIEVFROW4ZGQJTfeF.mp4
  pose_image_paths:
  - configs/inference/pose_images/A1eEZvfJRUS/frame70.png
  - configs/inference/pose_images/A1eEZvfJRUS/frame150.png
  - configs/inference/pose_images/A1eEZvfJRUS/frame190.png
  ref_image_paths:
  - configs/inference/ref_images/anyone-1.png
  - configs/inference/ref_images/anyone-2.png
  - configs/inference/ref_images/anyone-3.png
  - configs/inference/ref_images/anyone-11.png
weight_dtype: fp16
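ref_augment describes the random jitter applied to the reference frame: pan as maximum translation fractions, rotate as maximum degrees, scale as an affine zoom range, plus an occasional downsample. A sketch of one plausible reading using torchvision; the downsample branch (sigmoid of a logit drawn down to min_scale_logit) is a guess, not confirmed by anything in this commit:

    # Sketch of the reference-image augmentation implied by ref_augment.
    # The affine part maps onto torchvision's RandomAffine; the downsample
    # branch is an assumption about what min_scale_logit means.
    import random
    import torch
    import torchvision.transforms as T
    import torchvision.transforms.functional as F

    affine = T.RandomAffine(
        degrees=8,               # ref_augment.rotate
        translate=(0.04, 0.02),  # ref_augment.pan
        scale=(0.9, 1.2),        # ref_augment.scale
    )

    def ref_augment(img: torch.Tensor) -> torch.Tensor:
        img = affine(img)
        if random.random() < 0.3:  # ref_augment.downsample.p
            # Guess: draw a logit in [min_scale_logit, 0], squash it, shrink
            # the image to that fraction (sigmoid(-1.2) ~ 0.23x at worst),
            # then resize back up to simulate a low-resolution reference.
            logit = random.uniform(-1.2, 0.0)
            s = torch.sigmoid(torch.tensor(logit)).item()
            h, w = img.shape[-2:]
            small = F.resize(img, [max(1, int(h * s)), max(1, int(w * s))])
            img = F.resize(small, [h, w])
        return img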
20240423-stage1-ubc+td10/config.yaml
ADDED
@@ -0,0 +1,106 @@
base_model_path: ./pretrained_weights/sd-image-variations-diffusers
checkpointing_steps: 1000
controlnet_openpose_path: ./pretrained_weights/control_v11p_sd15_openpose/diffusion_pytorch_model.bin
data:
  crop_scale:
  - 0.8
  - 1.2
  do_center_crop: false
  meta_paths:
  - /workspace/develop/video/data/ubc_tiktok-dropout0.03/ubc-meta.json
  - /workspace/develop/video/data/tiktok-dance/meta-10per_person.json
  ref_augment:
    downsample:
      min_scale_logit: -1.2
      p: 0.3
    pan:
    - 0.04
    - 0.02
    rotate: 8
    scale:
    - 0.9
    - 1.2
  sample_margin: 30
  train_bs: 4
  train_height: 1152
  train_width: 768
enable_zero_snr: true
exp_name: stage1-ubc+td10
freeze_denoise: false
freeze_reference: false
image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
noise_offset: 0.05
noise_scheduler_kwargs:
  beta_end: 0.012
  beta_schedule: scaled_linear
  beta_start: 0.00085
  clip_sample: false
  num_train_timesteps: 1000
  steps_offset: 1
openpose_guider:
  enable: false
output_dir: /workspace/camus/train
pose_guider_pretrain: true
resume_from_checkpoint: latest
save_dir: /workspace/camus/train/20240423-stage1-ubc+td10
save_model_epoch_interval: 1
seed: 12580
snr_gamma: 5.0
solver:
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  adam_weight_decay: 0.01
  enable_xformers_memory_efficient_attention: true
  gradient_accumulation_steps: 1
  gradient_checkpointing: false
  learning_rate: 1.0e-05
  lr_scheduler: constant
  lr_warmup_steps: 1
  max_grad_norm: 1.0
  max_train_steps: 30000
  mixed_precision: fp16
  scale_lr: false
  use_8bit_adam: false
uncond_ratio: 0.1
vae_model_path: ./pretrained_weights/sd-vae-ft-mse
val:
  validation_steps: 2000
validation:
  metric:
    batch_size: 4
    generated_frames:
    - 16
    - 45
    - 98
    - 150
    - 188
    - 220
    - 268
    - 284
    guidance_scale: 2.4
    ref_frame: 16
    seed: 42
    size:
    - 768
    - 1152
    steps: 20
    videos:
    - configs/inference/metric/o4flk5RPE4D4fgNEUNFRZIbOBjCsEgB9DQQQLA.mp4
    - configs/inference/metric/ocMJyAEDjQzjwqAuIPABAvmRyofjKiYhBExati.mp4
    - configs/inference/metric/ocQoBObnUgBnVskCnPe41sYRiBcAFD5f8AN1Rg.mp4
    - configs/inference/metric/oE2tJFpDWANbMe7cxAg3hoq0QAPNeRnCQefG8F.mp4
    - configs/inference/metric/oEOgTIKvy7lAQIfZ37E5BFmCVBID3gIQUQfMdv.mp4
    - configs/inference/metric/oEtwozJ6AoIBJ6oyK6rAAQGAOiEWIQixF2F2fB.mp4
    - configs/inference/metric/oUGmQqeqoAAega5fGgnAICfOWJRAAFTJCgDibU.mp4
    - configs/inference/metric/oYQJ5zLiEgG71SAAFEfAeVIEVFROW4ZGQJTfeF.mp4
  pose_image_paths:
  - configs/inference/pose_images/A1eEZvfJRUS/frame70.png
  - configs/inference/pose_images/A1eEZvfJRUS/frame150.png
  - configs/inference/pose_images/A1eEZvfJRUS/frame190.png
  ref_image_paths:
  - configs/inference/ref_images/anyone-1.png
  - configs/inference/ref_images/anyone-2.png
  - configs/inference/ref_images/anyone-3.png
  - configs/inference/ref_images/anyone-11.png
weight_dtype: fp16
20240425-stage2-openpg/config.yaml
ADDED
@@ -0,0 +1,92 @@
base_model_path: ./pretrained_weights/stable-diffusion-v1-5
checkpointing_steps: 2000
data:
  crop_scale:
  - 1
  - 1
  do_center_crop: false
  meta_paths:
  - /workspace/develop/video/data/ubc_tiktok-dropout0.03/ubc-meta.json
  - /workspace/develop/video/data/tiktok-dance/good-meta.json
  - /workspace/develop/video/data/20240321/meta.json
  - /workspace/develop/video/data/20240327/meta.json
  n_sample_frames: 24
  ref_augment:
    pan:
    - 0.04
    - 0.04
    rotate: 2
    scale:
    - 0.9
    - 1.0
  sample_rate: 4
  train_bs: 1
  train_height: 960
  train_width: 640
enable_zero_snr: true
exp_name: stage2-openpg
image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
mm_path: ./pretrained_weights/mm_sd_v15_v2.ckpt
noise_offset: 0.05
noise_scheduler_kwargs:
  beta_end: 0.012
  beta_schedule: linear
  beta_start: 0.00085
  clip_sample: false
  num_train_timesteps: 1000
  steps_offset: 1
openpose_guider:
  block_out_channels:
  - 96
  - 192
  enable: true
output_dir: /workspace/camus/train
resume_from_checkpoint: latest
save_dir: /workspace/camus/train/20240425-stage2-openpg
save_model_epoch_interval: 1
seed: 12580
snr_gamma: 5.0
solver:
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  adam_weight_decay: 0.01
  enable_xformers_memory_efficient_attention: true
  gradient_accumulation_steps: 1
  gradient_checkpointing: true
  learning_rate: 1.0e-05
  lr_scheduler: constant
  lr_warmup_steps: 1
  max_grad_norm: 1.0
  max_train_steps: 160000
  mixed_precision: fp16
  scale_lr: false
  use_8bit_adam: true
stage1_ckpt_dir: /workspace/camus/train/20240418-stage1-openpg-c96_192
stage1_ckpt_step: 86396
uncond_ratio: 0.1
vae_model_path: ./pretrained_weights/sd-vae-ft-mse
val:
  validation_steps: 1000
validation:
  metric:
    generate_frame_range:
    - 50
    - 74
    guidance_scale: 2.4
    ref_frame: 29
    seed: 42
    steps: 20
    videos:
    - configs/inference/metric/A1ubDo0PbQS.mp4
    - configs/inference/metric/oEtwozJ6AoIBJ6oyK6rAAQGAOiEWIQixF2F2fB.mp4
    - configs/inference/metric/oEOgTIKvy7lAQIfZ37E5BFmCVBID3gIQUQfMdv.mp4
    - configs/inference/metric/ocQoBObnUgBnVskCnPe41sYRiBcAFD5f8AN1Rg.mp4
  pose_range:
  - 24
  - 48
  test_cases:
  - - ./configs/inference/ref_images/anyone-2.png
    - ./configs/inference/metric/91HzMhq7eOS.mp4
  uniform_along_time: false
weight_dtype: fp16
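stage1_ckpt_dir and stage1_ckpt_step point this stage-2 run at a finished stage-1 run; the <module>-<step>.pth naming is visible in the 20240508-stage1-openpg-nopaf files later in this commit. A minimal hand-off sketch, assuming the snapshots are plain state_dicts:

    # Sketch: initialize stage-2 modules from stage-1 snapshots, following
    # the <module>-<step>.pth naming visible in this commit.
    import os
    import torch

    def load_stage1_weights(modules: dict, ckpt_dir: str, step: int) -> None:
        # modules maps a snapshot prefix, e.g. "denoising_unet", to the
        # already-constructed nn.Module that should receive its weights.
        for name, module in modules.items():
            path = os.path.join(ckpt_dir, f"{name}-{step}.pth")
            module.load_state_dict(torch.load(path, map_location="cpu"))

    # Usage with this config's values (modules built by the training code):
    # load_stage1_weights(
    #     {"denoising_unet": denoising_unet,
    #      "reference_unet": reference_unet,
    #      "pose_guider": pose_guider},
    #     "/workspace/camus/train/20240418-stage1-openpg-c96_192",
    #     86396,
    # )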
20240428-stage2-6k/config.yaml
ADDED
@@ -0,0 +1,97 @@
base_model_path: ./pretrained_weights/stable-diffusion-v1-5
checkpointing_steps: 2000
data:
  crop_scale:
  - 1
  - 1
  do_center_crop: false
  meta_paths:
  - /workspace/develop/video/data/ubc_tiktok-dropout0.03/ubc-meta.json
  - /workspace/develop/video/data/tiktok-dance/good-meta.json
  - /workspace/develop/video/data/20240321/meta.json
  - /workspace/develop/video/data/20240327/meta.json
  n_sample_frames: 24
  ref_augment:
    pan:
    - 0.04
    - 0.04
    rotate: 2
    scale:
    - 0.9
    - 1.2
  sample_rate: 4
  train_bs: 1
  train_height: 960
  train_width: 640
enable_zero_snr: true
exp_name: stage2-6k
image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
mm_path: ./pretrained_weights/mm_sd_v15_v2.ckpt
noise_offset: 0.05
noise_scheduler_kwargs:
  beta_end: 0.012
  beta_schedule: linear
  beta_start: 0.00085
  clip_sample: false
  num_train_timesteps: 1000
  steps_offset: 1
output_dir: /workspace/camus/train
resume_from_checkpoint: latest
save_dir: /workspace/camus/train/20240428-stage2-6k
save_model_epoch_interval: 1
seed: 12580
snr_gamma: 5.0
solver:
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  adam_weight_decay: 0.01
  enable_xformers_memory_efficient_attention: true
  gradient_accumulation_steps: 1
  gradient_checkpointing: true
  learning_rate: 1.0e-05
  lr_scheduler: constant
  lr_warmup_steps: 1
  max_grad_norm: 1.0
  max_train_steps: 160000
  mixed_precision: fp16
  scale_lr: false
  use_8bit_adam: true
stage1_ckpt_dir: /workspace/camus/train/20240421-stage1-6k
stage1_ckpt_step: 78782
uncond_ratio: 0.1
vae_model_path: ./pretrained_weights/sd-vae-ft-mse
val:
  validation_steps: 1000
validation:
  metric:
    generate_frame_range:
    - 30
    - 54
    guidance_scale: 2.8
    ref_frame: 29
    seed: 42
    steps: 30
    videos:
    - configs/inference/metric/oATCBbieJIB8u3QAMAUwvMi9ymEOIc1AoDOajA.mp4
    - configs/inference/metric/oonQq0HjAC7ExkJlRSMBBs1q3EIiQgFveLD7fD.mp4
    - configs/inference/metric/os0aLDIkagGgAfAFQsfICCWMuoL8jm3IgJ0Wey.mp4
    - configs/inference/metric/oYflAvAyfAIFRf3yQDrLRDCWcEDoFENF9tBEgg.mp4
  pose_range:
  - 0
  - 24
  test_cases:
  - - ./configs/inference/ref_images/anyone-3.png
    - ./configs/inference/pose_videos/demo18.mp4
  - - ./configs/inference/ref_images/anyone-3-partial.png
    - ./configs/inference/pose_videos/demo6.mp4
  - - ./configs/inference/ref_images/anyone-2.png
    - ./configs/inference/pose_videos/demo11.mp4
  - - ./configs/inference/ref_images/anyone-1.png
    - ./configs/inference/pose_videos/demo11.mp4
  - - ./configs/inference/ref_images/anyone-5.png
    - ./configs/inference/pose_videos/demo11.mp4
  - - ./configs/inference/ref_images/anyone-11.png
    - ./configs/inference/pose_videos/demo15.mp4
  uniform_along_time: false
weight_dtype: fp16
20240504-stage1-51k-raw-opg/config.yaml
ADDED
@@ -0,0 +1,105 @@
base_model_path: ./pretrained_weights/sd-image-variations-diffusers
checkpointing_steps: 1000
controlnet_openpose_path: ./pretrained_weights/control_v11p_sd15_openpose/diffusion_pytorch_model.bin
data:
  crop_scale:
  - 0.8
  - 1.2
  do_center_crop: false
  meta_paths:
  - /workspace/develop/video/data/202403raw/meta.json
  ref_augment:
    downsample:
      min_scale_logit: -1.2
      p: 0.3
    pan:
    - 0.04
    - 0.02
    rotate: 8
    scale:
    - 0.9
    - 1.2
  sample_margin: 30
  train_bs: 4
  train_height: 1152
  train_width: 768
enable_zero_snr: true
exp_name: stage1-51k-raw-opg
freeze_denoise: false
freeze_reference: false
image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
noise_offset: 0.05
noise_scheduler_kwargs:
  beta_end: 0.012
  beta_schedule: scaled_linear
  beta_start: 0.00085
  clip_sample: false
  num_train_timesteps: 1000
  steps_offset: 1
openpose_guider:
  block_out_channels:
  - 96
  - 192
  enable: true
  model_path: ./pretrained_weights/body_pose_model.pth
output_dir: /workspace/camus/train
pose_guider_pretrain: true
resume_from_checkpoint: latest
save_dir: /workspace/camus/train/20240504-stage1-51k-raw-opg
save_model_epoch_interval: 1
seed: 12580
snr_gamma: 5.0
solver:
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  adam_weight_decay: 0.01
  enable_xformers_memory_efficient_attention: true
  gradient_accumulation_steps: 1
  gradient_checkpointing: false
  learning_rate: 1.0e-05
  lr_scheduler: constant
  lr_warmup_steps: 1
  max_grad_norm: 1.0
  max_train_steps: 100000
  mixed_precision: fp16
  scale_lr: false
  use_8bit_adam: false
uncond_ratio: 0.1
vae_model_path: ./pretrained_weights/sd-vae-ft-mse
val:
  validation_steps: 1000
validation:
  metric:
    batch_size: 4
    generated_frames:
    - 16
    - 45
    - 98
    - 150
    - 188
    - 220
    - 268
    - 300
    guidance_scale: 1.9
    ref_frame: 28
    seed: 42
    size:
    - 640
    - 960
    steps: 20
    videos:
    - configs/inference/metric/91HzMhq7eOS.mp4
    - configs/inference/metric/A1T-Ea-FlQS.mp4
    - configs/inference/metric/A1ubDo0PbQS.mp4
    - configs/inference/metric/A1YNmKj0sCS.mp4
  pose_image_paths:
  - configs/inference/ref_images/91c+SL7Cg7S-98.png
  - configs/inference/ref_images/91c+SL7Cg7S-150.png
  - configs/inference/ref_images/91c+SL7Cg7S-220.png
  ref_image_paths:
  - configs/inference/ref_images/anyone-1.png
  - configs/inference/ref_images/anyone-2.png
  - configs/inference/ref_images/anyone-3.png
  - configs/inference/ref_images/anyone-11.png
weight_dtype: fp16
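This is the first config here that enables openpose_guider, with block_out_channels: [96, 192] suggesting a small two-stage convolutional encoder ahead of the pose guider, and model_path pointing at OpenPose body-model weights for keypoint extraction. Purely as a hypothetical illustration of such a module's shape (not the repository's actual implementation):

    # Hypothetical sketch of a two-stage conv encoder matching
    # block_out_channels: [96, 192]; NOT the repository's actual module.
    import torch
    import torch.nn as nn

    class TinyPoseEncoder(nn.Module):
        def __init__(self, in_channels=3, block_out_channels=(96, 192)):
            super().__init__()
            layers = []
            ch = in_channels
            for out_ch in block_out_channels:
                # Each block halves the spatial resolution.
                layers += [nn.Conv2d(ch, out_ch, 3, stride=2, padding=1),
                           nn.SiLU()]
                ch = out_ch
            self.net = nn.Sequential(*layers)

        def forward(self, x):
            return self.net(x)

    feat = TinyPoseEncoder()(torch.randn(1, 3, 960, 640))
    print(feat.shape)  # torch.Size([1, 192, 240, 160])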
20240508-stage1-openpg-nopaf/checkpoint-68000/optimizer.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4992207980f1536bd01774c79570964dc5914869eacdbf4e036e5be6d3a08009
size 13608730321
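Every binary in this commit is checked in as a Git LFS pointer with exactly the three fields above. A small sketch that parses a pointer file and verifies a downloaded blob against its oid and size:

    # Parse a git-lfs pointer file (version / oid / size) and check that a
    # downloaded blob matches its sha256 oid and byte size.
    import hashlib
    from pathlib import Path

    def parse_lfs_pointer(pointer_path: str) -> dict:
        fields = dict(line.split(" ", 1)
                      for line in Path(pointer_path).read_text().splitlines()
                      if line.strip())
        assert fields["version"].startswith("https://git-lfs.github.com/spec/")
        return fields

    def verify_blob(blob_path: str, fields: dict) -> bool:
        data = Path(blob_path).read_bytes()
        oid = "sha256:" + hashlib.sha256(data).hexdigest()
        return oid == fields["oid"] and len(data) == int(fields["size"])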
20240508-stage1-openpg-nopaf/checkpoint-68000/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:10cba949e36100e45347b68dbb7c450f68f22ca6eddd374135e04a167d349939
size 7089007643
20240508-stage1-openpg-nopaf/checkpoint-68000/random_states_0.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:006807bad9ad34cbe6cea8f2852e1958e302fc1d48af87193038df01ebdf7f54
size 14663
20240508-stage1-openpg-nopaf/checkpoint-68000/scaler.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e8044a6001b7773ad8995c7d8eaca15afedbae86f4988d9021814efe220547b7
size 557
20240508-stage1-openpg-nopaf/checkpoint-68000/scheduler.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:531528e64a56b148ae89500f53709845c48cc71002664c9dc655272f32520b7a
size 563
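The checkpoint-68000 file set (optimizer.bin, pytorch_model.bin, random_states_0.pkl, scaler.pt, scheduler.bin) matches the layout written by Hugging Face Accelerate's save_state, which is presumably what resume_from_checkpoint: latest consumes. A minimal resume sketch, assuming an Accelerate-based loop whose objects have already gone through accelerator.prepare:

    # Sketch: resume from an Accelerate-style checkpoint directory.
    from accelerate import Accelerator

    accelerator = Accelerator(mixed_precision="fp16")
    # ... model, optimizer, dataloader, lr_scheduler = accelerator.prepare(...)
    accelerator.load_state(
        "/workspace/camus/train/20240508-stage1-openpg-nopaf/checkpoint-68000"
    )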
20240508-stage1-openpg-nopaf/checkpoint-69000/optimizer.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5bf4e2162da621736604cc2fb72622ae9bd06c5f62135da6bec546d792f89da2
size 13608730321
20240508-stage1-openpg-nopaf/checkpoint-69000/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:74feaf9a2826975a96d2148c6768b6a36493b561fc973bda16d11df5d0cbc471
size 7089007643
20240508-stage1-openpg-nopaf/checkpoint-69000/random_states_0.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a2fbfcb40a710a5f4392303e002cc3301bc6a3cb5f094e119b47fb82ff721292
size 14727
20240508-stage1-openpg-nopaf/checkpoint-69000/scaler.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d27fb664e5da2431936336dcfd332b3f059d007cac2905bf45738234c84c3618
size 557
20240508-stage1-openpg-nopaf/checkpoint-69000/scheduler.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0f47d91314bb33995bbc73a56304786baf645f2d50ad3fd39f0b8c360b7e8e85
size 563
20240508-stage1-openpg-nopaf/config.yaml
ADDED
@@ -0,0 +1,108 @@
base_model_path: ./pretrained_weights/sd-image-variations-diffusers
checkpointing_steps: 1000
data:
  crop_scale:
  - 0.8
  - 1.2
  do_center_crop: false
  meta_paths:
  - /workspace/develop/video/data/ubc_tiktok-dropout0.03/ubc-meta.json
  - /workspace/develop/video/data/tiktok-dance/good-meta.json
  - /workspace/develop/video/data/20240321/meta.json
  - /workspace/develop/video/data/20240327/meta.json
  ref_augment:
    downsample:
      min_scale_logit: -1.2
      p: 0.3
    pan:
    - 0.04
    - 0.02
    rotate: 8
    scale:
    - 0.9
    - 1.2
  sample_margin: 30
  train_bs: 4
  train_height: 1152
  train_width: 768
enable_zero_snr: true
exp_name: stage1-openpg-nopaf
image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
noise_offset: 0.05
noise_scheduler_kwargs:
  beta_end: 0.012
  beta_schedule: scaled_linear
  beta_start: 0.00085
  clip_sample: false
  num_train_timesteps: 1000
  steps_offset: 1
openpose_guider:
  block_out_channels:
  - 96
  - 192
  enable: true
  exclude_paf: true
  model_path: ./pretrained_weights/body_pose_model.pth
output_dir: /workspace/camus/train
pose_guider_pretrain: false
resume_from_checkpoint: latest
save_dir: /workspace/camus/train/20240508-stage1-openpg-nopaf
save_model_epoch_interval: 1
seed: 12580
snr_gamma: 5.0
solver:
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  adam_weight_decay: 0.01
  enable_xformers_memory_efficient_attention: true
  gradient_accumulation_steps: 1
  gradient_checkpointing: false
  learning_rate: 1.0e-05
  lr_scheduler: constant
  lr_warmup_steps: 1
  max_grad_norm: 1.0
  max_train_steps: 100000
  mixed_precision: fp16
  scale_lr: false
  use_8bit_adam: false
uncond_ratio: 0.1
vae_model_path: ./pretrained_weights/sd-vae-ft-mse
val:
  special_steps:
  - 200
  validation_steps: 1000
validation:
  metric:
    batch_size: 4
    generated_frames:
    - 16
    - 45
    - 98
    - 150
    - 188
    - 220
    - 268
    - 300
    guidance_scale: 2.8
    ref_frame: 28
    seed: 42
    size:
    - 640
    - 960
    steps: 20
    videos:
    - configs/inference/metric/91HzMhq7eOS.mp4
    - configs/inference/metric/A1T-Ea-FlQS.mp4
    - configs/inference/metric/A1ubDo0PbQS.mp4
    - configs/inference/metric/A1YNmKj0sCS.mp4
  pose_image_paths:
  - configs/inference/ref_images/91c+SL7Cg7S-98.png
  - configs/inference/ref_images/91c+SL7Cg7S-150.png
  - configs/inference/ref_images/91c+SL7Cg7S-220.png
  ref_image_paths:
  - configs/inference/ref_images/anyone-1.png
  - configs/inference/ref_images/anyone-2.png
  - configs/inference/ref_images/anyone-3.png
  - configs/inference/ref_images/anyone-11.png
weight_dtype: fp16
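With nine near-identical configs in one commit, the informative part is the run-to-run delta; relative to 20240504-stage1-51k-raw-opg, this run adds exclude_paf: true, flips pose_guider_pretrain to false, and raises guidance_scale to 2.8. A small sketch for surfacing such deltas automatically:

    # Sketch: report keys whose values differ between two committed configs.
    from omegaconf import OmegaConf

    def flat(d, prefix=""):
        # Flatten nested dicts into dotted keys; lists stay as values.
        out = {}
        for k, v in d.items():
            key = f"{prefix}{k}"
            if isinstance(v, dict):
                out.update(flat(v, key + "."))
            else:
                out[key] = v
        return out

    a = flat(OmegaConf.to_container(
        OmegaConf.load("20240504-stage1-51k-raw-opg/config.yaml")))
    b = flat(OmegaConf.to_container(
        OmegaConf.load("20240508-stage1-openpg-nopaf/config.yaml")))
    for key in sorted(a.keys() | b.keys()):
        if a.get(key) != b.get(key):
            print(key, a.get(key), "->", b.get(key))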
20240508-stage1-openpg-nopaf/denoising_unet-65381.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c863d62ce3d4338a23eb0d144e810b4a88eb9c1ce505f574bfdf5601244af887
size 3438374293
20240508-stage1-openpg-nopaf/denoising_unet-66980.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:52632f6c8100a3b591211b521af2a55238f3bc8f405e07813fba81d64fd4bee2
size 3438374293
20240508-stage1-openpg-nopaf/denoising_unet-68579.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b981a2f1da3c8079614b855d4912b1893dc11ea45d693722c6825c2b4de0b77e
size 3438374293
20240508-stage1-openpg-nopaf/pose_guider-65381.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e1f8aca9ca3f12cf87678561cc385c2a2240f6342645212fe4d1eaf6f5d63c65
size 212263301
20240508-stage1-openpg-nopaf/pose_guider-66980.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:28e32fd312ab48515f8d6c89987a60075a57400493754999fd7a54dea28c6930
size 212263301
20240508-stage1-openpg-nopaf/pose_guider-68579.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2fb06215f0e5c360108d597cdbb069d414c8520909b800f548dd7e1f449fbe21
size 212263301
20240508-stage1-openpg-nopaf/reference_unet-65381.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:42ded4e21f6a816dc1520f8d17ab9aebfe6fe274462a3e22d4c9de156da7c78a
size 3438323817
20240508-stage1-openpg-nopaf/reference_unet-66980.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c6cb760f41afd9af4aba70e3baecad276883c3047f1ffe435a12b780d7054df5
size 3438323817
20240508-stage1-openpg-nopaf/reference_unet-68579.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:64466c7b2fc2d6d3a180e33cd392f33edda3c1e9223b9dc824a324d9f722cd75
size 3438323817
20240510-stage1-9k/config.yaml
ADDED
@@ -0,0 +1,105 @@
base_model_path: ./pretrained_weights/sd-image-variations-diffusers
checkpointing_steps: 1000
controlnet_openpose_path: ./pretrained_weights/control_v11p_sd15_openpose/diffusion_pytorch_model.bin
data:
  crop_scale:
  - 0.8
  - 1.2
  do_center_crop: false
  meta_paths:
  - /workspace/develop/video/data/ubc_tiktok-dropout0.03/ubc-meta.json
  - /workspace/develop/video/data/tiktok-dance/good-meta.json
  - /workspace/develop/video/data/20240321/meta.json
  - /workspace/develop/video/data/20240327/meta.json
  - /workspace/develop/video/data/20240506/meta.json
  - /workspace/develop/video/data/20240509/meta.json
  ref_augment:
    downsample:
      min_scale_logit: -1.2
      p: 0.3
    pan:
    - 0.04
    - 0.02
    rotate: 8
    scale:
    - 0.9
    - 1.2
  sample_margin: 30
  train_bs: 4
  train_height: 1152
  train_width: 768
enable_zero_snr: true
exp_name: stage1-9k
freeze_denoise: false
freeze_reference: false
image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
noise_offset: 0.05
noise_scheduler_kwargs:
  beta_end: 0.012
  beta_schedule: scaled_linear
  beta_start: 0.00085
  clip_sample: false
  num_train_timesteps: 1000
  steps_offset: 1
openpose_guider:
  enable: false
output_dir: /workspace/camus/train
pose_guider_pretrain: true
resume_from_checkpoint: ''
save_model_epoch_interval: 1
seed: 12580
snr_gamma: 5.0
solver:
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  adam_weight_decay: 0.01
  enable_xformers_memory_efficient_attention: true
  gradient_accumulation_steps: 1
  gradient_checkpointing: false
  learning_rate: 1.0e-05
  lr_scheduler: constant
  lr_warmup_steps: 1
  max_grad_norm: 1.0
  max_train_steps: 100000
  mixed_precision: fp16
  scale_lr: false
  use_8bit_adam: false
uncond_ratio: 0.1
vae_model_path: ./pretrained_weights/sd-vae-ft-mse
val:
  validation_steps: 1000
validation:
  metric:
    batch_size: 4
    generated_frames:
    - 16
    - 45
    - 98
    - 150
    - 188
    - 220
    - 268
    - 300
    guidance_scale: 1.9
    ref_frame: 28
    seed: 42
    size:
    - 640
    - 960
    steps: 20
    videos:
    - configs/inference/metric/91HzMhq7eOS.mp4
    - configs/inference/metric/A1T-Ea-FlQS.mp4
    - configs/inference/metric/A1ubDo0PbQS.mp4
    - configs/inference/metric/A1YNmKj0sCS.mp4
  pose_image_paths:
  - configs/inference/pose_images/A1eEZvfJRUS/frame70.png
  - configs/inference/pose_images/A1eEZvfJRUS/frame150.png
  - configs/inference/pose_images/A1eEZvfJRUS/frame190.png
  ref_image_paths:
  - configs/inference/ref_images/anyone-1.png
  - configs/inference/ref_images/anyone-2.png
  - configs/inference/ref_images/anyone-3.png
  - configs/inference/ref_images/anyone-11.png
weight_dtype: fp16
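Unlike the earlier runs, this config sets resume_from_checkpoint: '' (start fresh) rather than latest. One common way latest is resolved, sketched under the assumption that the script scans its save directory for checkpoint-<step> subdirectories:

    # Sketch: resolve resume_from_checkpoint == "latest" by picking the
    # checkpoint-<step> directory with the highest step number.
    from pathlib import Path

    def latest_checkpoint(save_dir: str):
        dirs = [p for p in Path(save_dir).glob("checkpoint-*") if p.is_dir()]
        return max(dirs, key=lambda p: int(p.name.split("-")[1]), default=None)

    ckpt = latest_checkpoint("/workspace/camus/train/20240508-stage1-openpg-nopaf")
    # -> .../checkpoint-69000, given the directories committed above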
20240513-stage2-9k/config.yaml
ADDED
@@ -0,0 +1,99 @@
base_model_path: ./pretrained_weights/stable-diffusion-v1-5
checkpointing_steps: 2000
data:
  crop_scale:
  - 1
  - 1
  do_center_crop: false
  meta_paths:
  - /workspace/develop/video/data/ubc_tiktok-dropout0.03/ubc-meta.json
  - /workspace/develop/video/data/tiktok-dance/good-meta.json
  - /workspace/develop/video/data/20240321/meta.json
  - /workspace/develop/video/data/20240327/meta.json
  - /workspace/develop/video/data/20240506/meta.json
  - /workspace/develop/video/data/20240509/meta.json
  n_sample_frames: 24
  ref_augment:
    pan:
    - 0.04
    - 0.04
    rotate: 2
    scale:
    - 0.9
    - 1.2
  sample_rate: 4
  train_bs: 1
  train_height: 960
  train_width: 640
enable_zero_snr: true
exp_name: stage2-9k
image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
mm_path: ./pretrained_weights/mm_sd_v15_v2.ckpt
noise_offset: 0.05
noise_scheduler_kwargs:
  beta_end: 0.012
  beta_schedule: linear
  beta_start: 0.00085
  clip_sample: false
  num_train_timesteps: 1000
  steps_offset: 1
output_dir: /workspace/camus/train
resume_from_checkpoint: ''
save_dir: /workspace/camus/train/20240513-stage2-9k
save_model_epoch_interval: 1
seed: 12580
snr_gamma: 5.0
solver:
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  adam_weight_decay: 0.01
  enable_xformers_memory_efficient_attention: true
  gradient_accumulation_steps: 1
  gradient_checkpointing: true
  learning_rate: 1.0e-05
  lr_scheduler: constant
  lr_warmup_steps: 1
  max_grad_norm: 1.0
  max_train_steps: 160000
  mixed_precision: fp16
  scale_lr: false
  use_8bit_adam: true
stage1_ckpt_dir: /workspace/camus/train/20240510-stage1-9k
stage1_ckpt_step: 75392
uncond_ratio: 0.1
vae_model_path: ./pretrained_weights/sd-vae-ft-mse
val:
  validation_steps: 1000
validation:
  metric:
    generate_frame_range:
    - 30
    - 54
    guidance_scale: 2.8
    ref_frame: 29
    seed: 42
    steps: 30
    videos:
    - configs/inference/metric/oATCBbieJIB8u3QAMAUwvMi9ymEOIc1AoDOajA.mp4
    - configs/inference/metric/oonQq0HjAC7ExkJlRSMBBs1q3EIiQgFveLD7fD.mp4
    - configs/inference/metric/os0aLDIkagGgAfAFQsfICCWMuoL8jm3IgJ0Wey.mp4
    - configs/inference/metric/oYflAvAyfAIFRf3yQDrLRDCWcEDoFENF9tBEgg.mp4
  pose_range:
  - 0
  - 24
  test_cases:
  - - ./configs/inference/ref_images/anyone-3.png
    - ./configs/inference/pose_videos/demo18.mp4
  - - ./configs/inference/ref_images/anyone-3-partial.png
    - ./configs/inference/pose_videos/demo6.mp4
  - - ./configs/inference/ref_images/anyone-2.png
    - ./configs/inference/pose_videos/demo11.mp4
  - - ./configs/inference/ref_images/anyone-1.png
    - ./configs/inference/pose_videos/demo11.mp4
  - - ./configs/inference/ref_images/anyone-5.png
    - ./configs/inference/pose_videos/demo11.mp4
  - - ./configs/inference/ref_images/anyone-11.png
    - ./configs/inference/pose_videos/demo15.mp4
  uniform_along_time: false
weight_dtype: fp16
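One constant across every run here: uncond_ratio: 0.1 drops the conditioning on 10% of training samples so that classifier-free guidance can be applied at validation time, where the configured guidance_scale (2.8 in this run) weights the conditional prediction. The standard combine step, sketched:

    # Classifier-free guidance combine step, as standardly defined; the
    # configs' guidance_scale (e.g. 2.8 here) is the weight s below.
    import torch

    def cfg_combine(eps_uncond: torch.Tensor, eps_cond: torch.Tensor,
                    s: float = 2.8) -> torch.Tensor:
        return eps_uncond + s * (eps_cond - eps_uncond)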