File size: 1,915 Bytes
b14067d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Path and environment setup for the training run.
# Must be invoked from the directory that contains pretrained/, data/ and
# training/: every relative path below is resolved against the current
# working directory.

# Pretrained model location, relative to the launch directory (made absolute below).
MODEL_PATH="pretrained/CogVideoX-5b-I2V"
# PyTorch CUDA allocator configuration; exported so the launched processes inherit it.
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

video_root_dir="data/train" # subfolders: annotations/ pose_files/ video_clips/

# Turn everything into absolute paths *before* changing directory, so the
# values stay valid once we are inside training/.
dir="$(pwd)"
output_dir="${dir}/out/EPiC"
MODEL_PATH="${dir}/${MODEL_PATH}"
video_root_dir="${dir}/${video_root_dir}"

# Abort instead of launching from the wrong directory if training/ is missing.
cd training || {
  printf 'error: cannot cd into %s/training\n' "${dir}" >&2
  exit 1
}

# Launch ControlNet training through `accelerate` on GPUs 0-7.
# Reads MODEL_PATH, output_dir and video_root_dir, which must already be set;
# they are quoted so that paths containing spaces or glob characters are passed
# as a single argument each.
# accelerate_config_machine.yaml and the training script are given as relative
# paths, so they are resolved against the current working directory.
CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" accelerate launch --config_file accelerate_config_machine.yaml --multi_gpu --main_process_port 29502 \
  train_controlnet_i2v_pcd_render_mask_aware_add_dash_use_latent.py \
  --tracker_name "cogvideox-controlnet" \
  --gradient_checkpointing \
  --pretrained_model_name_or_path "${MODEL_PATH}" \
  --enable_tiling \
  --enable_slicing \
  --num_inference_steps 28 \
  --seed 42 \
  --mixed_precision bf16 \
  --output_dir "${output_dir}" \
  --height 480 \
  --width 720 \
  --fps 8 \
  --max_num_frames 49 \
  --video_root_dir "${video_root_dir}" \
  --hflip_p 0.0 \
  --controlnet_transformer_num_layers 8 \
  --controlnet_input_channels 3 \
  --downscale_coef 8 \
  --controlnet_weights 1.0 \
  --train_batch_size 2 \
  --dataloader_num_workers 0 \
  --num_train_epochs 2 \
  --checkpointing_steps 500 \
  --gradient_accumulation_steps 1 \
  --learning_rate 2e-4 \
  --lr_scheduler cosine_with_restarts \
  --lr_warmup_steps 250 \
  --lr_num_cycles 1 \
  --optimizer AdamW \
  --adam_beta1 0.9 \
  --adam_beta2 0.95 \
  --max_grad_norm 1.0 \
  --allow_tf32 \
  --enable_time_sampling \
  --time_sampling_type truncated_normal \
  --time_sampling_mean 0.95 \
  --time_sampling_std 0.1 \
  --controlnet_guidance_start 0.0 \
  --controlnet_guidance_end 1.0 \
  --controlnet_transformer_num_attn_heads 4 \
  --controlnet_transformer_attention_head_dim 64 \
  --controlnet_transformer_out_proj_dim_factor 64 \
  --controlnet_transformer_out_proj_dim_zero_init \
  --text_embedding_path "${video_root_dir}/caption_embs"