File size: 1,915 Bytes
b14067d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# --- Paths and environment -------------------------------------------------
# Locations are first given relative to the directory the script is started
# from and then made absolute, because the script changes into training/
# before launching and relative paths would otherwise stop resolving.

# Base model checkpoint directory (relative form; made absolute below).
MODEL_PATH="pretrained/CogVideoX-5b-I2V"

# Exported so the launched Python processes inherit it.
# NOTE(review): presumably read by PyTorch's CUDA caching allocator — confirm
# against the installed PyTorch version.
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Training data root. Subfolders: annotations/ pose_files/ video_clips/
video_root_dir="data/train"

# Directory the script was started from; anchor for every absolute path.
dir="$(pwd)"

output_dir="${dir}/out/EPiC"
MODEL_PATH="${dir}/${MODEL_PATH}"
video_root_dir="${dir}/${video_root_dir}"
# --- Launch ----------------------------------------------------------------
# The training script and accelerate_config_machine.yaml are referenced by
# relative path, so the launch has to happen from inside training/. Abort if
# the directory is missing instead of launching from the wrong place.
cd training || {
  printf 'error: cannot cd into training/ from %s\n' "$(pwd)" >&2
  exit 1
}

# Runs the ControlNet training script on GPUs 0-7 through `accelerate launch`.
# Path-valued arguments are quoted so directories containing spaces survive
# word splitting. Boolean switches that were previously listed twice
# (--enable_slicing, --enable_tiling, --gradient_checkpointing) appear once.
CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" accelerate launch \
  --config_file accelerate_config_machine.yaml \
  --multi_gpu \
  --main_process_port 29502 \
  train_controlnet_i2v_pcd_render_mask_aware_add_dash_use_latent.py \
  --tracker_name "cogvideox-controlnet" \
  --gradient_checkpointing \
  --pretrained_model_name_or_path "$MODEL_PATH" \
  --enable_tiling \
  --enable_slicing \
  --num_inference_steps 28 \
  --seed 42 \
  --mixed_precision bf16 \
  --output_dir "$output_dir" \
  --height 480 \
  --width 720 \
  --fps 8 \
  --max_num_frames 49 \
  --video_root_dir "$video_root_dir" \
  --hflip_p 0.0 \
  --controlnet_transformer_num_layers 8 \
  --controlnet_input_channels 3 \
  --downscale_coef 8 \
  --controlnet_weights 1.0 \
  --train_batch_size 2 \
  --dataloader_num_workers 0 \
  --num_train_epochs 2 \
  --checkpointing_steps 500 \
  --gradient_accumulation_steps 1 \
  --learning_rate 2e-4 \
  --lr_scheduler cosine_with_restarts \
  --lr_warmup_steps 250 \
  --lr_num_cycles 1 \
  --optimizer AdamW \
  --adam_beta1 0.9 \
  --adam_beta2 0.95 \
  --max_grad_norm 1.0 \
  --allow_tf32 \
  --enable_time_sampling \
  --time_sampling_type truncated_normal \
  --time_sampling_mean 0.95 \
  --time_sampling_std 0.1 \
  --controlnet_guidance_start 0.0 \
  --controlnet_guidance_end 1.0 \
  --controlnet_transformer_num_attn_heads 4 \
  --controlnet_transformer_attention_head_dim 64 \
  --controlnet_transformer_out_proj_dim_factor 64 \
  --controlnet_transformer_out_proj_dim_zero_init \
  --text_embedding_path "${video_root_dir}/caption_embs"