Spaces:
Runtime error
Runtime error
File size: 3,106 Bytes
e8bdafd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
# Environment and path setup.
# All paths are derived from the directory containing this script, resolved to
# an absolute path ONCE. Re-evaluating a relative $(dirname "$0") after the
# `cd` below would point at the wrong location whenever the script is invoked
# through a relative path that contains a directory component.
export TOKENIZERS_PARALLELISM=false
# CDPATH is cleared for this one command so `cd` cannot jump somewhere else.
WORKSPACE=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) || {
  printf 'error: cannot resolve script directory from %s\n' "$0" >&2
  exit 1
}
cd -- "${WORKSPACE}" || {
  printf 'error: cannot cd to %s\n' "${WORKSPACE}" >&2
  exit 1
}
ACCELERATE_CONFIG_FILE=${WORKSPACE}/accelerate_config.yaml
PRETRAINED_MODEL_DIR=${WORKSPACE}/pretrained
DATA_ROOT=${WORKSPACE}/data/RealCam-Vid
SPLIT=train
CHECKPOINT_DIR=${WORKSPACE}/checkpoints
EXPERIMENT_NAME=RealCam-I2V
SUB_EXPERIMENT_NAME=CogVideoX1.5-5B-ControlNetXs
# Per-experiment output directory; also exported as WANDB_DIR below.
LOG_DIR=${CHECKPOINT_DIR}/${EXPERIMENT_NAME}/${SUB_EXPERIMENT_NAME}
mkdir -p -- "${LOG_DIR}" || {
  printf 'error: cannot create %s\n' "${LOG_DIR}" >&2
  exit 1
}
export WANDB_DIR=${LOG_DIR}
# Model Configuration
# Flags forwarded to train.py that select the base model, the training type
# and the timestep-sampling settings.
MODEL_ARGS=(
  --model_path "${PRETRAINED_MODEL_DIR}/CogVideoX1.5-5B-I2V"
  --model_name "cogvideox1.5-i2v"
  --model_type "i2v"
  --training_type "controlnetxs"
  --time_sampling_type "truncated_normal"
  --time_sampling_mean 0.8
  # This line used to repeat --time_sampling_type with the value 0.075, which
  # overrode "truncated_normal" above with a number.
  # NOTE(review): the flag name --time_sampling_std is assumed from the
  # mean/type pair; confirm against train.py's argument parser.
  --time_sampling_std 0.075
  --keep_aspect_ratio
)
# Output Configuration
# Output directory and experiment-tracker naming passed to train.py.
# Every expansion is quoted so a value containing spaces stays one argument,
# and an empty value cannot vanish and shift the following flag into its place.
OUTPUT_ARGS=(
  --output_dir "${LOG_DIR}"
  --report_to "wandb"
  --tracker_name "${EXPERIMENT_NAME}"
  --sub_tracker_name "${SUB_EXPERIMENT_NAME}"
)
# Training Configuration
# Optimisation hyper-parameters and memory-saving switches for train.py,
# assembled group by group.
TRAIN_ARGS=()
# Schedule and batch shape.
TRAIN_ARGS+=(--train_steps 50000 --batch_size 1 --gradient_accumulation_steps 1)
# Optimiser settings.
TRAIN_ARGS+=(--learning_rate 4e-5 --weight_decay 1e-4)
# Numeric precision; the original note lists "no" and "fp16" as alternatives.
TRAIN_ARGS+=(--mixed_precision "bf16")
# Memory-saving switches.
TRAIN_ARGS+=(--gradient_checkpointing --enable_slicing --enable_tiling)
# Random seed.
TRAIN_ARGS+=(--seed 42)
# System Configuration
# Worker count, pinned-memory switch and NCCL timeout value passed to train.py.
SYSTEM_ARGS=()
SYSTEM_ARGS+=(--num_workers 4)
SYSTEM_ARGS+=(--pin_memory)
SYSTEM_ARGS+=(--nccl_timeout 1800)
# Checkpointing Configuration
# Checkpoint interval (in steps) and checkpoint limit passed to train.py.
CHECKPOINT_ARGS=()
CHECKPOINT_ARGS+=(--checkpointing_steps 100)
CHECKPOINT_ARGS+=(--checkpointing_limit 100)
# Validation Configuration
# Enables validation every 100 steps and names the prompt/image list files.
# NOTE(review): prompts.txt / images.txt are presumably resolved relative to
# --validation_dir; confirm against train.py.
# The directory expansion is quoted so a path with spaces stays one argument
# and an empty value cannot shift the following flags.
VALIDATION_ARGS=(
  --do_validation
  --validation_dir "${CHECKPOINT_DIR}"
  --validation_steps 100
  --validation_prompts "prompts.txt"
  --validation_images "images.txt"
  --gen_fps 8
)
# Data Configuration
# Original note: extract video latents of 81x256x448 ; "768//3 x 1360//3 "
# Dataset root, cache root and metadata file passed to train.py.
# The cache root is built from WORKSPACE (the script directory) rather than a
# fresh $(dirname "$0") call: the value is the same, it needs no subprocess,
# and it follows WORKSPACE if that is ever made absolute.
# All expansions are quoted so paths containing spaces stay single arguments.
DATA_ARGS=(
  --data_root "${DATA_ROOT}"
  --cache_root "${WORKSPACE}/data/cache"
  --metadata_path "RealCam-Vid_new_${SPLIT}.npz"
  --enable_align_factor
)
# Distribution args for multi-node launches (passed to `accelerate launch`).
# Reads these environment variables, expected to be provided by the cluster:
#   HOST_NUM -> --num_machines      NODE_NUM -> --num_processes
#   INDEX    -> --machine_rank      CHIEF_IP -> --main_process_ip
# A flag is added only when its variable is set and non-empty. An unset
# variable used to leave the flag without a value, so the next flag was
# swallowed as that value. When a flag is omitted, accelerate is expected to
# take the setting from the --config_file instead.
DIST_ARGS=(--config_file "${ACCELERATE_CONFIG_FILE}")
for dist_pair in \
  num_machines:HOST_NUM \
  num_processes:NODE_NUM \
  machine_rank:INDEX \
  main_process_ip:CHIEF_IP; do
  dist_flag=${dist_pair%%:*}
  dist_var=${dist_pair#*:}
  if [[ -n "${!dist_var:-}" ]]; then
    DIST_ARGS+=("--${dist_flag}" "${!dist_var}")
  else
    printf 'warning: %s is unset; not passing --%s (accelerate config value applies)\n' \
      "${dist_var}" "${dist_flag}" >&2
  fi
done
unset dist_pair dist_flag dist_var
DIST_ARGS+=(--main_process_port 29500)
# First launch: run train.py with --precompute at resolution 81x768x1360.
# The full command line is assembled into an array first, then executed, so
# every argument group stays a separate, correctly quoted word.
precompute_cmd=(
  accelerate launch "${DIST_ARGS[@]}" train.py
  "${MODEL_ARGS[@]}"
  "${OUTPUT_ARGS[@]}"
  "${DATA_ARGS[@]}"
  "${TRAIN_ARGS[@]}"
  "${SYSTEM_ARGS[@]}"
  "${CHECKPOINT_ARGS[@]}"
  "${VALIDATION_ARGS[@]}"
  --train_resolution "81x768x1360"
  --precompute
)
"${precompute_cmd[@]}"
# Optional: for joint landscape/portrait training, also run the precompute pass at the portrait resolution (uncomment the block below).
# accelerate launch "${DIST_ARGS[@]}" train.py \
# "${MODEL_ARGS[@]}" \
# "${OUTPUT_ARGS[@]}" \
# "${DATA_ARGS[@]}" \
# "${TRAIN_ARGS[@]}" \
# "${SYSTEM_ARGS[@]}" \
# "${CHECKPOINT_ARGS[@]}" \
# "${VALIDATION_ARGS[@]}" \
# --train_resolution "81x1360x768" \
# --precompute
# Second launch: run train.py at resolution 81x768x1360 without --precompute.
# The array expansions are quoted ("${ARR[@]}") so each stored argument is
# passed through as exactly one word, matching the first launch above. The
# unquoted form re-splits values on whitespace and silently drops empty ones.
accelerate launch "${DIST_ARGS[@]}" train.py \
  "${MODEL_ARGS[@]}" \
  "${OUTPUT_ARGS[@]}" \
  "${DATA_ARGS[@]}" \
  "${TRAIN_ARGS[@]}" \
  "${SYSTEM_ARGS[@]}" \
  "${CHECKPOINT_ARGS[@]}" \
  "${VALIDATION_ARGS[@]}" \
  --train_resolution "81x768x1360"
# Optional extra flag, disabled by default. To enable it, append it to the
# command above (add a trailing backslash to the --train_resolution line):
#   --allow_switch_hw
|