# Spaces: Running on Zero
# File size: 1,300 Bytes
# Revision: 4f7b5ea
# Environment for the torchrun training launch that follows in this script.

# Weights & Biases: API endpoint and run mode. Offline is the active mode;
# swap the commented line with the active one to log online instead.
export WANDB_BASE_URL="https://api.wandb.ai"
#export WANDB_MODE=online
export WANDB_MODE=offline
# Keep a key that is already set in the caller's environment. Fall back to the
# "xxx" placeholder only when none is set, so a real credential is never
# clobbered and never has to be written into this file.
export WANDB_API_KEY="${WANDB_API_KEY:-xxx}"

# Point Hugging Face Hub traffic at the hf-mirror.com endpoint.
export HF_ENDPOINT="https://hf-mirror.com"

# PyTorch CUDA caching-allocator option: enable expandable segments.
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# NCCL trace buffer size, 1048576 (2^20).
# NOTE(review): the original comment labelled this "1MB"; confirm the unit
# (bytes vs. number of entries) against the PyTorch NCCL documentation.
export TORCH_NCCL_TRACE_BUFFER_SIZE=1048576
# NOTE(review): presumably selects how async NCCL errors are handled; confirm
# the meaning of value 1 for the PyTorch version in use.
export TORCH_NCCL_ASYNC_ERROR_HANDLING=1

# cuBLAS workspace configuration.
# NOTE(review): presumably set for deterministic cuBLAS results; confirm.
export CUBLAS_WORKSPACE_CONFIG=:4096:8
# Launch fastvideo/train.py through torchrun: 1 node, 8 processes per node,
# node rank 0, rendezvous at 127.0.0.1:24431. Everything after the script path
# is handed to train.py unchanged.
#
# Values visible in the flag list below:
#   - sp_size is 8, the same number as nproc_per_node.
#   - validation_steps equals max_train_steps (both 20000).
#     NOTE(review): presumably validation only fires at the final step; confirm.
#   - checkpoints are requested every 400 steps with a total limit of 3.
#   - output goes to outputs-sp8 under tracker project name sp8.
#
# No comments may be placed between the continued lines: a comment after a
# trailing backslash would end the command early.
torchrun \
  --nnodes 1 --nproc_per_node 8 \
  --node_rank=0 --master_addr=127.0.0.1 --master_port=24431 \
  fastvideo/train.py \
    --seed 142 \
    --pretrained_model_name_or_path /DATA/bvac/personal/wan21/Wan2.1-I2V-14B-480P \
    --model_type "wan" --data_json_path test_data/data.json \
    --gradient_checkpointing \
    --train_batch_size=1 --num_latent_t 240 \
    --sp_size 8 --train_sp_batch_size 1 \
    --dataloader_num_workers 4 --gradient_accumulation_steps=1 \
    --max_train_steps=20000 --learning_rate=1e-5 --mixed_precision=bf16 \
    --checkpointing_steps=400 \
    --validation_steps 20000 --validation_sampling_steps 64 \
    --checkpoints_total_limit 3 --allow_tf32 \
    --ema_start_step 0 --cfg 0.0 --ema_decay 0.999 \
    --log_validation \
    --output_dir=outputs-sp8 --tracker_project_name sp8 \
    --validation_guidance_scale "1.0" --group_frame
|