#!/bin/bash
# Environment setup for distributed CogView4 inference.
# Strict mode: -e exit on error, -x trace commands, -u error on unset
# variables, pipefail so a pipeline fails if any stage fails.
set -exuo pipefail

# Uncomment for verbose torch.compile / dynamo diagnostics.
# export TORCH_LOGS="+dynamo,recompiles,graph_breaks"
# export TORCHDYNAMO_VERBOSE=1

# Weights & Biases is fully disabled for this run; switch to "offline"
# to record runs locally without network access.
# export WANDB_MODE="offline"
export WANDB_MODE="disabled"

# Disable NCCL peer-to-peer and InfiniBand transports and NCCL monitoring
# (useful on hosts where these features hang or are unavailable).
export NCCL_P2P_DISABLE=1
export NCCL_IB_DISABLE=1
export TORCH_NCCL_ENABLE_MONITORING=0

export FINETRAINERS_LOG_LEVEL="DEBUG"

# Parallel backend and local device selection.
BACKEND="ptd"
NUM_GPUS=2
CUDA_VISIBLE_DEVICES="2,3"
# See the JSON files under examples/inference/ for the expected format.
DATASET_FILE="examples/inference/cogview4/dummy_text_to_image.json"

# Emit one parallelism option string for finetrainers.
# Args: $1=pp_degree $2=dp_degree $3=dp_shards $4=cp_degree $5=tp_degree
_degrees() {
  printf -- '--parallel_backend %s --pp_degree %s --dp_degree %s --dp_shards %s --cp_degree %s --tp_degree %s' \
    "$BACKEND" "$1" "$2" "$3" "$4" "$5"
}

# Depending on how many GPUs you have available, choose your degree of
# parallelism and technique!
DDP_1="$(_degrees 1 1 1 1 1)"
DDP_2="$(_degrees 1 2 1 1 1)"
DDP_4="$(_degrees 1 4 1 1 1)"
DDP_8="$(_degrees 1 8 1 1 1)"
CP_2="$(_degrees 1 1 1 2 1)"
CP_4="$(_degrees 1 1 1 4 1)"
# FSDP_2="$(_degrees 1 1 2 1 1)"
# FSDP_4="$(_degrees 1 1 4 1 1)"
# HSDP_2_2="$(_degrees 1 2 2 1 1)"
# ---- Argument groups passed to inference.py ----

# Parallelism layout; $CP_2 is intentionally unquoted so the option
# string splits into individual words.
parallel_cmd=($CP_2)

# Model selection plus VAE memory-saving options.
model_cmd=(
  --model_name "cogview4"
  --pretrained_model_name_or_path "THUDM/CogView4-6B"
  --enable_slicing
  --enable_tiling
)

# What to run and on which prompts.
inference_cmd=(
  --inference_type text_to_image
  --dataset_file "$DATASET_FILE"
)

# Attention kernel provider.
attn_provider_cmd=(
  --attn_provider flash_varlen
)

# Torch numerics configuration.
torch_config_cmd=(
  --allow_tf32
  --float32_matmul_precision high
)

# Seed, tracking, output location and distributed timeouts.
miscellaneous_cmd=(
  --seed 31337
  --tracker_name "finetrainers-inference"
  --output_dir "/raid/aryan/cogview4-inference"
  --init_timeout 600
  --nccl_timeout 600
  --report_to "wandb"
)
# ---- Execute the inference script ----
# Restrict visible devices to the GPUs selected above (quoted: SC2086).
export CUDA_VISIBLE_DEVICES="$CUDA_VISIBLE_DEVICES"

# NOTE(review): --standalone makes torchrun manage its own local rendezvous,
# which overrides the explicit --rdzv_backend/--rdzv_endpoint below (torchrun
# warns about this) — confirm whether the endpoint is still wanted for
# multi-node use before removing either.
torchrun \
  --standalone \
  --nnodes=1 \
  --nproc_per_node="$NUM_GPUS" \
  --rdzv_backend c10d \
  --rdzv_endpoint="localhost:19242" \
  examples/inference/inference.py \
  "${parallel_cmd[@]}" \
  "${model_cmd[@]}" \
  "${inference_cmd[@]}" \
  "${attn_provider_cmd[@]}" \
  "${torch_config_cmd[@]}" \
  "${miscellaneous_cmd[@]}"

# printf instead of echo -ne: escape handling in echo is not portable.
printf -- '-------------------- Finished executing script --------------------\n\n'