File size: 3,308 Bytes
b14067d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/bin/bash
# Multi-GPU preprocessing launcher.
# Usage: bash preprocess.sh [caption|masking|latent]

# On Ctrl-C, signal every process in this script's process group so that
# jobs started in the background do not outlive the script.
trap 'kill 0' INT

# Requested stage, taken from the first positional argument.
# Recognised values: caption, masking, latent.
mode="${1}"

# Input data and pretrained checkpoints, given relative to the directory the
# script is started from.
video_root='../data/train'
pretrained_model_path='../pretrained/CogVideoX-5b-I2V'
raft_ckpt='../pretrained/RAFT/raft-things.pth'

# Comma-separated ids of the GPUs to use.
# gpu_list="0,1,2,3,4,5,6,7"
gpu_list='2,3,6,7'

# Split the list on commas into an array and record how many ids it holds.
IFS=',' read -r -a gpus <<< "$gpu_list"
num_gpus="${#gpus[@]}"

# ---------------------------------------------------------------------------
# Dispatch: split the input files of the selected mode into contiguous index
# ranges, start one background Python worker per range (one GPU each), then
# wait for all of them and report failures through the exit status.
#
# Reads the globals mode, video_root, pretrained_model_path, raft_ckpt, gpus
# and num_gpus.
# ---------------------------------------------------------------------------

# PIDs of every background worker, so each exit status can be collected.
worker_pids=()

#######################################
# Print the number of files in a directory that have a given extension.
# The body is a subshell so that enabling nullglob (an unmatched pattern then
# expands to nothing instead of to the pattern itself) cannot leak into the
# rest of the script. Without it an empty directory would be counted as 1.
# Arguments: $1 - directory, $2 - extension without the dot
# Outputs:   the count on stdout
#######################################
count_files() (
    shopt -s nullglob
    matches=("$1"/*."$2")
    echo "${#matches[@]}"
)

# Per-mode workers. Each takes $1 = GPU id, $2 = start index, $3 = end index
# and runs a single Python process with CUDA_VISIBLE_DEVICES set to that GPU.
# NOTE(review): with CUDA_VISIBLE_DEVICES limited to one id the process
# normally sees that GPU as device 0 - confirm the Python scripts do not use
# --gpu_id as a CUDA device index.

run_caption_worker() {
    CUDA_VISIBLE_DEVICES=$1 python get_prompt_emb.py \
        --pretrained_model_name_or_path "$pretrained_model_path" \
        --caption_path "$caption_path" \
        --output_path "$caption_emb_path" \
        --gpu_id "$1" \
        --start_idx "$2" \
        --end_idx "$3"
}

run_masking_worker() {
    CUDA_VISIBLE_DEVICES=$1 python get_masked_videos.py \
        --video_path "$source_video_dir" \
        --output_path "$masked_video_dir" \
        --mask_path "$mask_dir" \
        --raft_ckpt "$raft_ckpt" \
        --start_idx "$2" \
        --end_idx "$3" \
        --gpu_id "$1"
}

run_latent_worker() {
    CUDA_VISIBLE_DEVICES=$1 python get_vae_latent.py \
        --video_root "$video_root" \
        --pretrained_model_path "$pretrained_model_path" \
        --start_idx "$2" \
        --end_idx "$3" \
        --gpu_id "$1"
}

#######################################
# Split [0, total) into at most num_gpus contiguous ranges and start one
# background worker per non-empty range.
# Globals:   gpus, num_gpus (read), worker_pids (appended)
# Arguments: $1 - number of input files
#            $2 - noun used in log messages (e.g. "videos")
#            $3 - name of the worker function to run for each range
# Returns:   1 without launching anything if there are no GPUs or no inputs
#######################################
launch_shards() {
    local total=$1 label=$2 worker=$3
    local chunk_size i start_idx end_idx gpu_id

    if (( num_gpus < 1 )); then
        echo "No GPUs configured in gpu_list." >&2
        return 1
    fi
    if (( total < 1 )); then
        echo "No $label found to process." >&2
        return 1
    fi

    # Ceiling division so that every file lands in some range.
    chunk_size=$(( (total + num_gpus - 1) / num_gpus ))
    echo "Using $num_gpus GPUs, chunk size: $chunk_size"

    for ((i = 0; i < num_gpus; i++)); do
        start_idx=$((i * chunk_size))
        # Rounding chunk_size up can leave the last GPUs with nothing to do;
        # do not start a worker for an empty range.
        (( start_idx < total )) || break

        end_idx=$((start_idx + chunk_size))
        if (( end_idx > total )); then
            end_idx=$total
        fi

        gpu_id=${gpus[i]}
        echo "Launching GPU $gpu_id: $label $start_idx to $end_idx"

        "$worker" "$gpu_id" "$start_idx" "$end_idx" &
        worker_pids+=("$!")
    done
}

case "$mode" in
    caption)
        echo "==== Running CAPTION EMBEDDING ===="

        caption_path="$video_root/captions"
        caption_emb_path="$video_root/caption_embs"
        total=$(count_files "$caption_path" txt)

        echo "Total caption files: $total"
        launch_shards "$total" captions run_caption_worker || exit 1
        ;;

    masking)
        echo "==== Running VIDEO MASKING ===="

        source_video_dir="$video_root/videos"
        mask_dir="$video_root/masks"
        masked_video_dir="$video_root/masked_videos"
        total=$(count_files "$source_video_dir" mp4)

        echo "Total videos: $total"
        launch_shards "$total" videos run_masking_worker || exit 1
        ;;

    latent)
        echo "==== Running LATENT ENCODING ===="

        total=$(count_files "$video_root/videos" mp4)

        echo "Total videos: $total"
        launch_shards "$total" videos run_latent_worker || exit 1
        ;;

    *)
        echo "Unknown mode: $mode" >&2
        echo "Usage: bash preprocess.sh [caption|masking|latent]" >&2
        exit 1
        ;;
esac

# Wait for each worker individually: a bare `wait` would discard the exit
# codes and report success even when a worker crashed.
failed=0
for pid in "${worker_pids[@]}"; do
    if ! wait "$pid"; then
        echo "Worker process $pid exited with a non-zero status." >&2
        failed=$((failed + 1))
    fi
done

if (( failed > 0 )); then
    echo "$failed of ${#worker_pids[@]} processes failed." >&2
    exit 1
fi

echo "All processes completed."