#!/bin/bash
# Script to run 3-fold cross-validation training for Cityscapes
# Ensure the script exits if any command fails
set -e
# --- Configuration ---
# Absolute path to your Cityscapes dataset
CITYSCAPES_ROOT="/pasteur/u/yiming/homework4/cityscapes"
# Path to your main MMSegmentation configuration file
CONFIG_FILE="configs/cityscapes/segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.py"
# Number of folds
N_FOLDS=3
# Number of GPUs to use for distributed training
GPUS=8
# Workspace root (where dist_train.sh is located, assuming this script is in segmentation/)
WORKSPACE_ROOT=$(pwd) # This script should be in the segmentation directory
# --- Script Logic ---
echo "Starting 3-fold cross-validation training..."
for FOLD in $(seq 1 $N_FOLDS)
do
echo "----------------------------------------------------"
echo "Starting Training for Fold $FOLD of $N_FOLDS"
echo "----------------------------------------------------"
# Define the work directory for this specific fold
WORK_DIR="work_dirs/cityscapes_kfold/mask2former_internimage_h_fold_${FOLD}"
# Define paths to the split files for the current fold
TRAIN_SPLIT_FILE="splits/fold_${FOLD}_train_split.txt"
VAL_SPLIT_FILE="splits/fold_${FOLD}_val_split.txt"
# Construct the override options for mmsegmentation
# Note: We point img_dir and ann_dir to the parent directories because the
# split files contain suffix-free paths relative to them (e.g.
# 'train/aachen/aachen_000000_000019'; MMSegmentation appends the
# '_leftImg8bit.png' / '_gtFine_labelTrainIds.png' suffixes itself)
CFG_OPTIONS="\
data.train.data_root='${CITYSCAPES_ROOT}' \
data.train.img_dir='leftImg8bit/' \
data.train.ann_dir='gtFine/' \
data.train.split='${TRAIN_SPLIT_FILE}' \
data.val.data_root='${CITYSCAPES_ROOT}' \
data.val.img_dir='leftImg8bit/' \
data.val.ann_dir='gtFine/' \
data.val.split='${VAL_SPLIT_FILE}' \
data.test.data_root='${CITYSCAPES_ROOT}' \
data.test.img_dir='leftImg8bit/val/' \
data.test.ann_dir='gtFine/val/' \
work_dir='${WORK_DIR}'"
# Define the training command
# dist_train.sh is expected to sit next to this script in the workspace root (hence ./dist_train.sh)
TRAIN_CMD="bash ./dist_train.sh ${CONFIG_FILE} ${GPUS} --cfg-options ${CFG_OPTIONS}"
echo "Training command for Fold $FOLD:"
echo "${TRAIN_CMD}"
echo "Output will be in: ${WORK_DIR}"
# Execute the training command
eval ${TRAIN_CMD}
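# Checkpoints and logs end up in ${WORK_DIR}; with mmcv's default checkpoint
# config (an assumption) that means iter_*.pth files plus a latest.pth symlink.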
echo "----------------------------------------------------"
echo "Finished Training for Fold $FOLD"
echo "----------------------------------------------------"
done
echo "3-fold cross-validation training complete."
echo "Check work_dirs/cityscapes_kfold/ for outputs of each fold."
# To aggregate results or find the best model, you would typically:
# 1. Look at the mIoU scores in the log files for each fold's validation set.
# 2. Potentially average the performance, or pick the model from the best performing fold.
# 3. If needed, re-train a final model on ALL combined train+val data from all folds,
# or use the best single fold's model for final testing on the actual test set.
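# A minimal sketch for step 1 (assumes the default text logs under each fold's
# work_dir contain lines of the form 'mIoU: <value>'; adjust the pattern if
# your mmseg version logs differently):
#   for FOLD in $(seq 1 $N_FOLDS); do
#     BEST=$(grep -rho 'mIoU: [0-9.]*' "work_dirs/cityscapes_kfold/segformer_internimage_xl_fold_${FOLD}" | sort -k2 -n | tail -1)
#     echo "Fold ${FOLD} best validation ${BEST}"
#   done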