#!/bin/bash
# Script to run 3-fold cross-validation training for Cityscapes.

# Ensure the script exits if any command fails.
set -e

# --- Configuration ---

# Absolute path to your Cityscapes dataset
CITYSCAPES_ROOT="/pasteur/u/yiming/homework4/cityscapes"

# Path to your main MMSegmentation configuration file
CONFIG_FILE="configs/cityscapes/segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.py"

# Number of folds
N_FOLDS=3

# Number of GPUs (as in the original command)
GPUS=8

# --- Script Logic ---

echo "Starting ${N_FOLDS}-fold cross-validation training..."

for FOLD in $(seq 1 $N_FOLDS)
do
    echo "----------------------------------------------------"
    echo "Starting Training for Fold $FOLD of $N_FOLDS"
    echo "----------------------------------------------------"

    # Work directory for this specific fold, named after the config file
    WORK_DIR="work_dirs/cityscapes_kfold/segformer_internimage_xl_fold_${FOLD}"

    # Paths to the split files for the current fold
    TRAIN_SPLIT_FILE="splits/fold_${FOLD}_train_split.txt"
    VAL_SPLIT_FILE="splits/fold_${FOLD}_val_split.txt"

    # Construct the override options for MMSegmentation.
    # img_dir and ann_dir point at the parent directories because each line of
    # a split file is a path relative to img_dir with the image suffix omitted
    # (e.g. 'train/aachen/aachen_000000_000019'); MMSegmentation appends
    # img_suffix ('_leftImg8bit.png' for Cityscapes) when loading.
    CFG_OPTIONS="\
        data.train.data_root='${CITYSCAPES_ROOT}' \
        data.train.img_dir='leftImg8bit/' \
        data.train.ann_dir='gtFine/' \
        data.train.split='${TRAIN_SPLIT_FILE}' \
        data.val.data_root='${CITYSCAPES_ROOT}' \
        data.val.img_dir='leftImg8bit/' \
        data.val.ann_dir='gtFine/' \
        data.val.split='${VAL_SPLIT_FILE}' \
        data.test.data_root='${CITYSCAPES_ROOT}' \
        data.test.img_dir='leftImg8bit/val/' \
        data.test.ann_dir='gtFine/val/' \
        work_dir='${WORK_DIR}'"

    # Define the training command. This assumes dist_train.sh sits in the same
    # directory as this script (i.e. run this script from the segmentation/ directory).
    TRAIN_CMD="bash ./dist_train.sh ${CONFIG_FILE} ${GPUS} --cfg-options ${CFG_OPTIONS}"

    echo "Training command for Fold $FOLD:"
    echo "${TRAIN_CMD}"
    echo "Output will be in: ${WORK_DIR}"

    # Execute the training command
    eval ${TRAIN_CMD}

    echo "----------------------------------------------------"
    echo "Finished Training for Fold $FOLD"
    echo "----------------------------------------------------"
done

echo "${N_FOLDS}-fold cross-validation training complete."
echo "Check work_dirs/cityscapes_kfold/ for the outputs of each fold."

# To aggregate results or find the best model, you would typically:
# 1. Look at the mIoU scores in the log files for each fold's validation set
#    (see the sketch below).
# 2. Average the performance across folds, or pick the model from the best
#    performing fold.
# 3. If needed, re-train a final model on ALL combined train+val data from all
#    folds, or use the best single fold's model for final testing on the
#    actual test set.
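
# --- Optional: summarize validation mIoU across folds ---
# A minimal sketch, not a definitive report: it assumes MMSegmentation's
# default JSON training logs (<timestamp>.log.json) in each fold's work dir,
# with evaluation entries of the form '"mIoU": <value>'. Adjust the grep
# pattern if your logging format differs.
for FOLD in $(seq 1 $N_FOLDS)
do
    LOG_GLOB="work_dirs/cityscapes_kfold/segformer_internimage_xl_fold_${FOLD}"/*.log.json
    # Highest mIoU recorded in this fold's logs (empty if the pattern matches nothing)
    BEST_MIOU=$(grep -ho '"mIoU": [0-9.]*' ${LOG_GLOB} 2>/dev/null \
        | awk '{ if ($2 + 0 > best) best = $2 } END { print best }') || true
    echo "Fold ${FOLD} best validation mIoU: ${BEST_MIOU:-unavailable}"
done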
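
# --- Appendix: creating the split files ---
# The splits/fold_*_{train,val}_split.txt files must exist before this script
# runs. Below is a minimal, commented-out sketch of one way to generate them;
# the round-robin fold assignment and the stem format (path relative to
# leftImg8bit/ with the '_leftImg8bit.png' suffix stripped) are assumptions
# matching the paths used above.
#
# mkdir -p splits
# # List every training image as a suffix-free stem, e.g. 'train/aachen/aachen_000000_000019'
# (cd "${CITYSCAPES_ROOT}/leftImg8bit" && find train -name '*_leftImg8bit.png') \
#     | sed 's/_leftImg8bit\.png$//' | sort > splits/all_stems.txt
# for FOLD in $(seq 1 $N_FOLDS); do
#     # Every N_FOLDS-th stem (offset by FOLD) goes to validation; the rest to training.
#     awk -v f="$FOLD" -v k="$N_FOLDS" 'NR % k == f - 1' splits/all_stems.txt > "splits/fold_${FOLD}_val_split.txt"
#     awk -v f="$FOLD" -v k="$N_FOLDS" 'NR % k != f - 1' splits/all_stems.txt > "splits/fold_${FOLD}_train_split.txt"
# done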