#!/bin/bash
#SBATCH --job-name=train_fastpitch
#SBATCH --account=nn9866k
#SBATCH --time=11:50:00
#SBATCH --mem=16G
#SBATCH --partition=accel
#SBATCH --gres=gpu:1
## Logging
#SBATCH --error=log_err    # Save the error messages (stderr)
#SBATCH --output=log_out   # Save the stdout
## Set up job environment:
# set -o errexit # Exit the script on any error
# set -o nounset # Treat any unset variables as an error
## Activate environment
# source ~/.bashrc
eval "$(conda shell.bash hook)"
conda activate fastpitch
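# Optional sanity check (a sketch added here, not part of the original script;
# assumes the fastpitch conda env ships PyTorch): fail fast if the allocated
# GPU is not actually visible to the framework.
python -c "import torch; assert torch.cuda.is_available()" || exit 1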
# Set up GPU utilization monitoring
nvidia-smi --query-gpu=timestamp,utilization.gpu,utilization.memory \
    --format=csv --loop=1 > "gpu_util-$SLURM_JOB_ID.csv" &
NVIDIA_MONITOR_PID=$! # Capture PID of monitoring process
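# Defensive cleanup (an added suggestion, not in the original workflow): stop
# the monitor on any exit path, e.g. if errexit is re-enabled and train_2.sh
# fails before the explicit kill below is reached.
trap 'kill "$NVIDIA_MONITOR_PID" 2>/dev/null' EXIT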
# Run our computation
bash scripts/train_2.sh
# After the computation, stop monitoring
kill -SIGINT "$NVIDIA_MONITOR_PID"
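# Optionally reap the background process (an added suggestion) so its exit
# status is collected and the CSV output is fully flushed before the job ends.
wait "$NVIDIA_MONITOR_PID" 2>/dev/null || true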