File size: 842 Bytes
8146713
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#!/bin/bash
# Slurm batch job header. sbatch only reads #SBATCH lines that appear before
# the first executable command, so every directive must stay in this header.

# Job name shown in the queue.
#SBATCH --job-name=train_fastpitch
# Project account the job is charged to.
#SBATCH --account=nn9866k
# Wall-clock time limit (HH:MM:SS).
#SBATCH --time=11:50:00
# Memory requested per node.
#SBATCH --mem=16G
# Partition to run in (presumably the accelerator/GPU partition of this
# cluster -- confirm against the site documentation).
#SBATCH --partition=accel
# Request one GPU as a generic resource.
#SBATCH --gres=gpu:1

# == Logging

#SBATCH --error="log_err" # Save the error messages
#SBATCH --output="log_out" # Save the stdout

## Set up job environment:
# set -o errexit  # Exit the script on any error
# set -o nounset  # Treat any unset variables as an error

## Activate environment
# source ~/.bashrc

# Load conda's shell functions into this non-interactive shell, then switch to
# the environment used for training.
eval "$(conda shell.bash hook)"
conda activate fastpitch

# Setup monitoring: record timestamp, GPU utilization and GPU memory
# utilization once per second as CSV, in the background. The file is named
# after the Slurm job id; "local" is used when the script runs outside Slurm so
# the name never ends up with an empty id.
nvidia-smi --query-gpu=timestamp,utilization.gpu,utilization.memory \
        --format=csv --loop=1 > "gpu_util-${SLURM_JOB_ID:-local}.csv" &
NVIDIA_MONITOR_PID=$!  # Capture PID of monitoring process

# Stop monitoring whenever this script exits, not only when execution reaches
# the last line. Commands run from an EXIT trap do not change the exit status
# passed to `exit` below.
trap 'kill -SIGINT "$NVIDIA_MONITOR_PID"' EXIT

# Run our computation
bash scripts/train_2.sh
train_status=$?

# Exit with the training's status so that a failed run is reported as a failed
# job instead of being masked by the status of the monitor shutdown.
exit "$train_status"