#!/usr/bin/env bash
# Copyright (C) 2020 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
#######################################
# Stop the monitoring tools and the training job.
# Signals `top` and `pcm.x` by exact process name, then the first process
# whose command line matches tools/train.py. Used by the SIGINT/SIGTERM trap.
# Arguments: none
# Outputs:   "Stopping processes" on stdout
#######################################
function stop_all() {
  local train_pid
  echo "Stopping processes"
  # -x anchors the match to the whole process name, so look-alikes such as
  # htop or pcmanfm are left alone.
  pkill -x top
  pkill -x 'pcm\.x'
  # pgrep never reports itself, so no `grep -v grep` filter is needed.
  train_pid=$(pgrep -f tools/train.py | head -n 1)
  # With no match there is nothing to signal; kill with an empty argument
  # would only print an error.
  if [[ -n "${train_pid}" ]]; then
    kill "${train_pid}"
  fi
}
# On Ctrl-C (SIGINT) or SIGTERM: print a notice, stop the child processes via
# stop_all, and leave with exit status 0.
trap 'echo; echo Killing processes...; stop_all; exit 0' SIGINT SIGTERM
# Install PCM if it is not already present.
# An existing ./pcm directory is taken to mean PCM was set up by an earlier
# run; otherwise the sources are cloned and built, and sysstat is installed.
if [[ -d "./pcm" ]]; then
  echo PCM installed
else
  # Only build when both the clone and the directory change succeeded, so
  # make never runs in the wrong directory.
  if git clone https://github.com/opcm/pcm.git && pushd pcm; then
    if ! { make && make install; }; then
      echo "PCM build or install failed" >&2
    fi
    popd
  else
    echo "Could not fetch the PCM sources" >&2
  fi
  # apt-get with -y runs unattended; plain `apt install` stops to ask for
  # confirmation.
  apt-get install -y sysstat
fi
# Load the msr kernel module in either case.
modprobe msr
# Create the output directory for this run, named after the current UTC
# time (e.g. 2020Jan31_1530), and print its absolute path.
DATETIME=$(date -u +"%Y%b%d_%H%M")
rundir="${PWD}/output/${DATETIME}"
# Every later step writes its log into this directory, so stop if it
# cannot be created.
mkdir -p "${rundir}" || exit 1
echo "${rundir}"
# Run training from the parent directory as a background job, forwarding the
# script's arguments and copying stdout and stderr to both the terminal and
# train.log.
pushd .. || exit 1
python tools/train.py "$@" 2>&1 | tee -a "${rundir}/train.log" &
popd || exit 1
# Wait 10 seconds before continuing with the steps that follow.
sleep 10
# Collect system-level metrics in the background.
# top: batch mode (-b), idle tasks hidden (-i), sorted by %CPU (-o).
top -b -i -o %CPU > "${rundir}/top.log" &
echo TOP STARTED
./pcm/pcm.x > "${rundir}/pcm.log" &
echo PCM STARTED
# Take 60 GPU samples, pausing one second between samples. Each sample
# appends utilization, memory utilization and temperature (CSV) to its own
# log file.
echo NVIDIA LOOP STARTING
for ((i = 1; i <= 60; i++)); do
  nvidia-smi --query-gpu=utilization.gpu --format=csv >> "${rundir}/gpu_utilization.log"
  nvidia-smi --query-gpu=utilization.memory --format=csv >> "${rundir}/gpu_mem.log"
  nvidia-smi --query-gpu=temperature.gpu --format=csv >> "${rundir}/gpu_temp.log"
  sleep 1
done
# Cleanup when done: stop the background monitors. The training job is not
# signalled here.
# -x anchors the match to the whole process name, so look-alikes such as
# htop or pcmanfm are left alone.
pkill -x top
pkill -x 'pcm\.x'