File size: 487 Bytes
e9fa53a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
# Start `vllm serve` for one model on an inclusive range of CUDA devices.
#
# Usage: <script> MODEL_PATH MODEL_NAME API_KEY PORT TP FIRST_GPU LAST_GPU
#   MODEL_PATH  model argument handed to `vllm serve`
#   MODEL_NAME  value for --served-model-name
#   API_KEY     value for --api-key
#   PORT        value for --port
#   TP          value for --tensor-parallel-size
#   FIRST_GPU   first device index of the range (inclusive)
#   LAST_GPU    last device index of the range (inclusive)
#
# FIRST_GPU..LAST_GPU is expanded with `seq -s,` into a comma-separated list
# and exported to the server process only, via CUDA_VISIBLE_DEVICES.
set -eu

if [ "$#" -lt 7 ]; then
  printf 'Usage: %s MODEL_PATH MODEL_NAME API_KEY PORT TP FIRST_GPU LAST_GPU\n' \
    "${0##*/}" >&2
  exit 2
fi

MODEL_PATH=$1
MODEL_NAME=$2
API_KEY=$3
PORT=$4
TP=$5
FIRST_GPU=$6
LAST_GPU=$7

# The device bounds are consumed by this script itself (seq), so reject
# anything that is not a plain non-negative integer before using them.
for gpu_index in "$FIRST_GPU" "$LAST_GPU"; do
  case "$gpu_index" in
    '' | *[!0-9]*)
      printf 'error: GPU index must be a non-negative integer, got "%s"\n' \
        "$gpu_index" >&2
      exit 2
      ;;
  esac
done

VISIBLE_DEVICES=$(seq -s, "$FIRST_GPU" "$LAST_GPU") || {
  printf 'error: seq failed for GPU range %s..%s\n' "$FIRST_GPU" "$LAST_GPU" >&2
  exit 1
}

# A reversed range makes seq print nothing; starting the server with an empty
# CUDA_VISIBLE_DEVICES would hide every GPU, so fail early instead.
if [ -z "$VISIBLE_DEVICES" ]; then
  printf 'error: empty GPU range %s..%s (FIRST_GPU must be <= LAST_GPU)\n' \
    "$FIRST_GPU" "$LAST_GPU" >&2
  exit 2
fi

# The API key is a secret: log a placeholder rather than its value.
echo "MODEL-PATH ${MODEL_PATH} API_key **** PORT ${PORT} TP ${TP} cuda visiable devices : ${VISIBLE_DEVICES}"

# All expansions are quoted so paths or names containing spaces or glob
# characters reach vllm as single arguments.
CUDA_VISIBLE_DEVICES="$VISIBLE_DEVICES" vllm serve "$MODEL_PATH" \
  --port "$PORT" \
  --tensor-parallel-size "$TP" \
  --served-model-name "$MODEL_NAME" \
  --enable-chunked-prefill \
  --enforce-eager \
  --api-key "$API_KEY" \
  --disable-log-requests \
  --max-model-len 8192
|