File size: 487 Bytes
e9fa53a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# Start `vllm serve` for one model on a contiguous range of GPU indices.
#
# Usage: <script> MODEL_PATH MODEL_NAME API_KEY PORT TP GPU_FIRST GPU_LAST
#   MODEL_PATH  model location passed as the positional argument of `vllm serve`
#   MODEL_NAME  value for --served-model-name
#   API_KEY     value for --api-key
#   PORT        value for --port
#   TP          value for --tensor-parallel-size
#   GPU_FIRST   first GPU index (inclusive) placed in CUDA_VISIBLE_DEVICES
#   GPU_LAST    last GPU index (inclusive) placed in CUDA_VISIBLE_DEVICES
#
# Exits with status 2 on bad arguments; otherwise the exit status is that of
# `vllm serve`.
set -eu

if [ "$#" -lt 7 ]; then
  printf 'Usage: %s MODEL_PATH MODEL_NAME API_KEY PORT TP GPU_FIRST GPU_LAST\n' "$0" >&2
  exit 2
fi

MODEL_PATH=$1
MODEL_NAME=$2
API_KEY=$3
PORT=$4
TP=$5
GPU_FIRST=$6
GPU_LAST=$7

# Reject anything that is not a plain non-negative integer before it reaches
# seq: a missing or malformed bound would otherwise silently select the wrong
# GPUs (e.g. `seq -s, 3` expands to 1,2,3).
for gpu_index in "$GPU_FIRST" "$GPU_LAST"; do
  case $gpu_index in
    '' | *[!0-9]*)
      printf 'error: GPU index must be a non-negative integer, got "%s"\n' "$gpu_index" >&2
      exit 2
      ;;
  esac
done

# An inverted range makes seq print nothing, which would hide every GPU.
if [ "$GPU_FIRST" -gt "$GPU_LAST" ]; then
  printf 'error: GPU_FIRST (%s) must not exceed GPU_LAST (%s)\n' "$GPU_FIRST" "$GPU_LAST" >&2
  exit 2
fi

# Comma-separated inclusive list, e.g. 0,1,2,3.
VISIBLE_DEVICES=$(seq -s, "$GPU_FIRST" "$GPU_LAST")

# The API key is deliberately not printed so it does not end up in logs.
printf '%s\n' "MODEL-PATH ${MODEL_PATH} API_key <redacted> PORT ${PORT} TP ${TP} cuda visiable devices : ${VISIBLE_DEVICES}"

# NOTE(review): the key is still visible in the process argument list via
# --api-key; consider an environment-based mechanism if vLLM offers one.
CUDA_VISIBLE_DEVICES="$VISIBLE_DEVICES" vllm serve "$MODEL_PATH" \
  --port "$PORT" \
  --tensor-parallel-size "$TP" \
  --served-model-name "$MODEL_NAME" \
  --enable-chunked-prefill \
  --enforce-eager \
  --api-key "$API_KEY" \
  --disable-log-requests \
  --max-model-len 8192