File size: 771 Bytes
96ef4b5 2357c2a 43e8c6f 2357c2a 43e8c6f 2357c2a 43e8c6f 2357c2a 43e8c6f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
#!/bin/bash
#
# Launch the SGLang server (python3 -m sglang.launch_server) on 0.0.0.0:80.
#
# Environment:
#   MODEL_ID        (required) passed to --model-path
#   KV_CACHE_DTYPE  (required) passed to --kv-cache-dtype
#   TP_SIZE         (required) passed to both --tensor-parallel-size and
#                   --expert-parallel-size
#   QUANT_METHOD    (optional) when non-empty, passed to --quantization;
#                   when unset or empty, no --quantization flag is given

set -euo pipefail

# Fail fast with a clear message. Without this, an empty value would vanish
# from the command line and the following flag would be consumed as its
# argument.
: "${MODEL_ID:?MODEL_ID must be set}"
: "${KV_CACHE_DTYPE:?KV_CACHE_DTYPE must be set}"
: "${TP_SIZE:?TP_SIZE must be set}"

# Build the argument list once, as an array, so every value stays a single
# word regardless of spaces or glob characters it may contain.
server_args=(
  --model-path "$MODEL_ID"
  --kv-cache-dtype "$KV_CACHE_DTYPE"
  --tensor-parallel-size "$TP_SIZE"
  --expert-parallel-size "$TP_SIZE"
  --enable-torch-compile
  --enable-ep-moe
  --tool-call-parser qwen25
  --host 0.0.0.0
  --port 80
)

if [[ -n "${QUANT_METHOD:-}" ]]; then
  echo "Using ${QUANT_METHOD} quantization schema"
  server_args+=(--quantization "$QUANT_METHOD")
else
  echo "Using native precision"
fi

# exec replaces this shell with the server process, so signals sent to the
# script's PID (e.g. SIGTERM on shutdown) reach the server directly and its
# exit status becomes the script's exit status.
exec python3 -m sglang.launch_server "${server_args[@]}"
|