Syzygianinfern0 committed on
Commit
a9665f3
·
1 Parent(s): 5822c0a

Tricks for lower GPU utilization

Browse files
Files changed (1) hide show
  1. vllm_serve.sh +3 -1
vllm_serve.sh CHANGED
@@ -9,4 +9,6 @@ PORT=8000
9
  vllm serve $MODEL \
10
  --port $PORT \
11
  --trust-remote-code \
12
- --limit-mm-per-prompt image=4
 
 
 
9
  vllm serve $MODEL \
10
  --port $PORT \
11
  --trust-remote-code \
12
+ --limit-mm-per-prompt image=4 \
13
+ --enforce-eager \
14
+ --max-model-len 32768