Spaces:
Runtime error
Runtime error
Update entrypoint.sh.template
Browse files- entrypoint.sh.template +13 -4
entrypoint.sh.template
CHANGED
|
@@ -1,11 +1,20 @@
|
|
| 1 |
#!/bin/bash
|
| 2 |
|
| 3 |
if [[ "$QUANTIZATION" == "false" ]]; then
|
| 4 |
-
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
else
|
| 7 |
-
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
fi
|
| 10 |
|
| 11 |
# Wait for text-generation-inference to start
|
|
|
|
| 1 |
#!/bin/bash
|
| 2 |
|
| 3 |
if [[ "$QUANTIZATION" == "false" ]]; then
|
| 4 |
+
text-generation-launcher --model-id $MODEL_NAME \
|
| 5 |
+
--num-shard 1 --port 8080 --trust-remote-code \
|
| 6 |
+
--max-concurrent-requests $MAX_CONCURRENT_REQUESTS \
|
| 7 |
+
--max-input-length $MAX_INPUT_LENGTH \
|
| 8 |
+
--max-total-tokens $MAX_TOTAL_TOKENS \
|
| 9 |
+
&
|
| 10 |
else
|
| 11 |
+
text-generation-launcher --model-id $MODEL_NAME \
|
| 12 |
+
--num-shard 1 --port 8080 --trust-remote-code \
|
| 13 |
+
--max-concurrent-requests $MAX_CONCURRENT_REQUESTS \
|
| 14 |
+
--max-input-length $MAX_INPUT_LENGTH \
|
| 15 |
+
--max-total-tokens $MAX_TOTAL_TOKENS \
|
| 16 |
+
--quantize $QUANTIZATION \
|
| 17 |
+
&
|
| 18 |
fi
|
| 19 |
|
| 20 |
# Wait for text-generation-inference to start
|