Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 2,477 Bytes
f6e13e9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
#!/bin/bash
# Set up a conda environment for building/running text-generation-inference (TGI)
# with OpenSSL from conda-forge (needed by the Rust launcher build).

# Fail early if the checkout is missing.
cd text-generation-inference || exit 1

# Make conda usable in this non-interactive shell BEFORE any conda commands
# run (the original ran `conda create` before the hook, which fails when
# conda is not already on PATH).
eval "$(/home/user/miniconda3/bin/conda shell.bash hook)"

# -y: non-interactive; without it the script blocks on a confirmation prompt.
conda create -y -n tgi python=3.11
conda activate tgi

# Install into the tgi env (the original installed before activating, which
# put the packages into the base env instead).
conda install -y -c conda-forge pkg-config openssl

# Point the Rust openssl-sys build at the conda-provided OpenSSL.
export OPENSSL_DIR=$CONDA_PREFIX
export OPENSSL_INCLUDE_DIR=$CONDA_PREFIX/include
export OPENSSL_LIB_DIR=$CONDA_PREFIX/lib
export PKG_CONFIG_PATH=$CONDA_PREFIX/lib/pkgconfig
export PYTHONPATH=/home/user/miniconda3/envs/tgi/lib/python3.11/site-packages
export LD_LIBRARY_PATH=/home/user/:$LD_LIBRARY_PATH

# Expose the host NVML library under a name the launcher can dlopen.
# -f: make the script re-runnable (plain `ln -s` fails if the link exists).
ln -sf /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1 /home/user/libnvidia-ml.so
# Launch TGI in the background on port 7860; logs go to zephyr.log.
# (The original redirected to qwen2.log, mislabeling this zephyr-7b-beta run.)
nohup text-generation-launcher --model-id HuggingFaceH4/zephyr-7b-beta -p 7860 &> zephyr.log &
# Foreground launch variant: custom CUDA kernels disabled (fallback when the
# kernels cannot be used on this hardware), with per-command PYTHONPATH and
# LD_LIBRARY_PATH (the latter so the libnvidia-ml.so symlink above is found).
PYTHONPATH=/home/user/:$PYTHONPATH \
LD_LIBRARY_PATH=/home/user/:$LD_LIBRARY_PATH \
text-generation-launcher \
--model-id HuggingFaceH4/zephyr-7b-beta \
--disable-custom-kernels \
-p 7860
# Same launch, custom kernels left enabled.
LD_LIBRARY_PATH=/home/user/:$LD_LIBRARY_PATH \
text-generation-launcher \
--model-id HuggingFaceH4/zephyr-7b-beta \
-p 7860
# One-liner variants for other models (Qwen2.5-VL-7B and Qwen3-8B).
LD_LIBRARY_PATH=/home/user/:$LD_LIBRARY_PATH text-generation-launcher --model-id Qwen/Qwen2.5-VL-7B-Instruct -p 7860
LD_LIBRARY_PATH=/home/user/:$LD_LIBRARY_PATH text-generation-launcher --model-id Qwen/Qwen3-8B -p 7860
# Minimal launch: default port, custom kernels disabled.
text-generation-launcher \
--model-id HuggingFaceH4/zephyr-7b-beta \
--disable-custom-kernels
# To run the server in the background, use:
# NOTE(review): the token budgets must be mutually consistent or the launcher
# refuses to start: --max-batch-prefill-tokens must cover --max-input-length
# (a single max-length request has to fit in one prefill batch), and
# --max-batch-total-tokens must cover --max-total-tokens. The original values
# (2048 < 4096 and 4096 < 8192) violated both constraints; raised to match.
nohup text-generation-launcher \
--model-id mistralai/Mistral-7B-v0.1 \
--port 8080 \
--max-batch-prefill-tokens 4096 \
--max-batch-total-tokens 8192 \
--max-input-length 4096 \
--max-total-tokens 8192 \
--max-batch-size 32 \
--max-waiting-tokens 20 \
--hostname 0.0.0.0 \
--cuda-memory-fraction 0.95 \
--max-concurrent-requests 128 \
--trust-remote-code \
--json-output > tgi.log 2>&1 &
# To stop the server, use:
# List matching processes (the original `ps aux | grep` also matched its own
# grep process; pgrep does not).
pgrep -af text-generation-launcher
# Graceful shutdown first (SIGTERM).
pkill -f text-generation-launcher
# Last resort: SIGKILL any compute processes still holding GPU memory.
# Query NVML directly for PIDs instead of scraping the human-readable
# nvidia-smi table with awk '{print $5}', whose column layout is not stable.
kill -9 $(nvidia-smi --query-compute-apps=pid --format=csv,noheader)
# Smoke-test the /generate endpoint of the deployed HF Space.
curl https://jdelavande-dev-tgi.hf.space/generate \
-X POST \
-H "Content-Type: application/json" \
-d '{"inputs":"Bonjour !", "parameters":{"max_new_tokens":20}}'
# Same request against the second Space's root URL.
curl https://jdelavande-dev-tgi2.hf.space/ \
-X POST \
-H "Content-Type: application/json" \
-d '{"inputs":"Bonjour !", "parameters":{"max_new_tokens":20}}'
# Local instance on port 7860. (Removed the stray trailing '|' from the
# original last line — a copy/paste artifact that piped into nothing and
# produced a syntax error at end of file.)
curl localhost:7860/generate \
-X POST \
-H "Content-Type: application/json" \
-d '{"inputs":"Bonjour !", "parameters":{"max_new_tokens":20}}'