Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
cd text-generation-inference | |
conda create -n tgi python=3.11 | |
eval "$(/home/user/miniconda3/bin/conda shell.bash hook)" | |
conda install -c conda-forge pkg-config openssl | |
conda activate tgi | |
export OPENSSL_DIR=$CONDA_PREFIX && \ | |
export OPENSSL_INCLUDE_DIR=$CONDA_PREFIX/include && \ | |
export OPENSSL_LIB_DIR=$CONDA_PREFIX/lib && \ | |
export PKG_CONFIG_PATH=$CONDA_PREFIX/lib/pkgconfig | |
export PYTHONPATH=/home/user/miniconda3/envs/tgi/lib/python3.11/site-packages | |
export LD_LIBRARY_PATH=/home/user/:$LD_LIBRARY_PATH | |
ln -s /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1 /home/user/libnvidia-ml.so | |
nohup text-generation-launcher --model-id HuggingFaceH4/zephyr-7b-beta -p 7860 &> qwen2.log & | |
PYTHONPATH=/home/user/:$PYTHONPATH \ | |
LD_LIBRARY_PATH=/home/user/:$LD_LIBRARY_PATH \ | |
text-generation-launcher \ | |
--model-id HuggingFaceH4/zephyr-7b-beta \ | |
--disable-custom-kernels \ | |
-p 7860 | |
LD_LIBRARY_PATH=/home/user/:$LD_LIBRARY_PATH \ | |
text-generation-launcher \ | |
--model-id HuggingFaceH4/zephyr-7b-beta \ | |
-p 7860 | |
LD_LIBRARY_PATH=/home/user/:$LD_LIBRARY_PATH text-generation-launcher --model-id Qwen/Qwen2.5-VL-7B-Instruct -p 7860 | |
LD_LIBRARY_PATH=/home/user/:$LD_LIBRARY_PATH text-generation-launcher --model-id Qwen/Qwen3-8B -p 7860 | |
text-generation-launcher \ | |
--model-id HuggingFaceH4/zephyr-7b-beta \ | |
--disable-custom-kernels | |
# To run the server in the background, use: | |
nohup text-generation-launcher \ | |
--model-id mistralai/Mistral-7B-v0.1 \ | |
--port 8080 \ | |
--max-batch-prefill-tokens 2048 \ | |
--max-batch-total-tokens 4096 \ | |
--max-input-length 4096 \ | |
--max-total-tokens 8192 \ | |
--max-batch-size 32 \ | |
--max-waiting-tokens 20 \ | |
--hostname 0.0.0.0 \ | |
--cuda-memory-fraction 0.95 \ | |
--max-concurrent-requests 128 \ | |
--trust-remote-code \ | |
--json-output > tgi.log 2>&1 & | |
# To stop the server, use: | |
ps aux | grep text-generation-launcher | |
pkill -f text-generation-launcher | |
kill -9 $(nvidia-smi | grep python | awk '{ print $5 }') | |
curl https://jdelavande-dev-tgi.hf.space/generate \ | |
-X POST \ | |
-H "Content-Type: application/json" \ | |
-d '{"inputs":"Bonjour !", "parameters":{"max_new_tokens":20}}' | |
curl https://jdelavande-dev-tgi2.hf.space/ \ | |
-X POST \ | |
-H "Content-Type: application/json" \ | |
-d '{"inputs":"Bonjour !", "parameters":{"max_new_tokens":20}}' | |
curl localhost:7860/generate \ | |
-X POST \ | |
-H "Content-Type: application/json" \ | |
-d '{"inputs":"Bonjour !", "parameters":{"max_new_tokens":20}}' |