Spaces:

raksama19
/

Test-Dolphin-PDF

Runtime error

Test-Dolphin-PDF / deployment /tensorrt_llm /convert_dolphin.sh

raksa-the-wildcats

first commit

383af88 about 1 month ago

1.96 kB

	#!/usr/bin/env bash
	#
	# Convert the Hugging Face "Dolphin" checkpoint into TensorRT-LLM engines.
	#
	# Steps:
	#   1. Clone the Hugging Face weights into tmp/hf_models/.
	#   2. Convert them to a TRT-LLM checkpoint and build the decoder engine.
	#   3. Build the visual-encoder engine.
	#
	# Every path used here (./convert/*.py, tmp/...) is relative, so run this
	# script from the directory that contains the convert/ folder.
	#
	# -e: stop at the first failing step      -u: reject unset variables
	# -x: trace every command                 pipefail: a failing stage fails the pipeline
	set -euxo pipefail

	############################################################################################
	# Reference: https://github.com/NVIDIA/TensorRT-LLM/tree/v0.18.2/examples/multimodal#nougat
	############################################################################################

	# Print an error message to stderr and abort.
	die() {
	  printf 'error: %s\n' "$*" >&2
	  exit 1
	}

	# Put the pip-installed TensorRT and cuDNN shared libraries on the loader path.
	# The ":+" expansion appends the previous value only when it is non-empty, so an
	# unset LD_LIBRARY_PATH neither trips `set -u` nor leaves a trailing ':' (an
	# empty entry makes the dynamic loader search the current working directory).
	export LD_LIBRARY_PATH="/usr/local/lib/python3.10/site-packages/tensorrt_libs/:/usr/local/lib/python3.10/site-packages/nvidia/cudnn/lib/${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

	# Fail fast, before the large download, if the later steps cannot possibly run.
	for required_script in ./convert/convert_checkpoint.py ./convert/build_visual_engine.py; do
	  [[ -f "${required_script}" ]] \
	    || die "${required_script} not found; run this script from the directory containing convert/"
	done
	command -v trtllm-build >/dev/null || die "trtllm-build not found on PATH"

	# 1. Download Huggingface weights
	export MODEL_NAME="Dolphin"
	hf_model_dir="tmp/hf_models/${MODEL_NAME}"
	# Re-running the script must not abort just because the clone already exists:
	# `git clone` refuses to write into a non-empty directory.
	if [[ -d "${hf_model_dir}/.git" ]]; then
	  printf 'Reusing existing clone at %s\n' "${hf_model_dir}" >&2
	else
	  git clone "https://huggingface.co/Bytedance/${MODEL_NAME}" "${hf_model_dir}"
	fi

	# Engine build limits. They stay exported, as in the original script.
	export MAX_BATCH_SIZE=16
	export MAX_SEQ_LEN=4096
	export MAX_INPUT_LEN=10
	export MAX_ENCODER_INPUT_LEN=784 # num_visual_features per image

	# Precision used for both the converted checkpoint and the engine plugins.
	dtype="bfloat16"
	trt_ckpt_dir="tmp/trt_models/${MODEL_NAME}/${dtype}"
	trt_engine_dir="tmp/trt_engines/${MODEL_NAME}/1-gpu/${dtype}"

	# 2. Convert Huggingface weights into TRT-LLM checkpoints and build TRT engines using scripts in examples/enc_dec
	python ./convert/convert_checkpoint.py --model_type bart \
	  --model_dir "${hf_model_dir}" \
	  --output_dir "${trt_ckpt_dir}" \
	  --tp_size 1 \
	  --pp_size 1 \
	  --dtype "${dtype}" \
	  --nougat

	# With input padding removed, the encoder input of a whole batch is packed
	# together, so its maximum length is
	# MAX_BATCH_SIZE (max_batch_size) * MAX_ENCODER_INPUT_LEN (num_visual_features).
	trtllm-build --checkpoint_dir "${trt_ckpt_dir}/decoder" \
	  --output_dir "${trt_engine_dir}/decoder" \
	  --paged_kv_cache disable \
	  --moe_plugin disable \
	  --gemm_plugin "${dtype}" \
	  --bert_attention_plugin "${dtype}" \
	  --gpt_attention_plugin "${dtype}" \
	  --remove_input_padding enable \
	  --max_beam_width 1 \
	  --max_batch_size "${MAX_BATCH_SIZE}" \
	  --max_seq_len "${MAX_SEQ_LEN}" \
	  --max_input_len "${MAX_INPUT_LEN}" \
	  --max_encoder_input_len "$((MAX_BATCH_SIZE * MAX_ENCODER_INPUT_LEN))"

	# 3. Generate TensorRT engines for visual components and combine everything into final pipeline.
	python ./convert/build_visual_engine.py --model_type nougat \
	  --model_path "${hf_model_dir}" \
	  --max_batch_size "${MAX_BATCH_SIZE}"