Donghao Huang committed

Commit 43e183b · Parent(s): b05a046

completed tgi.sh

Files changed:
- .env.example +10 -0
- tgi.sh +3 -4
.env.example CHANGED

@@ -91,3 +91,13 @@ CHUNK_OVERLAP=512
 # telegram bot
 TELEGRAM_API_TOKEN=
 CHAT_API_URL=http://localhost:8080/chat_sync
+
+# template for env/tgi.conf
+export PORT=64300
+
+export NGROK_AUTHTOKEN=
+export NGROK_EDGE=
+
+export HUGGINGFACE_HUB_CACHE=$HOME/.cache/huggingface/hub/
+export HUGGING_FACE_HUB_TOKEN=
+ß
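The ten added lines double as a template for the env/tgi.conf file that tgi.sh now sources. A minimal sketch of turning the template into a working config, assuming the env/ directory sits next to tgi.sh and using obvious placeholders instead of real credentials:

# create env/tgi.conf from the template appended to .env.example
mkdir -p env
cat > env/tgi.conf <<'EOF'
export PORT=64300
export NGROK_AUTHTOKEN=<your-ngrok-authtoken>
export NGROK_EDGE=<your-ngrok-edge>
export HUGGINGFACE_HUB_CACHE=$HOME/.cache/huggingface/hub/
export HUGGING_FACE_HUB_TOKEN=<your-hf-token>
EOF

The quoted heredoc keeps $HOME unexpanded in the written file, so it resolves when the config is sourced rather than when it is created.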
    	
tgi.sh CHANGED

@@ -7,12 +7,11 @@ pwd
 
 uname -a
 
-
-souce env/ngrok.conf
+. env/tgi.conf
 
 export MODEL_ID="meta-llama/Llama-2-7b-chat-hf"
-export QUANTIZE
+export QUANTIZE="--quantize bitsandbytes-fp4"
 
 echo Running $MODEL_ID with TGI
 
-text-generation-launcher --model-id $MODEL_ID --port $PORT --max-input-length 2048 --max-total-tokens 4096 --ngrok --ngrok-authtoken $NGROK_AUTHTOKEN --ngrok-edge NGROK_EDGE $QUANTIZE
+text-generation-launcher --model-id $MODEL_ID --port $PORT --max-input-length 2048 --max-total-tokens 4096 --ngrok --ngrok-authtoken $NGROK_AUTHTOKEN --ngrok-edge $NGROK_EDGE $QUANTIZE
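Taken together, the change sources env/tgi.conf instead of the misspelled "souce env/ngrok.conf", gives QUANTIZE an actual value, and adds the missing $ on NGROK_EDGE. With env/tgi.conf populated as sketched above, running the script should expand to roughly the following launcher call (assuming the PORT value from the template; the ngrok arguments come from the sourced config):

bash tgi.sh
# effectively runs:
# text-generation-launcher --model-id meta-llama/Llama-2-7b-chat-hf --port 64300 \
#   --max-input-length 2048 --max-total-tokens 4096 \
#   --ngrok --ngrok-authtoken "$NGROK_AUTHTOKEN" --ngrok-edge "$NGROK_EDGE" \
#   --quantize bitsandbytes-fp4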