Upload README.md with huggingface_hub
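The commit title indicates the file was pushed with the `huggingface_hub` client. A minimal sketch of that call, assuming write access and that the target is the gptq model repo referenced below (the `repo_id` is an assumption, not stated by the commit):

```python
from huggingface_hub import HfApi

api = HfApi()
api.upload_file(
    path_or_fileobj='README.md',
    path_in_repo='README.md',
    repo_id='vita-group/vicuna-7b-v1.3_gptq',  # assumed target repo
    commit_message='Upload README.md with huggingface_hub',
)
```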
README.md CHANGED
````diff
@@ -14,6 +14,7 @@ pip install torch==2.0.0+cu117 torchvision==0.15.1+cu117 torchaudio==2.0.1 --ind
 pip install transformers==4.31.0
 pip install accelerate
 pip install auto-gptq # for gptq
+pip install sentencepiece
 ```
 
 How to use pruned models
````
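The new `sentencepiece` dependency backs the slow, SentencePiece-based `LlamaTokenizer` that the Vicuna and Llama checkpoints below rely on; without it, tokenizer loading can fail. A quick sanity check for the environment, assuming the installs above completed and the Hub is reachable (the `use_fast=False` flag and the expected class name are illustrative):

```python
# Quick environment check; assumes the pip installs above succeeded.
import sentencepiece  # noqa: F401 -- used implicitly by the slow tokenizer
import transformers
from transformers import AutoTokenizer

print(transformers.__version__)  # expect 4.31.0, matching the pin above

# use_fast=False selects the SentencePiece-backed slow tokenizer,
# which is the code path that needs the new dependency.
tok = AutoTokenizer.from_pretrained('lmsys/vicuna-7b-v1.3', use_fast=False)
print(type(tok).__name__)  # LlamaTokenizer
```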
````diff
@@ -60,16 +61,18 @@ How to use gptq models
 ```python
 from transformers import AutoTokenizer
 from auto_gptq import AutoGPTQForCausalLM
+# model_path = 'vita-group/llama-2-7b_wanda_2_4_gptq_4bit_128g'
+# tokenizer_path = 'meta-llama/Llama-2-7b-hf'
 model_path = 'vita-group/vicuna-7b-v1.3_gptq'
 tokenizer_path = 'lmsys/vicuna-7b-v1.3'
-revision = '2bit_128g'
 model = AutoGPTQForCausalLM.from_quantized(
     model_path,
     # inject_fused_attention=False, # or
     disable_exllama=True,
     device_map='auto',
-    revision=
+    revision='2bit_128g',
 )
+from transformers import AutoTokenizer
 tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, trust_remote_code=True)
 input_ids = tokenizer('Hello! I am a VITA-compressed-LLM chatbot!', return_tensors='pt').input_ids.to('cuda')
 outputs = model.generate(input_ids=input_ids, max_length=128)
````
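With the patch applied, the gptq usage snippet reads as below. This consolidated version is a sketch, not the verbatim README: it drops the duplicate `AutoTokenizer` import that the diff inserts mid-script, and the trailing `decode` call plus the inline comments on `disable_exllama` and `revision` are editorial additions. A CUDA device is assumed.

```python
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM

# Alternative checkpoint pair from the patch (left commented, as in the README):
# model_path = 'vita-group/llama-2-7b_wanda_2_4_gptq_4bit_128g'
# tokenizer_path = 'meta-llama/Llama-2-7b-hf'
model_path = 'vita-group/vicuna-7b-v1.3_gptq'
tokenizer_path = 'lmsys/vicuna-7b-v1.3'

model = AutoGPTQForCausalLM.from_quantized(
    model_path,
    # inject_fused_attention=False, # or
    disable_exllama=True,   # ExLlama kernels support 4-bit GPTQ only; off for 2-bit
    device_map='auto',
    revision='2bit_128g',   # Hub branch holding the 2-bit, group-size-128 weights
)
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, trust_remote_code=True)
input_ids = tokenizer('Hello! I am a VITA-compressed-LLM chatbot!', return_tensors='pt').input_ids.to('cuda')
outputs = model.generate(input_ids=input_ids, max_length=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))  # editorial addition
```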