n1ck-guo committed · Commit 0b780a4 · verified · 1 Parent(s): 8c36421

Update README.md

Files changed (1)
  1. README.md +13 -12
README.md CHANGED
@@ -9,7 +9,7 @@ This model is an int4 model with group_size128 and sym quantization of [microsof
 
 
 
-### Use the model
+### How To Use
 ### INT4 Inference with ITREX on CPU
 Install the latest [intel-extension-for-transformers](
 https://github.com/intel/intel-extension-for-transformers)
@@ -36,15 +36,19 @@ She is curious and brave and
 ```
 
 
-### INT4 Inference with AutoGPTQ
+### INT4 Inference
 
-pip install auto-gptq
 
 ```python
+##pip install auto-round
 from transformers import AutoModelForCausalLM, AutoTokenizer
 quantized_model_dir = "Intel/phi-2-int4-inc"
-tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, revision="5973e3ad50beaefb937345d693639ce92ca836f9")
-model = AutoModelForCausalLM.from_pretrained(quantized_model_dir, device_map="auto", trust_remote_code=True, revision="5973e3ad50beaefb937345d693639ce92ca836f9")
+tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir)
+model = AutoModelForCausalLM.from_pretrained(quantized_model_dir,
+                                             device_map="auto",
+                                             trust_remote_code=True,
+                                             ## revision="5973e3a" ##AutoGPTQ format
+                                             )
 text = "There is a girl who likes adventure,"
 inputs = tokenizer(text, return_tensors="pt", return_attention_mask=False).to(model.device)
 outputs = model.generate(**inputs, max_new_tokens=50)
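
Note: the updated snippet stops at `generate`, so nothing is printed. A minimal runnable sketch of the post-change flow, with a final decode/print step added as an assumption (it is not part of this hunk):

```python
# Sketch of the README's updated inference flow; only the last line
# (decoding the generated ids back to text) is added here for illustration.
from transformers import AutoModelForCausalLM, AutoTokenizer

quantized_model_dir = "Intel/phi-2-int4-inc"
tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir)
model = AutoModelForCausalLM.from_pretrained(quantized_model_dir,
                                             device_map="auto",
                                             trust_remote_code=True)

text = "There is a girl who likes adventure,"
inputs = tokenizer(text, return_tensors="pt", return_attention_mask=False).to(model.device)
outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))  # assumed decode step
```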
@@ -68,8 +72,7 @@ She is curious and brave and
 pip install lm-eval==0.4.2
 
 ```bash
-cd auto-round
-python3 -m auto_round --eval --model Intel/phi-2-int4-inc --device cuda:0 --tasks lambada_openai,hellaswag,piqa,winogrande,truthfulqa_mc1,openbookqa,boolq,arc_easy,arc_challenge,mmlu --batch_size 16
+auto-round --eval --model Intel/phi-2-int4-inc --device cuda:0 --tasks lambada_openai,hellaswag,piqa,winogrande,truthfulqa_mc1,openbookqa,boolq,arc_easy,arc_challenge,mmlu --batch_size 16
 ```
 
 
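
Note: the same evaluation can also be driven from Python. A hedged sketch using lm-eval 0.4.x's `simple_evaluate` entry point; the task list mirrors the CLI command above, and the exact arguments are an assumption about that API, not something this commit specifies:

```python
# Evaluate the quantized checkpoint with lm-eval's Python API (0.4.x);
# mirrors the `auto-round --eval ...` CLI invocation above.
import lm_eval

results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=Intel/phi-2-int4-inc,trust_remote_code=True",
    tasks=["lambada_openai", "hellaswag", "piqa", "winogrande",
           "truthfulqa_mc1", "openbookqa", "boolq",
           "arc_easy", "arc_challenge", "mmlu"],
    batch_size=16,
)
print(results["results"])  # per-task metrics
```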
@@ -88,16 +91,14 @@ python3 -m auto_round --eval --model Intel/phi-2-int4-inc --device cuda:0 --task
 | arc_easy | 0.8001 | 0.8013 |
 | arc_challenge | 0.5282 | 0.5137 |
 
-##
 
-### generate the model
+
+### Generate the model
 
 Here is the sample command to generate the model
 
 ```bash
-cd auto-round
-pip install -r requirements.txt
-python3 -m auto_round \
+auto-round \
 --model microsoft/phi-2 \
 --device 0 \
 --group_size 128 \
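
Note: for reference, a rough Python-API equivalent of the new `auto-round` CLI invocation, assuming the `AutoRound` class exported by the auto-round package. The bits/group_size/sym arguments mirror the model card's stated int4, group_size 128, sym settings; the output directory name is illustrative:

```python
# Quantize microsoft/phi-2 to int4 with auto-round's Python API
# (a sketch; argument names assumed to match the CLI flags above).
from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_round import AutoRound

model_name = "microsoft/phi-2"
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_name)

autoround = AutoRound(model, tokenizer, bits=4, group_size=128, sym=True)
autoround.quantize()
autoround.save_quantized("./phi-2-int4-inc")  # illustrative output path
```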
 