Update README.md
Browse files
README.md
CHANGED
|
@@ -100,8 +100,8 @@ quantized_model = AutoModelForCausalLM.from_pretrained(untied_model_id, torch_dt
|
|
| 100 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 101 |
|
| 102 |
# Push to hub
|
| 103 |
-
|
| 104 |
-
save_to = f"{USER_ID}/phi4-mini-8dq4w"
|
| 105 |
quantized_model.push_to_hub(save_to, safe_serialization=False)
|
| 106 |
tokenizer.push_to_hub(save_to)
|
| 107 |
|
|
@@ -133,7 +133,7 @@ print("Response:", output_text[0][len(prompt):])
|
|
| 133 |
|
| 134 |
# Save to disk
|
| 135 |
state_dict = quantized_model.state_dict()
|
| 136 |
-
torch.save(state_dict, "phi4-mini-8dq4w.bin")
|
| 137 |
|
| 138 |
```
|
| 139 |
|
|
@@ -154,7 +154,7 @@ Need to install lm-eval from source: https://github.com/EleutherAI/lm-evaluation
|
|
| 154 |
lm_eval --model hf --model_args pretrained=microsoft/Phi-4-mini-instruct --tasks hellaswag --device cuda:0 --batch_size 64
|
| 155 |
```
|
| 156 |
|
| 157 |
-
##
|
| 158 |
```
|
| 159 |
import lm_eval
|
| 160 |
from lm_eval import evaluator
|
|
@@ -171,7 +171,7 @@ print(make_table(results))
|
|
| 171 |
|
| 172 |
| Benchmark | | |
|
| 173 |
|----------------------------------|-------------|-------------------|
|
| 174 |
-
| | Phi-4 mini-Ins | phi4-mini-8dq4w |
|
| 175 |
| **Popular aggregated benchmark** | | |
|
| 176 |
| mmlu (0 shot) | 66.73 | 63.11 |
|
| 177 |
| mmlu_pro (5-shot) | 44.71 | 35.31 |
|
|
@@ -208,13 +208,13 @@ Once the checkpoint is converted, we can export to ExecuTorch's PTE format with
|
|
| 208 |
PARAMS="executorch/examples/models/phi_4_mini/config.json"
|
| 209 |
python -m executorch.examples.models.llama.export_llama \
|
| 210 |
--model "phi_4_mini" \
|
| 211 |
-
--checkpoint "phi4-mini-8dq4w-converted.bin" \
|
| 212 |
--params "$PARAMS" \
|
| 213 |
-kv \
|
| 214 |
--use_sdpa_with_kv_cache \
|
| 215 |
-X \
|
| 216 |
--metadata '{"get_bos_id":199999, "get_eos_ids":[200020,199999]}' \
|
| 217 |
-
--output_name="phi4-mini-8dq4w.pte"
|
| 218 |
```
|
| 219 |
|
| 220 |
## Running in a mobile app
|
|
|
|
| 100 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 101 |
|
| 102 |
# Push to hub
|
| 103 |
+
MODEL_NAME = model_id.split("/")[-1]
|
| 104 |
+
save_to = f"{USER_ID}/{MODEL_NAME}-untied-8da4w"
|
| 105 |
quantized_model.push_to_hub(save_to, safe_serialization=False)
|
| 106 |
tokenizer.push_to_hub(save_to)
|
| 107 |
|
|
|
|
| 133 |
|
| 134 |
# Save to disk
|
| 135 |
state_dict = quantized_model.state_dict()
|
| 136 |
+
torch.save(state_dict, "phi4-mini-8da4w.bin")
|
| 137 |
|
| 138 |
```
|
| 139 |
|
|
|
|
| 154 |
lm_eval --model hf --model_args pretrained=microsoft/Phi-4-mini-instruct --tasks hellaswag --device cuda:0 --batch_size 64
|
| 155 |
```
|
| 156 |
|
| 157 |
+
## int8 dynamic activation and int4 weight quantization (8da4w)
|
| 158 |
```
|
| 159 |
import lm_eval
|
| 160 |
from lm_eval import evaluator
|
|
|
|
| 171 |
|
| 172 |
| Benchmark | | |
|
| 173 |
|----------------------------------|-------------|-------------------|
|
| 174 |
+
| | Phi-4 mini-Ins | phi4-mini-8da4w |
|
| 175 |
| **Popular aggregated benchmark** | | |
|
| 176 |
| mmlu (0 shot) | 66.73 | 63.11 |
|
| 177 |
| mmlu_pro (5-shot) | 44.71 | 35.31 |
|
|
|
|
| 208 |
PARAMS="executorch/examples/models/phi_4_mini/config.json"
|
| 209 |
python -m executorch.examples.models.llama.export_llama \
|
| 210 |
--model "phi_4_mini" \
|
| 211 |
+
--checkpoint "phi4-mini-8da4w-converted.bin" \
|
| 212 |
--params "$PARAMS" \
|
| 213 |
-kv \
|
| 214 |
--use_sdpa_with_kv_cache \
|
| 215 |
-X \
|
| 216 |
--metadata '{"get_bos_id":199999, "get_eos_ids":[200020,199999]}' \
|
| 217 |
+
--output_name="phi4-mini-8da4w.pte"
|
| 218 |
```
|
| 219 |
|
| 220 |
## Running in a mobile app
|