Update README.md
Browse files
README.md
CHANGED
|
@@ -100,8 +100,8 @@ quantized_model = AutoModelForCausalLM.from_pretrained(untied_model_id, torch_dt
|
|
| 100 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 101 |
|
| 102 |
# Push to hub
|
| 103 |
-
|
| 104 |
-
save_to = f"{USER_ID}/phi4-mini-8dq4w"
|
| 105 |
quantized_model.push_to_hub(save_to, safe_serialization=False)
|
| 106 |
tokenizer.push_to_hub(save_to)
|
| 107 |
|
|
@@ -133,7 +133,7 @@ print("Response:", output_text[0][len(prompt):])
|
|
| 133 |
|
| 134 |
# Save to disk
|
| 135 |
state_dict = quantized_model.state_dict()
|
| 136 |
-
torch.save(state_dict, "phi4-mini-8dq4w.bin")
|
| 137 |
|
| 138 |
```
|
| 139 |
|
|
@@ -154,7 +154,7 @@ Need to install lm-eval from source: https://github.com/EleutherAI/lm-evaluation
|
|
| 154 |
lm_eval --model hf --model_args pretrained=microsoft/Phi-4-mini-instruct --tasks hellaswag --device cuda:0 --batch_size 64
|
| 155 |
```
|
| 156 |
|
| 157 |
-
##
|
| 158 |
```
|
| 159 |
import lm_eval
|
| 160 |
from lm_eval import evaluator
|
|
@@ -171,7 +171,7 @@ print(make_table(results))
|
|
| 171 |
|
| 172 |
| Benchmark | | |
|
| 173 |
|----------------------------------|-------------|-------------------|
|
| 174 |
-
| | Phi-4 mini-Ins | phi4-mini-8dq4w |
|
| 175 |
| **Popular aggregated benchmark** | | |
|
| 176 |
| mmlu (0 shot) | 66.73 | 63.11 |
|
| 177 |
| mmlu_pro (5-shot) | 44.71 | 35.31 |
|
|
@@ -208,13 +208,13 @@ Once the checkpoint is converted, we can export to ExecuTorch's PTE format with
|
|
| 208 |
PARAMS="executorch/examples/models/phi_4_mini/config.json"
|
| 209 |
python -m executorch.examples.models.llama.export_llama \
|
| 210 |
--model "phi_4_mini" \
|
| 211 |
-
--checkpoint "phi4-mini-8dq4w-converted.bin" \
|
| 212 |
--params "$PARAMS" \
|
| 213 |
-kv \
|
| 214 |
--use_sdpa_with_kv_cache \
|
| 215 |
-X \
|
| 216 |
--metadata '{"get_bos_id":199999, "get_eos_ids":[200020,199999]}' \
|
| 217 |
-
--output_name="phi4-mini-8dq4w.pte"
|
| 218 |
```
|
| 219 |
|
| 220 |
## Running in a mobile app
|
|
|
|
| 100 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 101 |
|
| 102 |
# Push to hub
|
| 103 |
+
MODEL_NAME = model_id.split("/")[-1]
|
| 104 |
+
save_to = f"{USER_ID}/{MODEL_NAME}-untied-8da4w"
|
| 105 |
quantized_model.push_to_hub(save_to, safe_serialization=False)
|
| 106 |
tokenizer.push_to_hub(save_to)
|
| 107 |
|
|
|
|
| 133 |
|
| 134 |
# Save to disk
|
| 135 |
state_dict = quantized_model.state_dict()
|
| 136 |
+
torch.save(state_dict, "phi4-mini-8da4w.bin")
|
| 137 |
|
| 138 |
```
|
| 139 |
|
|
|
|
| 154 |
lm_eval --model hf --model_args pretrained=microsoft/Phi-4-mini-instruct --tasks hellaswag --device cuda:0 --batch_size 64
|
| 155 |
```
|
| 156 |
|
| 157 |
+
## int8 dynamic activation and int4 weight quantization (8da4w)
|
| 158 |
```
|
| 159 |
import lm_eval
|
| 160 |
from lm_eval import evaluator
|
|
|
|
| 171 |
|
| 172 |
| Benchmark | | |
|
| 173 |
|----------------------------------|-------------|-------------------|
|
| 174 |
+
| | Phi-4 mini-Ins | phi4-mini-8da4w |
|
| 175 |
| **Popular aggregated benchmark** | | |
|
| 176 |
| mmlu (0 shot) | 66.73 | 63.11 |
|
| 177 |
| mmlu_pro (5-shot) | 44.71 | 35.31 |
|
|
|
|
| 208 |
PARAMS="executorch/examples/models/phi_4_mini/config.json"
|
| 209 |
python -m executorch.examples.models.llama.export_llama \
|
| 210 |
--model "phi_4_mini" \
|
| 211 |
+
--checkpoint "phi4-mini-8da4w-converted.bin" \
|
| 212 |
--params "$PARAMS" \
|
| 213 |
-kv \
|
| 214 |
--use_sdpa_with_kv_cache \
|
| 215 |
-X \
|
| 216 |
--metadata '{"get_bos_id":199999, "get_eos_ids":[200020,199999]}' \
|
| 217 |
+
--output_name="phi4-mini-8da4w.pte"
|
| 218 |
```
|
| 219 |
|
| 220 |
## Running in a mobile app
|