Update README.md
Browse files
README.md
CHANGED
@@ -192,27 +192,27 @@ lm_eval --model hf --model_args pretrained=microsoft/Phi-4-mini-instruct --tasks
|
|
192 |
lm_eval --model hf --model_args pretrained=pytorch/Phi-4-mini-instruct-float8dq --tasks hellaswag --device cuda:0 --batch_size 8
|
193 |
```
|
194 |
|
195 |
-
| Benchmark | |
|
196 |
-
|
197 |
-
| | Phi-4
|
198 |
-
| **Popular aggregated benchmark** | |
|
199 |
-
| mmlu (0-shot) | 66.73 | 66.61
|
200 |
-
| mmlu_pro (5-shot) | 46.43 | 44.58
|
201 |
-
| **Reasoning** | |
|
202 |
-
| arc_challenge (0-shot) | 56.91 | 56.66
|
203 |
-
| gpqa_main_zeroshot | 30.13 | 29.46
|
204 |
-
| HellaSwag | 54.57 | 54.55
|
205 |
-
| openbookqa | 33.00 | 33.60
|
206 |
-
| piqa (0-shot) | 77.64 | 77.48
|
207 |
-
| social_iqa | 49.59 | 49.28
|
208 |
-
| truthfulqa_mc2 (0-shot) | 48.39 | 48.09
|
209 |
-
| winogrande (0-shot) | 71.11 | 72.77
|
210 |
-
| **Multilingual** | |
|
211 |
-
| mgsm_en_cot_en | 60.8 | 60.0
|
212 |
-
| **Math** | |
|
213 |
-
| gsm8k (5-shot) | 81.88 | 80.89
|
214 |
-
| mathqa (0-shot) | 42.31 | 42.51
|
215 |
-
| **Overall** | **55.35** | **55.11**
|
216 |
|
217 |
# Peak Memory Usage
|
218 |
|
|
|
192 |
lm_eval --model hf --model_args pretrained=pytorch/Phi-4-mini-instruct-float8dq --tasks hellaswag --device cuda:0 --batch_size 8
|
193 |
```
|
194 |
|
195 |
+
| Benchmark | | |
|
196 |
+
|----------------------------------|----------------|-------------------------------|
|
197 |
+
| | Phi-4-mini-instruct | Phi-4-mini-instruct-float8dq |
|
198 |
+
| **Popular aggregated benchmark** | | |
|
199 |
+
| mmlu (0-shot) | 66.73 | 66.61 |
|
200 |
+
| mmlu_pro (5-shot) | 46.43 | 44.58 |
|
201 |
+
| **Reasoning** | | |
|
202 |
+
| arc_challenge (0-shot) | 56.91 | 56.66 |
|
203 |
+
| gpqa_main_zeroshot | 30.13 | 29.46 |
|
204 |
+
| HellaSwag | 54.57 | 54.55 |
|
205 |
+
| openbookqa | 33.00 | 33.60 |
|
206 |
+
| piqa (0-shot) | 77.64 | 77.48 |
|
207 |
+
| social_iqa | 49.59 | 49.28 |
|
208 |
+
| truthfulqa_mc2 (0-shot) | 48.39 | 48.09 |
|
209 |
+
| winogrande (0-shot) | 71.11 | 72.77 |
|
210 |
+
| **Multilingual** | | |
|
211 |
+
| mgsm_en_cot_en | 60.8 | 60.0 |
|
212 |
+
| **Math** | | |
|
213 |
+
| gsm8k (5-shot) | 81.88 | 80.89 |
|
214 |
+
| mathqa (0-shot) | 42.31 | 42.51 |
|
215 |
+
| **Overall** | **55.35** | **55.11** |
|
216 |
|
217 |
# Peak Memory Usage
|
218 |
|