Spaces:
Running
Running
File size: 1,825 Bytes
e06c771 9408d44 86c7e49 9408d44 e06c771 97881c0 a0e246d 0257d7c cd43969 0257d7c 86c7e49 a0e246d e06c771 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
Baichuan2-13B-Chat,6.67,10.0,23.33,23.33,16.67,20.0,26.67,30.0
ChatGLM3-6B,13.33333333,13.33333333,16.66666667,16.66666667,6.666666667,6.666666667,13.33333333,13.33333333
DevOps-Model-14B-Chat,16.67,16.67,33.33,13.33,40.0,40.0,20.0,23.33
ERNIE-Bot-4.0,16.67,16.67,20.0,20.0,36.67,36.67,23.33,23.33
GPT-3.5-turbo,13.33,13.33,20.0,26.67,20.0,20.0,16.67,23.33
GPT-4,20.0,20.0,20.0,20.0,43.33,43.33,46.67,46.67
InternLM2-Chat-20B,13.33333333,13.33333333,20.0,20.0,16.66666667,16.66666667,,
InternLM2-Chat-7B,43.33333333,43.33333333,23.33333333,23.33333333,30.0,30.0,40.0,40.0
LLaMA-2-13B,10.0,10.0,20.0,20.0,26.67,26.67,13.33,13.33
LLaMA-2-70B-Chat,3.33,3.33,20.0,20.0,23.33,23.33,16.67,16.67
LLaMA-2-7B,10.0,10.0,26.67,26.67,16.67,16.67,33.33,33.33
Mistral-7B,0.0,0.0,23.33,23.33,0.0,0.0,16.67,16.67
Qwen-14B-Chat,13.33,13.33,20.0,26.67,40.0,30.0,26.67,33.33
Qwen-72B-Chat,36.67,36.67,33.33,33.33,43.33,43.33,33.33,36.67
Yi-34B-Chat,36.67,40.0,36.67,30.0,50.0,46.67,30.0,43.33
JIUTIAN-75B-net,65.0,65.0,70.0,70.0,50.0,50.0,75.0,75.0
Claude-3-Opus,55.0,55.0,,,,,,
Deepseek-R1-Distill-Llama-8B,10.0,10.0,30.0,30.0,35.0,35.0,27.5,27.5
Deepseek-R1-Distill-Qwen-1.5B,25.0,25.0,5.0,5.0,32.5,32.5,20.0,20.0
Deepseek-R1-Distill-Qwen-14B,50.0,50.0,,,60.0,60.0,,
Deepseek-R1-Distill-Qwen-32B,60.0,60.0,,,62.5,62.5,,
Deepseek-R1-Distill-Qwen-7B,10.0,10.0,22.5,22.5,35.0,35.0,32.5,32.5
Gemma-2B,26.66667,26.66667,10.0,10.0,26.66667,26.66667,20.0,20.0
Gemma-7B,3.333333,3.333333,23.33333,23.33333,13.33333,13.33333,30.0,30.0
Meta-Llama-3-8B-Instruct,27.5,27.5,22.5,22.5,30.0,30.0,30.0,30.0
Qwen1.5-14B-Base,20.0,20.0,33.33333,33.33333,20.0,20.0,30.0,30.0
Qwen1.5-14B-Chat,23.33333,26.66667,13.33333,13.33333,26.66667,26.66667,20.0,30.0
|