Spaces:
Running
Running
File size: 2,363 Bytes
e06c771 9408d44 e06c771 9408d44 e06c771 97881c0 a0e246d 0257d7c cd43969 0257d7c 86c7e49 a0e246d e06c771 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
Baichuan-13B-Chat,11.04,11.13,26.92,28.61,14.35,13.22,31.69,33.97
ChatGLM2-6B,23.09,23.12,24.22,24.08,30.46,30.46,35.97,35.9
ChatGLM3-6B,32.6,32.6,35.4,35.4,28.3,28.3,40.9,40.9
Chinese-Alpaca-2-13B,22.69,22.69,24.59,24.59,40.52,40.52,40.73,40.73
Chinese-LLaMA-2-13B,17.98,17.98,17.83,17.83,31.66,31.66,36.24,36.24
DevOps-Model-14B-Chat,41.04,42.7,48.71,53.57,56.85,57.25,51.3,54.29
ERNIE-Bot-4.0,45.99,45.99,48.98,48.98,46.0,46.0,54.0,54.0
GLM3-turbo,43.0,43.0,,,,,,
GLM4,50.0,50.0,,,,,,
GPT-3.5-turbo,37.06,36.83,37.56,39.25,39.42,39.77,41.96,42.15
GPT-4,,,57.35,62.11,,,61.2,65.68
InternLM-7B,27.81,27.81,19.95,19.95,24.18,24.18,35.35,35.35
InternLM2-Chat-20B,44.6,44.6,47.0,47.0,62.2,62.2,38.3,38.3
InternLM2-Chat-7B,38.8,38.8,44.6,44.6,46.0,46.0,35.8,35.8
LLaMA-2-13B,25.43,27.16,29.17,29.99,36.56,36.15,37.7,39.02
LLaMA-2-70B-Chat,24.38,24.38,43.63,43.63,44.65,44.65,48.84,48.84
LLaMA-2-7B,24.09,23.47,28.69,29.26,29.94,30.03,31.35,31.93
Mistral-7B,1.27,1.27,42.05,42.05,30.72,30.72,46.44,46.44
Qwen-14B-Chat,41.71,41.44,45.58,47.98,53.52,49.92,54.72,58.85
Qwen-72B-Chat,64.79,64.79,65.79,65.72,70.19,70.19,68.31,68.38
Qwen-7B-Chat,36.28,36.5,33.18,33.51,41.58,40.59,31.48,31.46
Yi-34B-Chat,64.91,64.58,62.77,65.51,70.85,70.92,48.77,47.97
JIUTIAN-75B-net,64.2,64.2,66.60000000000001,66.60000000000001,64.0,64.0,67.80000000000001,67.80000000000001
Claude-3-Opus,51.4,51.4,,,,,,
Deepseek-R1-Distill-Llama-8B,25.0,25.0,32.1,32.1,30.5,30.5,34.6,34.6
Deepseek-R1-Distill-Qwen-1.5B,28.299999999999997,28.299999999999997,24.6,24.6,23.1,23.1,31.300000000000004,31.300000000000004
Deepseek-R1-Distill-Qwen-14B,46.5,46.5,,,52.3,52.3,,
Deepseek-R1-Distill-Qwen-32B,65.5,65.5,,,67.39999999999999,67.39999999999999,,
Deepseek-R1-Distill-Qwen-7B,30.900000000000002,30.900000000000002,29.400000000000002,29.400000000000002,32.6,32.6,35.599999999999994,35.599999999999994
Gemma-2B,25.6,25.6,28.3,28.3,19.1,19.1,35.5,35.5
Gemma-7B,27.3,27.3,35.4,35.4,17.3,17.3,44.5,44.5
Meta-Llama-3-70B-Instruct,31.1,31.1,37.4,37.4,51.10000000000001,51.10000000000001,36.900000000000006,36.900000000000006
Meta-Llama-3-8B-Instruct,31.1,31.1,34.3,34.3,36.0,36.0,37.1,37.1
Qwen1.5-14B-Base,49.1,49.1,49.9,49.9,62.5,62.5,41.3,41.3
Qwen1.5-14B-Chat,38.6,38.9,48.8,50.5,54.6,55.2,52.1,52.7
|