OpsEval / data_v2 /inspur_en_mc_gen.csv
Junetheriver's picture
update leaderboard 2024-09-11
a0e246d
raw
history blame
1.04 kB
name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
GPT-4,85.71428571428571,85.71428571428571,87.75510204081633,87.75510204081633,90.47619047619048,90.47619047619048,91.15646258503402,91.15646258503402
GPT-4o,89.79591836734694,89.79591836734694,90.47619047619048,90.47619047619048,91.15646258503402,91.15646258503402,92.51700680272108,92.51700680272108
Baichuan2-7B-Chat,44.89795918367347,44.89795918367347,66.66666666666666,66.66666666666666,28.57142857142857,28.57142857142857,50.34013605442177,50.34013605442177
Claude-3-Opus,87.75510204081633,87.75510204081633,89.1156462585034,89.1156462585034,91.15646258503402,91.15646258503402,88.43537414965986,88.43537414965986
Qwen2-0.5B-Instruct,,,53.06122448979592,53.06122448979592,,,52.38095238095239,52.38095238095239
Qwen2-1.5B-Instruct,,,67.3469387755102,67.3469387755102,65.98639455782312,65.98639455782312,,
Qwen2-7B-Instruct,80.95238095238095,80.95238095238095,,,80.27210884353741,80.27210884353741,82.31292517006803,82.31292517006803