OpsEval / data_v2 /zjyd_en_mc_gen.csv
Junetheriver's picture
update leaderboard 2024-12-20
1922969
raw
history blame
1.01 kB
name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
GPT-3.5-turbo,87.03703703703704,84.11680851851852,88.39286,89.28571,86.11111111111111,83.65384615555556,86.60714286,88.39285714
GPT-4,87.03703703703704,87.03703703703704,,,87.03703703703704,87.03703703703704,,
Gemma-2B,,26.49573,62.5,62.5,,58.97435897,75.0,75.0
Gemma-7B,,77.77778,75.89286,75.89286,,76.06837607,,
Qwen1.5-0.5B-Base,,65.17857,75.0,75.0,,56.25,57.14285714,57.14285714
Qwen1.5-0.5B-Chat,,0.0,54.46429,53.57143,,19.64285714,16.96428571,17.85714286
Qwen1.5-1.8B-Base,,71.42857,71.42857,71.42857,,70.53571429,80.35714286,80.35714286
Qwen1.5-1.8B-Chat,,69.64286,67.85714,77.67857,,66.07142857,68.75,75.0
Qwen1.5-14B-Base,,76.92308,,,,78.63247863,83.03571429,83.03571429
Qwen1.5-14B-Chat,,23.21429,,,,78.57142857,80.35714286,83.92857143
JIUTIAN-75B-net,82.4074074074074,82.4074074074074,86.11111111111111,86.11111111111111,84.25925925925925,84.25925925925925,85.18518518518519,85.18518518518519