Junetheriver commited on
Commit
ce190c5
·
1 Parent(s): 4471cca

update leaderboard 2024-09-06

Browse files
data_v2/inspur_en_mc_gen.csv CHANGED
@@ -1,7 +1,7 @@
1
- name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
- Gpt-4,85.71428571428571,85.71428571428571,87.75510204081633,87.75510204081633,90.47619047619048,90.47619047619048,91.15646258503402,91.15646258503402
3
- GPT-4o,89.79591836734694,89.79591836734694,90.47619047619048,90.47619047619048,91.15646258503402,91.15646258503402,92.51700680272108,92.51700680272108
4
- Baichuan2-7B-Chat,44.89795918367347,44.89795918367347,66.66666666666666,66.66666666666666,28.57142857142857,28.57142857142857,50.34013605442177,50.34013605442177
5
- Claude-3-Opus,87.75510204081633,87.75510204081633,89.1156462585034,89.1156462585034,91.15646258503402,91.15646258503402,88.43537414965986,88.43537414965986
6
- Qwen2-0.5B-Instruct,80.95238095238095,80.95238095238095,53.06122448979592,53.06122448979592,65.98639455782312,65.98639455782312,52.38095238095239,52.38095238095239
7
- Qwen2-7B-Instruct,,,67.3469387755102,67.3469387755102,80.27210884353741,80.27210884353741,82.31292517006803,82.31292517006803
 
1
+ name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
2
+ Gpt-4,85.71428571428571,87.75510204081633,90.47619047619048,91.15646258503402
3
+ GPT-4o,89.79591836734694,90.47619047619048,91.15646258503402,92.51700680272108
4
+ Baichuan2-7B-Chat,44.89795918367347,66.66666666666666,28.57142857142857,50.34013605442177
5
+ Claude-3-Opus,87.75510204081633,89.1156462585034,91.15646258503402,88.43537414965986
6
+ Qwen2-0.5B-Instruct,80.95238095238095,53.06122448979592,65.98639455782312,52.38095238095239
7
+ Qwen2-7B-Instruct,,67.3469387755102,80.27210884353741,82.31292517006803
data_v2/inspur_zh_mc_gen.csv CHANGED
@@ -1,6 +1,6 @@
1
- name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
- Gpt-4,87.07482993197279,87.07482993197279,87.07482993197279,87.07482993197279,87.07482993197279,87.07482993197279,91.15646258503402,91.15646258503402
3
- GPT-4o,62.585034013605444,62.585034013605444,89.1156462585034,89.1156462585034,89.1156462585034,89.1156462585034,91.15646258503402,91.15646258503402
4
- Claude-3-Opus,83.6734693877551,83.6734693877551,85.03401360544217,85.03401360544217,42.857142857142854,42.857142857142854,91.83673469387756,91.83673469387756
5
- Qwen2-0.5B-Instruct,56.4625850340136,56.4625850340136,68.02721088435374,68.02721088435374,87.75510204081633,87.75510204081633,57.14285714285714,57.14285714285714
6
- Qwen2-7B-Instruct,76.19047619047619,76.19047619047619,80.95238095238095,80.95238095238095,76.87074829931973,76.87074829931973,,
 
1
+ name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
2
+ Gpt-4,87.07482993197279,87.07482993197279,87.07482993197279,91.15646258503402
3
+ GPT-4o,62.585034013605444,89.1156462585034,89.1156462585034,91.15646258503402
4
+ Claude-3-Opus,83.6734693877551,85.03401360544217,42.857142857142854,91.83673469387756
5
+ Qwen2-0.5B-Instruct,56.4625850340136,68.02721088435374,87.75510204081633,57.14285714285714
6
+ Qwen2-7B-Instruct,76.19047619047619,80.95238095238095,76.87074829931973,
data_v2/lenovo_zh_mc_gen.csv CHANGED
@@ -12,5 +12,5 @@ Mistral-7B,47.5,47.5,62.5,62.5,35.0,35.0,60.0,60.0
12
  Qwen-14B-Chat,70.0,67.5,70.0,67.5,70.0,65.0,65.0,67.5
13
  Qwen-72B-Chat,72.5,72.5,75.0,75.0,75.0,75.0,75.0,75.0
14
  Yi-34B-Chat,75.0,75.0,87.5,82.5,62.5,57.5,52.5,52.5
15
- Meta-Llama-3-8B-Instruct,71.42857142857143,71.42857142857143,44.285714285714285,44.285714285714285,45.714285714285715,45.714285714285715,32.857142857142854,32.857142857142854
16
- Meta-Llama-3-8B-Instruct,47.14285714285714,47.14285714285714,,,,,,
 
12
  Qwen-14B-Chat,70.0,67.5,70.0,67.5,70.0,65.0,65.0,67.5
13
  Qwen-72B-Chat,72.5,72.5,75.0,75.0,75.0,75.0,75.0,75.0
14
  Yi-34B-Chat,75.0,75.0,87.5,82.5,62.5,57.5,52.5,52.5
15
+ Meta-Llama-3-8B-Instruct,,71.42857142857143,,44.285714285714285,,45.714285714285715,,32.857142857142854
16
+ Meta-Llama-3-8B-Instruct,,47.14285714285714,,,,,,
data_v2/zte_en_mc_gen.csv CHANGED
@@ -23,6 +23,6 @@ gemma_2b,20.1,49.599999999999994,24.2,24.2,31.2,31.2,35.5,35.5
23
  gemma_7b,23.1,20.1,34.4,34.4,21.4,21.4,33.1,33.1
24
  Qwen1.5-14B-Base,34.0,23.1,42.8,63.4,57.9,37.6,40.2,59.0
25
  Qwen1.5-14B-Chat,34.5,38.9,41.7,35.4,33.2,19.7,46.2,32.9
26
- Qwen1.5-14B-Base,24.7,24.7,,42.8,,57.9,,40.2
27
  Qwen1.5-14B-Chat,,34.0,,41.1,,34.7,,47.4
28
  Qwen1.5-14B-Chat,,35.6,,,,,,
 
23
  gemma_7b,23.1,20.1,34.4,34.4,21.4,21.4,33.1,33.1
24
  Qwen1.5-14B-Base,34.0,23.1,42.8,63.4,57.9,37.6,40.2,59.0
25
  Qwen1.5-14B-Chat,34.5,38.9,41.7,35.4,33.2,19.7,46.2,32.9
26
+ Qwen1.5-14B-Base,,24.7,,42.8,,57.9,,40.2
27
  Qwen1.5-14B-Chat,,34.0,,41.1,,34.7,,47.4
28
  Qwen1.5-14B-Chat,,35.6,,,,,,
data_v2/zte_zh_mc_gen.csv CHANGED
@@ -25,6 +25,6 @@ gemma_2b,25.6,51.4,28.3,28.3,19.1,19.1,35.5,35.5
25
  gemma_7b,27.3,25.6,35.4,35.4,17.3,17.3,44.5,44.5
26
  Qwen1.5-14B-Base,49.1,27.3,49.9,37.4,62.5,51.10000000000001,41.3,36.900000000000006
27
  Qwen1.5-14B-Chat,38.6,31.1,48.8,34.3,54.6,36.0,52.1,37.1
28
- Qwen1.5-14B-Base,31.1,31.1,,49.9,,62.5,,41.3
29
  Qwen1.5-14B-Chat,,49.1,,50.5,,55.2,,52.7
30
  Qwen1.5-14B-Chat,,38.9,,,,,,
 
25
  gemma_7b,27.3,25.6,35.4,35.4,17.3,17.3,44.5,44.5
26
  Qwen1.5-14B-Base,49.1,27.3,49.9,37.4,62.5,51.10000000000001,41.3,36.900000000000006
27
  Qwen1.5-14B-Chat,38.6,31.1,48.8,34.3,54.6,36.0,52.1,37.1
28
+ Qwen1.5-14B-Base,,31.1,,49.9,,62.5,,41.3
29
  Qwen1.5-14B-Chat,,49.1,,50.5,,55.2,,52.7
30
  Qwen1.5-14B-Chat,,38.9,,,,,,