Spaces:
Running
Running
Commit
·
fe35dbb
1
Parent(s):
00eb906
update leaderboard 2024-09-06
Browse files- app.py +5 -1
- data_v2/bosc_zh_mc_gen.csv +22 -22
- data_v2/gtja_zh_mc_gen.csv +22 -22
- data_v2/huaweicloud_zh_mc_gen.csv +22 -22
- data_v2/inspur_en_mc_gen.csv +8 -8
- data_v2/inspur_zh_mc_gen.csv +8 -8
- data_v2/lenovo_zh_mc_gen.csv +16 -16
- data_v2/network_en_mc_gen.csv +29 -29
- data_v2/network_zh_mc_gen.csv +32 -32
- data_v2/oracle_en_mc_gen.csv +29 -29
- data_v2/oracle_zh_mc_gen.csv +28 -28
- data_v2/pufa_zh_mc_gen.csv +22 -22
- data_v2/rzy_zh_mc_gen.csv +22 -22
- data_v2/zabbix_zh_mc_gen.csv +22 -22
- data_v2/zjyd_zh_mc_gen.csv +29 -29
- data_v2/zte_en_mc_gen.csv +28 -28
- data_v2/zte_zh_mc_gen.csv +30 -30
app.py
CHANGED
@@ -40,7 +40,11 @@ def process_mc_df(df, shot=None):
|
|
40 |
# 将zero_naive, zero_self_con, zero_cot, zero_cot_self_con, few_naive, few_self_con, few_cot, few_cot_self_con列重新组织成MultiIndex,一层为Zeroshot, Fewshot,一层为Naive, Self-Consistency, CoT, CoT+Self-Consistency
|
41 |
df = df.set_index("Model")
|
42 |
# df = df.stack().unstack()
|
43 |
-
|
|
|
|
|
|
|
|
|
44 |
# 保留shot的列,比如如果shot=Zeroshot那么只有Zeroshot的列会被保留
|
45 |
if shot:
|
46 |
df = df[shot]
|
|
|
40 |
# 将zero_naive, zero_self_con, zero_cot, zero_cot_self_con, few_naive, few_self_con, few_cot, few_cot_self_con列重新组织成MultiIndex,一层为Zeroshot, Fewshot,一层为Naive, Self-Consistency, CoT, CoT+Self-Consistency
|
41 |
df = df.set_index("Model")
|
42 |
# df = df.stack().unstack()
|
43 |
+
try:
|
44 |
+
df.columns = pd.MultiIndex.from_tuples([("Zeroshot", "Naive"), ("Zeroshot", "SC"), ("Zeroshot", "CoT"), ("Zeroshot", "CoT+SC"), ("Fewshot", "Naive"), ("Fewshot", "SC"), ("Fewshot", "CoT"), ("Fewshot", "CoT+SC")])
|
45 |
+
except:
|
46 |
+
print(df)
|
47 |
+
raise
|
48 |
# 保留shot的列,比如如果shot=Zeroshot那么只有Zeroshot的列会被保留
|
49 |
if shot:
|
50 |
df = df[shot]
|
data_v2/bosc_zh_mc_gen.csv
CHANGED
@@ -1,22 +1,22 @@
|
|
1 |
-
name,
|
2 |
-
Baichuan2-13B-Chat,
|
3 |
-
Chatglm3-6B,35.0,
|
4 |
-
Devops-Model-14B-Chat,
|
5 |
-
Ernie-Bot-4.0,57.5,
|
6 |
-
Gpt-3.5-Turbo,
|
7 |
-
GPT-4,57.5,57.5,
|
8 |
-
Internlm2-Chat-20B,47.5
|
9 |
-
Internlm2-Chat-7B,60.0,
|
10 |
-
Llama-2-13B,42.5,
|
11 |
-
Llama-2-70B-Chat,0.0,
|
12 |
-
Llama-2-7B,32.5,
|
13 |
-
Mistral-7B,0.0,
|
14 |
-
Qwen-14B-Chat,
|
15 |
-
Qwen-72B-Chat,50.0,
|
16 |
-
Yi-34B-Chat,55.0,
|
17 |
-
Claude-3-Opus
|
18 |
-
gemma_2b,37.5,
|
19 |
-
gemma_7b,32.5,
|
20 |
-
Meta-Llama-3-8B-Instruct
|
21 |
-
Qwen1.5-14B-Base,47.5,
|
22 |
-
Qwen1.5-14B-Chat,
|
|
|
1 |
+
name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
|
2 |
+
Baichuan2-13B-Chat,40.0,52.5,37.5,45.0
|
3 |
+
Chatglm3-6B,35.0,50.0,47.5,45.0
|
4 |
+
Devops-Model-14B-Chat,27.5,52.5,50.0,62.5
|
5 |
+
Ernie-Bot-4.0,57.5,60.0,52.5,57.5
|
6 |
+
Gpt-3.5-Turbo,47.5,55.0,40.0,55.0
|
7 |
+
GPT-4,57.5,57.5,52.5,62.5
|
8 |
+
Internlm2-Chat-20B,47.5,,47.5,
|
9 |
+
Internlm2-Chat-7B,60.0,57.5,55.0,62.5
|
10 |
+
Llama-2-13B,42.5,50.0,50.0,42.5
|
11 |
+
Llama-2-70B-Chat,0.0,57.5,25.0,45.0
|
12 |
+
Llama-2-7B,32.5,45.0,45.0,45.0
|
13 |
+
Mistral-7B,0.0,37.5,20.0,50.0
|
14 |
+
Qwen-14B-Chat,45.0,47.5,47.5,57.5
|
15 |
+
Qwen-72B-Chat,50.0,47.5,45.0,60.0
|
16 |
+
Yi-34B-Chat,55.0,67.5,50.0,55.0
|
17 |
+
Claude-3-Opus,72.85714285714286,,,
|
18 |
+
gemma_2b,37.5,40.0,32.5,40.0
|
19 |
+
gemma_7b,32.5,62.5,40.0,50.0
|
20 |
+
Meta-Llama-3-8B-Instruct,52.85714285714286,47.14285714285714,52.85714285714286,30.0
|
21 |
+
Qwen1.5-14B-Base,47.5,50.0,47.5,45.0
|
22 |
+
Qwen1.5-14B-Chat,47.5,72.5,55.0,60.0
|
data_v2/gtja_zh_mc_gen.csv
CHANGED
@@ -1,22 +1,22 @@
|
|
1 |
-
name,
|
2 |
-
Baichuan2-13B-Chat,41.76,
|
3 |
-
Chatglm3-6B,43.95604396,
|
4 |
-
Devops-Model-14B-Chat,
|
5 |
-
Ernie-Bot-4.0,68.13,
|
6 |
-
Gpt-3.5-Turbo,
|
7 |
-
GPT-4,68.13,
|
8 |
-
Internlm2-Chat-20B,56.04395604
|
9 |
-
Internlm2-Chat-7B,56.04395604,
|
10 |
-
Llama-2-13B,30.77,
|
11 |
-
Llama-2-70B-Chat,6.59,
|
12 |
-
Llama-2-7B,28.57,
|
13 |
-
Mistral-7B,5.49,
|
14 |
-
Qwen-14B-Chat,47.25,
|
15 |
-
Qwen-72B-Chat,71.43,
|
16 |
-
Yi-34B-Chat,
|
17 |
-
Claude-3-Opus
|
18 |
-
gemma_2b,30.76923,
|
19 |
-
gemma_7b,29.67033,
|
20 |
-
Meta-Llama-3-8B-Instruct
|
21 |
-
Qwen1.5-14B-Base,53.84615,
|
22 |
-
Qwen1.5-14B-Chat,
|
|
|
1 |
+
name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
|
2 |
+
Baichuan2-13B-Chat,41.76,60.44,38.46,51.65
|
3 |
+
Chatglm3-6B,43.95604396,53.84615385,43.95604396,47.25274725
|
4 |
+
Devops-Model-14B-Chat,38.46,49.45,59.34,63.74
|
5 |
+
Ernie-Bot-4.0,68.13,64.84,65.93,68.13
|
6 |
+
Gpt-3.5-Turbo,52.75,62.64,52.75,58.24
|
7 |
+
GPT-4,68.13,67.03,70.33,71.43
|
8 |
+
Internlm2-Chat-20B,56.04395604,,65.93406593,
|
9 |
+
Internlm2-Chat-7B,56.04395604,59.34065934,54.94505495,51.64835165
|
10 |
+
Llama-2-13B,30.77,47.25,47.25,42.86
|
11 |
+
Llama-2-70B-Chat,6.59,48.35,19.78,49.45
|
12 |
+
Llama-2-7B,28.57,45.05,42.86,45.05
|
13 |
+
Mistral-7B,5.49,47.25,14.29,38.46
|
14 |
+
Qwen-14B-Chat,47.25,54.95,54.95,61.54
|
15 |
+
Qwen-72B-Chat,71.43,67.03,70.33,74.73
|
16 |
+
Yi-34B-Chat,74.73,73.63,70.33,47.25
|
17 |
+
Claude-3-Opus,41.508438818565395,,,
|
18 |
+
gemma_2b,30.76923,43.95604,32.96703,29.67033
|
19 |
+
gemma_7b,29.67033,56.04396,34.06593,50.54945
|
20 |
+
Meta-Llama-3-8B-Instruct,36.550632911392405,38.08016877637131,43.24894514767932,34.28270042194093
|
21 |
+
Qwen1.5-14B-Base,53.84615,63.73626,68.13187,42.85714
|
22 |
+
Qwen1.5-14B-Chat,54.94505,68.13187,57.14286,62.63736
|
data_v2/huaweicloud_zh_mc_gen.csv
CHANGED
@@ -1,22 +1,22 @@
|
|
1 |
-
name,
|
2 |
-
Baichuan2-13B-Chat,
|
3 |
-
Chatglm3-6B,13.33333333,
|
4 |
-
Devops-Model-14B-Chat,16.67,
|
5 |
-
Ernie-Bot-4.0,16.67,
|
6 |
-
Gpt-3.5-Turbo,13.33,
|
7 |
-
GPT-4,20.0,20.0,
|
8 |
-
Internlm2-Chat-20B,13.33333333,
|
9 |
-
Internlm2-Chat-7B,43.33333333,
|
10 |
-
Llama-2-13B,10.0,
|
11 |
-
Llama-2-70B-Chat,3.33,
|
12 |
-
Llama-2-7B,10.0,
|
13 |
-
Mistral-7B,0.0,
|
14 |
-
Qwen-14B-Chat,13.33,
|
15 |
-
Qwen-72B-Chat,36.67,
|
16 |
-
Yi-34B-Chat,
|
17 |
-
Claude-3-Opus
|
18 |
-
gemma_2b,26.66667,
|
19 |
-
gemma_7b,3.333333,
|
20 |
-
Meta-Llama-3-8B-Instruct
|
21 |
-
Qwen1.5-14B-Base,20.0,
|
22 |
-
Qwen1.5-14B-Chat,
|
|
|
1 |
+
name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
|
2 |
+
Baichuan2-13B-Chat,10.0,23.33,20.0,30.0
|
3 |
+
Chatglm3-6B,13.33333333,16.66666667,6.666666667,13.33333333
|
4 |
+
Devops-Model-14B-Chat,16.67,13.33,40.0,23.33
|
5 |
+
Ernie-Bot-4.0,16.67,20.0,36.67,23.33
|
6 |
+
Gpt-3.5-Turbo,13.33,26.67,20.0,23.33
|
7 |
+
GPT-4,20.0,20.0,43.33,46.67
|
8 |
+
Internlm2-Chat-20B,13.33333333,20.0,16.66666667,
|
9 |
+
Internlm2-Chat-7B,43.33333333,23.33333333,30.0,40.0
|
10 |
+
Llama-2-13B,10.0,20.0,26.67,13.33
|
11 |
+
Llama-2-70B-Chat,3.33,20.0,23.33,16.67
|
12 |
+
Llama-2-7B,10.0,26.67,16.67,33.33
|
13 |
+
Mistral-7B,0.0,23.33,0.0,16.67
|
14 |
+
Qwen-14B-Chat,13.33,26.67,30.0,33.33
|
15 |
+
Qwen-72B-Chat,36.67,33.33,43.33,36.67
|
16 |
+
Yi-34B-Chat,40.0,30.0,46.67,43.33
|
17 |
+
Claude-3-Opus,55.0,,,
|
18 |
+
gemma_2b,26.66667,10.0,26.66667,20.0
|
19 |
+
gemma_7b,3.333333,23.33333,13.33333,30.0
|
20 |
+
Meta-Llama-3-8B-Instruct,27.5,22.5,30.0,30.0
|
21 |
+
Qwen1.5-14B-Base,20.0,33.33333,20.0,30.0
|
22 |
+
Qwen1.5-14B-Chat,26.66667,13.33333,26.66667,30.0
|
data_v2/inspur_en_mc_gen.csv
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
-
name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
|
2 |
-
Gpt-4,85.71428571428571,87.75510204081633,90.47619047619048,91.15646258503402
|
3 |
-
GPT-4o,89.79591836734694,90.47619047619048,91.15646258503402,92.51700680272108
|
4 |
-
Baichuan2-7B-Chat,44.89795918367347,66.66666666666666,28.57142857142857,50.34013605442177
|
5 |
-
Claude-3-Opus,87.75510204081633,89.1156462585034,91.15646258503402,88.43537414965986
|
6 |
-
Qwen2-0.5B-Instruct
|
7 |
-
Qwen2-1.5B-Instruct
|
8 |
-
Qwen2-7B-Instruct,80.95238095238095
|
|
|
1 |
+
name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
|
2 |
+
Gpt-4,85.71428571428571,85.71428571428571,87.75510204081633,87.75510204081633,90.47619047619048,90.47619047619048,91.15646258503402,91.15646258503402
|
3 |
+
GPT-4o,89.79591836734694,89.79591836734694,90.47619047619048,90.47619047619048,91.15646258503402,91.15646258503402,92.51700680272108,92.51700680272108
|
4 |
+
Baichuan2-7B-Chat,44.89795918367347,44.89795918367347,66.66666666666666,66.66666666666666,28.57142857142857,28.57142857142857,50.34013605442177,50.34013605442177
|
5 |
+
Claude-3-Opus,87.75510204081633,87.75510204081633,89.1156462585034,89.1156462585034,91.15646258503402,91.15646258503402,88.43537414965986,88.43537414965986
|
6 |
+
Qwen2-0.5B-Instruct,,,53.06122448979592,53.06122448979592,,,52.38095238095239,52.38095238095239
|
7 |
+
Qwen2-1.5B-Instruct,,,67.3469387755102,67.3469387755102,65.98639455782312,65.98639455782312,,
|
8 |
+
Qwen2-7B-Instruct,80.95238095238095,80.95238095238095,,,80.27210884353741,80.27210884353741,82.31292517006803,82.31292517006803
|
data_v2/inspur_zh_mc_gen.csv
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
-
name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
|
2 |
-
Gpt-4
|
3 |
-
GPT-4o,87.07482993197279,89.1156462585034,89.1156462585034,91.15646258503402
|
4 |
-
Baichuan2-7B-Chat,62.585034013605444
|
5 |
-
Claude-3-Opus,83.6734693877551,85.03401360544217,87.75510204081633,91.83673469387756
|
6 |
-
Qwen2-0.5B-Instruct,56.4625850340136
|
7 |
-
Qwen2-1.5B-Instruct
|
8 |
-
Qwen2-7B-Instruct,76.19047619047619,80.95238095238095,76.87074829931973,
|
|
|
1 |
+
name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
|
2 |
+
Gpt-4,,,87.07482993197279,87.07482993197279,87.07482993197279,87.07482993197279,91.15646258503402,91.15646258503402
|
3 |
+
GPT-4o,87.07482993197279,87.07482993197279,89.1156462585034,89.1156462585034,89.1156462585034,89.1156462585034,91.15646258503402,91.15646258503402
|
4 |
+
Baichuan2-7B-Chat,62.585034013605444,62.585034013605444,,,42.857142857142854,42.857142857142854,,
|
5 |
+
Claude-3-Opus,83.6734693877551,83.6734693877551,85.03401360544217,85.03401360544217,87.75510204081633,87.75510204081633,91.83673469387756,91.83673469387756
|
6 |
+
Qwen2-0.5B-Instruct,56.4625850340136,56.4625850340136,,,,,57.14285714285714,57.14285714285714
|
7 |
+
Qwen2-1.5B-Instruct,,,68.02721088435374,68.02721088435374,,,,
|
8 |
+
Qwen2-7B-Instruct,76.19047619047619,76.19047619047619,80.95238095238095,80.95238095238095,76.87074829931973,76.87074829931973,,
|
data_v2/lenovo_zh_mc_gen.csv
CHANGED
@@ -1,16 +1,16 @@
|
|
1 |
-
name,
|
2 |
-
Baichuan2-13B-Chat,
|
3 |
-
Chatglm3-6B,60.0,60.0,
|
4 |
-
Devops-Model-14B-Chat,
|
5 |
-
Ernie-Bot-4.0,75.0,
|
6 |
-
Gpt-3.5-Turbo,
|
7 |
-
GPT-4,77.5,
|
8 |
-
Llama-2-13B,45.0,
|
9 |
-
Llama-2-70B-Chat,22.5,
|
10 |
-
Llama-2-7B,32.5,
|
11 |
-
Mistral-7B,47.5,
|
12 |
-
Qwen-14B-Chat,
|
13 |
-
Qwen-72B-Chat,72.5,
|
14 |
-
Yi-34B-Chat,75.0,
|
15 |
-
Claude-3-Opus
|
16 |
-
Meta-Llama-3-8B-Instruct
|
|
|
1 |
+
name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
|
2 |
+
Baichuan2-13B-Chat,60.0,67.5,60.0,67.5
|
3 |
+
Chatglm3-6B,60.0,60.0,55.0,60.0
|
4 |
+
Devops-Model-14B-Chat,67.5,57.5,70.0,70.0
|
5 |
+
Ernie-Bot-4.0,75.0,77.5,75.0,82.5
|
6 |
+
Gpt-3.5-Turbo,62.5,70.0,57.5,62.5
|
7 |
+
GPT-4,77.5,82.5,77.5,82.5
|
8 |
+
Llama-2-13B,45.0,62.5,60.0,55.0
|
9 |
+
Llama-2-70B-Chat,22.5,75.0,20.0,57.5
|
10 |
+
Llama-2-7B,32.5,45.0,60.0,55.0
|
11 |
+
Mistral-7B,47.5,62.5,35.0,60.0
|
12 |
+
Qwen-14B-Chat,67.5,67.5,65.0,67.5
|
13 |
+
Qwen-72B-Chat,72.5,75.0,75.0,75.0
|
14 |
+
Yi-34B-Chat,75.0,82.5,57.5,52.5
|
15 |
+
Claude-3-Opus,71.42857142857143,,,
|
16 |
+
Meta-Llama-3-8B-Instruct,47.14285714285714,44.285714285714285,45.714285714285715,32.857142857142854
|
data_v2/network_en_mc_gen.csv
CHANGED
@@ -1,29 +1,29 @@
|
|
1 |
-
name,
|
2 |
-
Aquilachat2-34B,36.63,
|
3 |
-
Baichuan-13B-Chat,
|
4 |
-
Baichuan2-13B-Chat,
|
5 |
-
Chatglm2-6B,24.
|
6 |
-
Chatglm3-6B,43.38487973,
|
7 |
-
Chinese-Alpaca-2-13B,37.7,
|
8 |
-
Chinese-Llama-2-13B,29.4,
|
9 |
-
Devops-Model-14B-Chat,30.
|
10 |
-
Ernie-Bot-4.0,61.15,
|
11 |
-
Gpt-3.5-Turbo,66.
|
12 |
-
Gpt-4
|
13 |
-
Internlm-7B,38.7,
|
14 |
-
Internlm2-Chat-20B,56.35738832,
|
15 |
-
Internlm2-Chat-7B,49.74226804,
|
16 |
-
Llama-2-13B,
|
17 |
-
Llama-2-70B-Chat,25.29,
|
18 |
-
Llama-2-7B,
|
19 |
-
Mistral-7B,29.27,
|
20 |
-
Qwen-14B-Chat,
|
21 |
-
Qwen-72B-Chat,70.
|
22 |
-
Qwen-7B-Chat,
|
23 |
-
Yi-34B-Chat,
|
24 |
-
Claude-3-Opus
|
25 |
-
gemma_2b,26.46048,
|
26 |
-
gemma_7b,25.08591,
|
27 |
-
Meta-Llama-3-8B-Instruct
|
28 |
-
Qwen1.5-14B-Base,34.87973,
|
29 |
-
Qwen1.5-14B-Chat,
|
|
|
1 |
+
name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
|
2 |
+
Aquilachat2-34B,36.63,44.83,46.65,
|
3 |
+
Baichuan-13B-Chat,20.4,37.0,26.7,17.8
|
4 |
+
Baichuan2-13B-Chat,15.3,25.8,33.1,27.7
|
5 |
+
Chatglm2-6B,24.7,36.5,37.6,40.5
|
6 |
+
Chatglm3-6B,43.38487973,44.58762887,42.09621993,43.47079038
|
7 |
+
Chinese-Alpaca-2-13B,37.7,49.7,48.6,50.5
|
8 |
+
Chinese-Llama-2-13B,29.4,37.8,40.4,28.8
|
9 |
+
Devops-Model-14B-Chat,30.59,63.63,61.96,44.01
|
10 |
+
Ernie-Bot-4.0,61.15,70.0,60.0,70.0
|
11 |
+
Gpt-3.5-Turbo,66.8,72.0,68.3,72.5
|
12 |
+
Gpt-4,,,,88.7
|
13 |
+
Internlm-7B,38.7,43.9,45.2,51.4
|
14 |
+
Internlm2-Chat-20B,56.35738832,26.18025751,60.48109966,45.10309278
|
15 |
+
Internlm2-Chat-7B,49.74226804,56.18556701,48.19587629,49.74226804
|
16 |
+
Llama-2-13B,46.5,58.7,53.0,61.0
|
17 |
+
Llama-2-70B-Chat,25.29,58.06,52.97,58.55
|
18 |
+
Llama-2-7B,40.0,49.5,46.8,55.2
|
19 |
+
Mistral-7B,29.27,46.3,47.22,45.58
|
20 |
+
Qwen-14B-Chat,47.81,59.4,59.7,55.88
|
21 |
+
Qwen-72B-Chat,70.5,72.56,70.32,70.22
|
22 |
+
Qwen-7B-Chat,46.0,50.1,51.0,49.8
|
23 |
+
Yi-34B-Chat,59.14,68.79,68.37,80.06
|
24 |
+
Claude-3-Opus,69.03417341637355,,,
|
25 |
+
gemma_2b,26.46048,33.41924,26.6323,37.54296
|
26 |
+
gemma_7b,25.08591,50.85911,30.24055,51.55747
|
27 |
+
Meta-Llama-3-8B-Instruct,38.279481659390655,76.69172932330827,23.734458771084668,33.241749376506874
|
28 |
+
Qwen1.5-14B-Base,34.87973,60.82474,65.54983,47.07904
|
29 |
+
Qwen1.5-14B-Chat,56.4433,67.09622,53.52234,64.17526
|
data_v2/network_zh_mc_gen.csv
CHANGED
@@ -1,32 +1,32 @@
|
|
1 |
-
name,
|
2 |
-
Aquilachat2-34B,34.66,
|
3 |
-
Baichuan-13B-Chat,
|
4 |
-
Baichuan2-13B-Chat,35.
|
5 |
-
Chatglm2-6B,33.
|
6 |
-
Chatglm3-6B,41.39414802,
|
7 |
-
Chinese-Alpaca-2-13B,33.1,
|
8 |
-
Chinese-Llama-2-13B,22.5,
|
9 |
-
Devops-Model-14B-Chat,
|
10 |
-
Ernie-Bot-4.0,67.54,
|
11 |
-
Glm3-Turbo,59.63855422
|
12 |
-
Glm4,67.383821
|
13 |
-
Gpt-3.5-Turbo,58.
|
14 |
-
Gpt-4
|
15 |
-
Hunyuan-13B,60.0,
|
16 |
-
Internlm-7B,41.7,
|
17 |
-
Internlm2-Chat-20B,57.48709122,57.
|
18 |
-
Internlm2-Chat-7B,54.30292599,
|
19 |
-
Llama-2-13B,
|
20 |
-
Llama-2-70B-Chat,38.55,
|
21 |
-
Llama-2-7B,
|
22 |
-
Mistral-7B,1.9,
|
23 |
-
Qwen-14B-Chat,48.
|
24 |
-
Qwen-72B-Chat,65.
|
25 |
-
Qwen-7B-Chat,29.
|
26 |
-
Yi-34B-Chat,
|
27 |
-
Claude-3-Opus
|
28 |
-
gemma_2b,29.69019,
|
29 |
-
gemma_7b,31.58348,
|
30 |
-
Meta-Llama-3-8B-Instruct
|
31 |
-
Qwen1.5-14B-Base,45.18072,
|
32 |
-
Qwen1.5-14B-Chat,
|
|
|
1 |
+
name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
|
2 |
+
Aquilachat2-34B,34.66,47.74,44.48,
|
3 |
+
Baichuan-13B-Chat,16.0,49.7,36.1,55.6
|
4 |
+
Baichuan2-13B-Chat,35.9,30.5,35.6,32.0
|
5 |
+
Chatglm2-6B,33.7,42.2,36.0,39.5
|
6 |
+
Chatglm3-6B,41.39414802,49.22547332,38.81239243,42.85714286
|
7 |
+
Chinese-Alpaca-2-13B,33.1,44.2,44.0,42.7
|
8 |
+
Chinese-Llama-2-13B,22.5,38.8,41.8,32.2
|
9 |
+
Devops-Model-14B-Chat,46.57,56.01,60.08,55.79
|
10 |
+
Ernie-Bot-4.0,67.54,71.96,72.0,78.0
|
11 |
+
Glm3-Turbo,59.63855422,,,
|
12 |
+
Glm4,67.383821,,,
|
13 |
+
Gpt-3.5-Turbo,58.6,67.6,59.7,67.4
|
14 |
+
Gpt-4,,,,86.0
|
15 |
+
Hunyuan-13B,60.0,70.0,,
|
16 |
+
Internlm-7B,41.7,38.4,42.6,41.3
|
17 |
+
Internlm2-Chat-20B,57.48709122,57.14285714,59.1222031,50.77452668
|
18 |
+
Internlm2-Chat-7B,54.30292599,59.81067126,58.51979346,51.63511188
|
19 |
+
Llama-2-13B,31.6,57.0,38.9,50.6
|
20 |
+
Llama-2-70B-Chat,38.55,57.49,49.09,48.57
|
21 |
+
Llama-2-7B,30.2,55.6,40.8,50.4
|
22 |
+
Mistral-7B,1.9,45.61,15.0,35.97
|
23 |
+
Qwen-14B-Chat,48.81,57.4,56.12,54.99
|
24 |
+
Qwen-72B-Chat,65.86,68.3,69.4,70.08
|
25 |
+
Qwen-7B-Chat,29.9,53.5,46.9,47.7
|
26 |
+
Yi-34B-Chat,62.56,69.75,65.37,71.21
|
27 |
+
Claude-3-Opus,62.329525111479995,,,
|
28 |
+
gemma_2b,29.69019,39.15663,29.77625,38.64028
|
29 |
+
gemma_7b,31.58348,47.59036,34.68158,48.88124
|
30 |
+
Meta-Llama-3-8B-Instruct,35.904696806952444,38.94801939914722,41.717931191615406,31.059792337987826
|
31 |
+
Qwen1.5-14B-Base,45.18072,59.1222,61.10155,52.4957
|
32 |
+
Qwen1.5-14B-Chat,53.87263,63.85542,58.0895,65.57659
|
data_v2/oracle_en_mc_gen.csv
CHANGED
@@ -1,29 +1,29 @@
|
|
1 |
-
name,
|
2 |
-
Aquilachat2-34B,36.63,
|
3 |
-
Baichuan-13B-Chat,
|
4 |
-
Baichuan2-13B-Chat,
|
5 |
-
Chatglm2-6B,20.
|
6 |
-
Chatglm3-6B,20.92555332,
|
7 |
-
Chinese-Alpaca-2-13B,23.14,
|
8 |
-
Chinese-Llama-2-13B,13.88,
|
9 |
-
Devops-Model-14B-Chat,
|
10 |
-
Ernie-Bot-4.0,43.8,
|
11 |
-
Gpt-3.5-Turbo,38.
|
12 |
-
Gpt-4
|
13 |
-
Internlm-7B,26.36,
|
14 |
-
Internlm2-Chat-20B
|
15 |
-
Internlm2-Chat-7B,27.16297787,
|
16 |
-
Llama-2-13B,
|
17 |
-
Llama-2-70B-Chat,19.72,
|
18 |
-
Llama-2-7B,
|
19 |
-
Mistral-7B,17.1,
|
20 |
-
Qwen-14B-Chat,
|
21 |
-
Qwen-72B-Chat,47.
|
22 |
-
Qwen-7B-Chat,
|
23 |
-
Yi-34B-Chat,
|
24 |
-
Claude-3-Opus
|
25 |
-
gemma_2b,16.90141,
|
26 |
-
gemma_7b,14.28571,
|
27 |
-
Meta-Llama-3-8B-Instruct
|
28 |
-
Qwen1.5-14B-Base,29.17505,
|
29 |
-
Qwen1.5-14B-Chat,
|
|
|
1 |
+
name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
|
2 |
+
Aquilachat2-34B,36.63,44.83,46.65,
|
3 |
+
Baichuan-13B-Chat,11.67,19.52,22.54,28.77
|
4 |
+
Baichuan2-13B-Chat,19.1,22.9,26.5,24.5
|
5 |
+
Chatglm2-6B,20.52,19.72,20.12,22.74
|
6 |
+
Chatglm3-6B,20.92555332,25.15090543,24.74849095,29.1750503
|
7 |
+
Chinese-Alpaca-2-13B,23.14,28.97,16.3,14.29
|
8 |
+
Chinese-Llama-2-13B,13.88,20.52,16.9,23.34
|
9 |
+
Devops-Model-14B-Chat,26.96,38.83,34.81,27.36
|
10 |
+
Ernie-Bot-4.0,43.8,47.14,46.0,54.0
|
11 |
+
Gpt-3.5-Turbo,38.83,42.05,37.63,43.86
|
12 |
+
Gpt-4,,64.56,,62.58
|
13 |
+
Internlm-7B,26.36,25.55,25.55,27.97
|
14 |
+
Internlm2-Chat-20B,,59.21052632,,
|
15 |
+
Internlm2-Chat-7B,27.16297787,28.16901408,29.97987928,30.18108652
|
16 |
+
Llama-2-13B,20.32,29.58,22.33,33.8
|
17 |
+
Llama-2-70B-Chat,19.72,27.97,26.56,32.6
|
18 |
+
Llama-2-7B,23.74,26.56,20.52,33.6
|
19 |
+
Mistral-7B,17.1,26.76,31.19,27.97
|
20 |
+
Qwen-14B-Chat,28.37,36.62,28.37,24.14
|
21 |
+
Qwen-72B-Chat,47.48,48.09,49.7,43.66
|
22 |
+
Qwen-7B-Chat,19.11,23.94,25.55,33.4
|
23 |
+
Yi-34B-Chat,48.69,46.28,58.35,58.95
|
24 |
+
Claude-3-Opus,48.31816996021653,,,
|
25 |
+
gemma_2b,16.90141,19.5171,16.09658,24.74849
|
26 |
+
gemma_7b,14.28571,30.98592,2.60223,43.85965
|
27 |
+
Meta-Llama-3-8B-Instruct,28.468825409248026,40.47805387073632,23.33528989760647,34.6197743429205
|
28 |
+
Qwen1.5-14B-Base,29.17505,33.60161,36.82093,27.7666
|
29 |
+
Qwen1.5-14B-Chat,35.41247,43.05835,33.60161,38.833
|
data_v2/oracle_zh_mc_gen.csv
CHANGED
@@ -1,28 +1,28 @@
|
|
1 |
-
name,
|
2 |
-
Aquilachat2-34B,34.66,
|
3 |
-
Baichuan-13B-Chat,12.
|
4 |
-
Baichuan2-13B-Chat,25.
|
5 |
-
Chatglm2-6B,23.34,
|
6 |
-
Chatglm3-6B,21.32796781,
|
7 |
-
Chinese-Alpaca-2-13B,22.94,
|
8 |
-
Chinese-Llama-2-13B,14.69,
|
9 |
-
Devops-Model-14B-Chat,
|
10 |
-
Ernie-Bot-4.0,48.56,
|
11 |
-
Gpt-3.5-Turbo,
|
12 |
-
Gpt-4
|
13 |
-
Internlm-7B,25.96,25.96,
|
14 |
-
Internlm2-Chat-7B,28.57142857,
|
15 |
-
Llama-2-13B,
|
16 |
-
Llama-2-70B-Chat,15.29,
|
17 |
-
Llama-2-7B,20.72,
|
18 |
-
Mistral-7B,1.9,
|
19 |
-
Qwen-14B-Chat,27.57,
|
20 |
-
Qwen-72B-Chat,48.
|
21 |
-
Qwen-7B-Chat,
|
22 |
-
Yi-34B-Chat,49.
|
23 |
-
Claude-3-Opus
|
24 |
-
gemma_2b,18.51107,
|
25 |
-
gemma_7b,19.3159,
|
26 |
-
Meta-Llama-3-8B-Instruct
|
27 |
-
Qwen1.5-14B-Base,20.92555,
|
28 |
-
Qwen1.5-14B-Chat,
|
|
|
1 |
+
name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
|
2 |
+
Aquilachat2-34B,34.66,47.74,44.48,
|
3 |
+
Baichuan-13B-Chat,12.07,27.57,19.52,30.58
|
4 |
+
Baichuan2-13B-Chat,25.5,21.3,26.7,24.7
|
5 |
+
Chatglm2-6B,23.34,24.14,22.94,26.16
|
6 |
+
Chatglm3-6B,21.32796781,28.97384306,21.73038229,29.57746479
|
7 |
+
Chinese-Alpaca-2-13B,22.94,25.75,25.15,22.33
|
8 |
+
Chinese-Llama-2-13B,14.69,19.92,19.72,20.93
|
9 |
+
Devops-Model-14B-Chat,22.74,27.77,37.02,26.36
|
10 |
+
Ernie-Bot-4.0,48.56,50.64,48.0,54.0
|
11 |
+
Gpt-3.5-Turbo,35.81,43.26,39.44,27.77
|
12 |
+
Gpt-4,,65.17,,48.09
|
13 |
+
Internlm-7B,25.96,25.96,29.18,28.37
|
14 |
+
Internlm2-Chat-7B,28.57142857,31.79074447,30.78470825,31.18712274
|
15 |
+
Llama-2-13B,24.35,31.99,26.76,20.72
|
16 |
+
Llama-2-70B-Chat,15.29,34.81,26.76,33.8
|
17 |
+
Llama-2-7B,20.72,27.97,18.51,17.91
|
18 |
+
Mistral-7B,1.9,45.61,15.0,35.97
|
19 |
+
Qwen-14B-Chat,27.57,36.02,35.41,33.4
|
20 |
+
Qwen-72B-Chat,48.49,49.7,49.7,44.87
|
21 |
+
Qwen-7B-Chat,17.71,28.37,29.58,31.79
|
22 |
+
Yi-34B-Chat,49.3,53.72,56.34,54.33
|
23 |
+
Claude-3-Opus,50.00570664579664,,,
|
24 |
+
gemma_2b,18.51107,24.9497,21.52918,27.7666
|
25 |
+
gemma_7b,19.3159,53.94737,18.51107,5.204461
|
26 |
+
Meta-Llama-3-8B-Instruct,33.91785690993282,27.773429857170807,41.359323028761494,32.62733972477663
|
27 |
+
Qwen1.5-14B-Base,20.92555,35.61368,41.44869,30.78471
|
28 |
+
Qwen1.5-14B-Chat,23.34004,41.04628,38.02817,40.04024
|
data_v2/pufa_zh_mc_gen.csv
CHANGED
@@ -1,22 +1,22 @@
|
|
1 |
-
name,
|
2 |
-
Baichuan2-13B-Chat,
|
3 |
-
Chatglm3-6B,60.0,
|
4 |
-
Devops-Model-14B-Chat,29.33,
|
5 |
-
Ernie-Bot-4.0,86.67,86.67,
|
6 |
-
Gpt-3.5-Turbo,77.33,
|
7 |
-
GPT-4,88.0,
|
8 |
-
Internlm2-Chat-20B,76.0,
|
9 |
-
Internlm2-Chat-7B,78.66666667,
|
10 |
-
Llama-2-13B,44.0,
|
11 |
-
Llama-2-70B-Chat,6.67,
|
12 |
-
Llama-2-7B,25.33,
|
13 |
-
Mistral-7B,4.0,
|
14 |
-
Qwen-14B-Chat,73.33,
|
15 |
-
Qwen-72B-Chat,90.67,
|
16 |
-
Yi-34B-Chat,84.0,
|
17 |
-
Claude-3-Opus
|
18 |
-
gemma_2b,36.0,
|
19 |
-
gemma_7b,34.66667,
|
20 |
-
Meta-Llama-3-8B-Instruct
|
21 |
-
Qwen1.5-14B-Base,78.66667,
|
22 |
-
Qwen1.5-14B-Chat,
|
|
|
1 |
+
name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
|
2 |
+
Baichuan2-13B-Chat,66.67,66.67,61.33,62.67
|
3 |
+
Chatglm3-6B,60.0,61.33333333,56.0,58.66666667
|
4 |
+
Devops-Model-14B-Chat,29.33,61.33,81.33,70.67
|
5 |
+
Ernie-Bot-4.0,86.67,86.67,82.67,86.67
|
6 |
+
Gpt-3.5-Turbo,77.33,81.33,78.67,82.67
|
7 |
+
GPT-4,88.0,86.67,84.0,90.67
|
8 |
+
Internlm2-Chat-20B,76.0,80.0,80.0,
|
9 |
+
Internlm2-Chat-7B,78.66666667,72.0,72.0,53.33333333
|
10 |
+
Llama-2-13B,44.0,68.0,61.33,53.33
|
11 |
+
Llama-2-70B-Chat,6.67,65.33,49.33,66.67
|
12 |
+
Llama-2-7B,25.33,40.0,48.0,52.0
|
13 |
+
Mistral-7B,4.0,58.67,22.67,54.67
|
14 |
+
Qwen-14B-Chat,73.33,72.0,73.33,80.0
|
15 |
+
Qwen-72B-Chat,90.67,85.33,88.0,82.67
|
16 |
+
Yi-34B-Chat,84.0,88.0,92.0,89.33
|
17 |
+
Claude-3-Opus,93.24324324324324,,,
|
18 |
+
gemma_2b,36.0,41.33333,36.0,30.66667
|
19 |
+
gemma_7b,34.66667,56.0,46.66667,56.0
|
20 |
+
Meta-Llama-3-8B-Instruct,85.8108108108108,31.756756756756754,83.1081081081081,27.7027027027027
|
21 |
+
Qwen1.5-14B-Base,78.66667,72.0,92.0,42.66667
|
22 |
+
Qwen1.5-14B-Chat,89.33333,85.33333,80.0,85.33333
|
data_v2/rzy_zh_mc_gen.csv
CHANGED
@@ -1,22 +1,22 @@
|
|
1 |
-
name,
|
2 |
-
Baichuan2-13B-Chat,60.17,
|
3 |
-
Chatglm3-6B,54.21853389,
|
4 |
-
Devops-Model-14B-Chat,
|
5 |
-
Ernie-Bot-4.0,76.0,
|
6 |
-
Gpt-3.5-Turbo,65.
|
7 |
-
GPT-4,65.56,
|
8 |
-
Internlm2-Chat-20B,63.90041494,
|
9 |
-
Internlm2-Chat-7B,61.2724758,
|
10 |
-
Llama-2-13B,51.18,
|
11 |
-
Llama-2-70B-Chat,5.26,
|
12 |
-
Llama-2-7B,34.85,
|
13 |
-
Mistral-7B,18.53,
|
14 |
-
Qwen-14B-Chat,61.
|
15 |
-
Qwen-72B-Chat,66.67,
|
16 |
-
Yi-34B-Chat,64.
|
17 |
-
Claude-3-Opus
|
18 |
-
gemma_2b,36.37621,
|
19 |
-
gemma_7b,39.41909,
|
20 |
-
Meta-Llama-3-8B-Instruct
|
21 |
-
Qwen1.5-14B-Base,51.17566,
|
22 |
-
Qwen1.5-14B-Chat,
|
|
|
1 |
+
name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
|
2 |
+
Baichuan2-13B-Chat,60.17,67.5,59.34,64.32
|
3 |
+
Chatglm3-6B,54.21853389,62.10235131,55.32503458,59.33609959
|
4 |
+
Devops-Model-14B-Chat,56.85,59.2,64.18,61.83
|
5 |
+
Ernie-Bot-4.0,76.0,79.0,73.0,77.0
|
6 |
+
Gpt-3.5-Turbo,65.42,67.5,66.25,68.74
|
7 |
+
GPT-4,65.56,68.05,65.28,68.19
|
8 |
+
Internlm2-Chat-20B,63.90041494,64.03872752,,
|
9 |
+
Internlm2-Chat-7B,61.2724758,63.62378976,65.00691563,54.21853389
|
10 |
+
Llama-2-13B,51.18,59.06,57.12,53.39
|
11 |
+
Llama-2-70B-Chat,5.26,62.52,48.82,59.75
|
12 |
+
Llama-2-7B,34.85,44.95,46.2,53.39
|
13 |
+
Mistral-7B,18.53,60.3,29.88,59.75
|
14 |
+
Qwen-14B-Chat,61.55,64.45,63.49,65.98
|
15 |
+
Qwen-72B-Chat,66.67,65.28,65.98,70.12
|
16 |
+
Yi-34B-Chat,64.59,67.36,60.03,57.54
|
17 |
+
Claude-3-Opus,67.64288271089369,,,
|
18 |
+
gemma_2b,36.37621,45.22822,33.60996,37.75934
|
19 |
+
gemma_7b,39.41909,54.77178,42.04703,56.70816
|
20 |
+
Meta-Llama-3-8B-Instruct,36.55172413793103,28.27586206896552,38.62068965517241,34.48275862068966
|
21 |
+
Qwen1.5-14B-Base,51.17566,62.6556,65.42185,50.89903
|
22 |
+
Qwen1.5-14B-Chat,64.03873,64.31535,63.7621,65.9751
|
data_v2/zabbix_zh_mc_gen.csv
CHANGED
@@ -1,22 +1,22 @@
|
|
1 |
-
name,
|
2 |
-
Baichuan2-13B-Chat,
|
3 |
-
Chatglm3-6B,29.0,
|
4 |
-
Devops-Model-14B-Chat,
|
5 |
-
Ernie-Bot-4.0,44.0,
|
6 |
-
Gpt-3.5-Turbo,36.0,
|
7 |
-
GPT-4,51.0,
|
8 |
-
Internlm2-Chat-20B,41.0
|
9 |
-
Internlm2-Chat-7B,43.0,
|
10 |
-
Llama-2-13B,28.0,
|
11 |
-
Llama-2-70B-Chat,1.0,
|
12 |
-
Llama-2-7B,18.0,
|
13 |
-
Mistral-7B,6.0,
|
14 |
-
Qwen-14B-Chat,36.0,
|
15 |
-
Qwen-72B-Chat,46.0,
|
16 |
-
Yi-34B-Chat,40.0,40.0,
|
17 |
-
Claude-3-Opus
|
18 |
-
gemma_2b,25.0,
|
19 |
-
gemma_7b,22.0,
|
20 |
-
Meta-Llama-3-8B-Instruct
|
21 |
-
Qwen1.5-14B-Base,38.0,
|
22 |
-
Qwen1.5-14B-Chat,34.0,
|
|
|
1 |
+
name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
|
2 |
+
Baichuan2-13B-Chat,29.0,47.0,27.0,43.0
|
3 |
+
Chatglm3-6B,29.0,36.0,29.0,34.0
|
4 |
+
Devops-Model-14B-Chat,28.0,33.0,44.0,46.0
|
5 |
+
Ernie-Bot-4.0,44.0,48.0,47.0,51.0
|
6 |
+
Gpt-3.5-Turbo,36.0,42.0,40.0,48.0
|
7 |
+
GPT-4,51.0,53.0,60.0,59.0
|
8 |
+
Internlm2-Chat-20B,41.0,,44.0,
|
9 |
+
Internlm2-Chat-7B,43.0,39.0,45.0,35.0
|
10 |
+
Llama-2-13B,28.0,45.0,40.0,43.0
|
11 |
+
Llama-2-70B-Chat,1.0,47.0,29.0,46.0
|
12 |
+
Llama-2-7B,18.0,35.0,22.0,28.0
|
13 |
+
Mistral-7B,6.0,42.0,11.0,44.0
|
14 |
+
Qwen-14B-Chat,36.0,41.0,40.0,43.0
|
15 |
+
Qwen-72B-Chat,46.0,44.0,45.0,61.0
|
16 |
+
Yi-34B-Chat,40.0,40.0,42.0,42.0
|
17 |
+
Claude-3-Opus,61.71875,,,
|
18 |
+
gemma_2b,25.0,32.0,24.0,30.0
|
19 |
+
gemma_7b,22.0,44.0,28.0,40.0
|
20 |
+
Meta-Llama-3-8B-Instruct,39.670138888888886,37.58680555555556,30.381944444444443,33.072916666666664
|
21 |
+
Qwen1.5-14B-Base,38.0,39.0,48.0,36.0
|
22 |
+
Qwen1.5-14B-Chat,34.0,43.0,39.0,49.0
|
data_v2/zjyd_zh_mc_gen.csv
CHANGED
@@ -1,29 +1,29 @@
|
|
1 |
-
name,
|
2 |
-
Baichuan-13B-Chat,11.
|
3 |
-
Chatglm2-6B,23.
|
4 |
-
Chatglm3-6B,32.6,
|
5 |
-
Chinese-Alpaca-2-13B,22.69,
|
6 |
-
Chinese-Llama-2-13B,17.98,17.
|
7 |
-
Devops-Model-14B-Chat,
|
8 |
-
Ernie-Bot-4.0,45.99,
|
9 |
-
Glm3-Turbo,43.0
|
10 |
-
Glm4,50.0
|
11 |
-
Gpt-3.5-Turbo,
|
12 |
-
Gpt-4
|
13 |
-
Internlm-7B,27.81,
|
14 |
-
Internlm2-Chat-20B,44.6,
|
15 |
-
Internlm2-Chat-7B,38.8,
|
16 |
-
Llama-2-13B,
|
17 |
-
Llama-2-70B-Chat,24.38,
|
18 |
-
Llama-2-7B,
|
19 |
-
Mistral-7B,1.27,
|
20 |
-
Qwen-14B-Chat,41.
|
21 |
-
Qwen-72B-Chat,64.79,
|
22 |
-
Qwen-7B-Chat,36.
|
23 |
-
Yi-34B-Chat,64.
|
24 |
-
Claude-3-Opus
|
25 |
-
gemma_2b,25.6,
|
26 |
-
gemma_7b,27.3,
|
27 |
-
Meta-Llama-3-8B-Instruct
|
28 |
-
Qwen1.5-14B-Base,49.1,49.
|
29 |
-
Qwen1.5-14B-Chat,38.
|
|
|
1 |
+
name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
|
2 |
+
Baichuan-13B-Chat,11.13,28.61,13.22,33.97
|
3 |
+
Chatglm2-6B,23.12,24.08,30.46,35.9
|
4 |
+
Chatglm3-6B,32.6,35.4,28.3,40.9
|
5 |
+
Chinese-Alpaca-2-13B,22.69,24.59,40.52,40.73
|
6 |
+
Chinese-Llama-2-13B,17.98,17.83,31.66,36.24
|
7 |
+
Devops-Model-14B-Chat,42.7,53.57,57.25,54.29
|
8 |
+
Ernie-Bot-4.0,45.99,48.98,46.0,54.0
|
9 |
+
Glm3-Turbo,43.0,,,
|
10 |
+
Glm4,50.0,,,
|
11 |
+
Gpt-3.5-Turbo,36.83,39.25,39.77,42.15
|
12 |
+
Gpt-4,,62.11,,65.68
|
13 |
+
Internlm-7B,27.81,19.95,24.18,35.35
|
14 |
+
Internlm2-Chat-20B,44.6,47.0,62.2,38.3
|
15 |
+
Internlm2-Chat-7B,38.8,44.6,46.0,35.8
|
16 |
+
Llama-2-13B,27.16,29.99,36.15,39.02
|
17 |
+
Llama-2-70B-Chat,24.38,43.63,44.65,48.84
|
18 |
+
Llama-2-7B,23.47,29.26,30.03,31.93
|
19 |
+
Mistral-7B,1.27,42.05,30.72,46.44
|
20 |
+
Qwen-14B-Chat,41.44,47.98,49.92,58.85
|
21 |
+
Qwen-72B-Chat,64.79,65.72,70.19,68.38
|
22 |
+
Qwen-7B-Chat,36.5,33.51,40.59,31.46
|
23 |
+
Yi-34B-Chat,64.58,65.51,70.92,47.97
|
24 |
+
Claude-3-Opus,68.05555555555556,,,
|
25 |
+
gemma_2b,25.6,28.3,19.1,35.5
|
26 |
+
gemma_7b,27.3,35.4,17.3,44.5
|
27 |
+
Meta-Llama-3-8B-Instruct,63.425925925925924,0.0,66.2037037037037,25.0
|
28 |
+
Qwen1.5-14B-Base,49.1,49.9,62.5,41.3
|
29 |
+
Qwen1.5-14B-Chat,38.9,50.5,55.2,52.7
|
data_v2/zte_en_mc_gen.csv
CHANGED
@@ -1,28 +1,28 @@
|
|
1 |
-
name,
|
2 |
-
Baichuan-13B-Chat,
|
3 |
-
Chatglm2-6B,
|
4 |
-
Chatglm3-6B,30.4,30.
|
5 |
-
Chinese-Alpaca-2-13B,20.86,
|
6 |
-
Chinese-Llama-2-13B,10.02,
|
7 |
-
Devops-Model-14B-Chat,
|
8 |
-
Ernie-Bot-4.0,43.66,
|
9 |
-
Gpt-3.5-Turbo,
|
10 |
-
Gpt-4
|
11 |
-
Internlm-7B,20.48,
|
12 |
-
Internlm2-Chat-20B,39.1,
|
13 |
-
Internlm2-Chat-7B,36.8,
|
14 |
-
Llama-2-13B,
|
15 |
-
Llama-2-70B-Chat,23.64,
|
16 |
-
Llama-2-7B,
|
17 |
-
Mistral-7B,26.91,
|
18 |
-
Qwen-14B-Chat,
|
19 |
-
Qwen-72B-Chat,53.19,
|
20 |
-
Qwen-7B-Chat,33.
|
21 |
-
Yi-34B-Chat,
|
22 |
-
Claude-3-Opus
|
23 |
-
gemma_2b,20.1,
|
24 |
-
gemma_7b,23.1,
|
25 |
-
Meta-Llama-3-70B-Instruct
|
26 |
-
Meta-Llama-3-8B-Instruct
|
27 |
-
Qwen1.5-14B-Base,34.0,
|
28 |
-
Qwen1.5-14B-Chat,
|
|
|
1 |
+
name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
|
2 |
+
Baichuan-13B-Chat,14.31,18.46,15.68,16.82
|
3 |
+
Chatglm2-6B,16.06,19.91,26.22,28.37
|
4 |
+
Chatglm3-6B,30.4,30.7,26.9,37.2
|
5 |
+
Chinese-Alpaca-2-13B,20.86,23.08,29.75,32.83
|
6 |
+
Chinese-Llama-2-13B,10.02,19.51,34.51,33.34
|
7 |
+
Devops-Model-14B-Chat,30.51,47.37,49.38,47.23
|
8 |
+
Ernie-Bot-4.0,43.66,51.99,44.0,50.0
|
9 |
+
Gpt-3.5-Turbo,34.82,43.5,39.19,42.58
|
10 |
+
Gpt-4,,65.49,,63.54
|
11 |
+
Internlm-7B,20.48,23.85,23.69,26.06
|
12 |
+
Internlm2-Chat-20B,39.1,37.7,47.7,33.5
|
13 |
+
Internlm2-Chat-7B,36.8,31.7,46.3,36.9
|
14 |
+
Llama-2-13B,18.32,34.45,29.14,44.3
|
15 |
+
Llama-2-70B-Chat,23.64,39.31,39.12,47.9
|
16 |
+
Llama-2-7B,21.62,27.11,24.85,34.83
|
17 |
+
Mistral-7B,26.91,30.65,40.52,46.84
|
18 |
+
Qwen-14B-Chat,36.25,42.51,50.39,59.18
|
19 |
+
Qwen-72B-Chat,53.19,55.52,58.13,58.99
|
20 |
+
Qwen-7B-Chat,33.74,34.1,32.7,36.65
|
21 |
+
Yi-34B-Chat,37.04,52.1,61.19,53.39
|
22 |
+
Claude-3-Opus,49.599999999999994,,,
|
23 |
+
gemma_2b,20.1,24.2,31.2,35.5
|
24 |
+
gemma_7b,23.1,34.4,21.4,33.1
|
25 |
+
Meta-Llama-3-70B-Instruct,38.9,63.4,37.6,59.0
|
26 |
+
Meta-Llama-3-8B-Instruct,24.7,35.4,19.7,32.9
|
27 |
+
Qwen1.5-14B-Base,34.0,42.8,57.9,40.2
|
28 |
+
Qwen1.5-14B-Chat,35.6,41.1,34.7,47.4
|
data_v2/zte_zh_mc_gen.csv
CHANGED
@@ -1,30 +1,30 @@
|
|
1 |
-
name,
|
2 |
-
Baichuan-13B-Chat,11.
|
3 |
-
Chatglm2-6B,23.
|
4 |
-
Chatglm3-6B,32.6,
|
5 |
-
Chinese-Alpaca-2-13B,22.69,
|
6 |
-
Chinese-Llama-2-13B,17.98,17.
|
7 |
-
Devops-Model-14B-Chat,
|
8 |
-
Ernie-Bot-4.0,45.99,
|
9 |
-
Glm3-Turbo,43.0
|
10 |
-
Glm4,50.0
|
11 |
-
Gpt-3.5-Turbo,
|
12 |
-
Gpt-4
|
13 |
-
Internlm-7B,27.81,
|
14 |
-
Internlm2-Chat-20B,44.6,
|
15 |
-
Internlm2-Chat-7B,38.8,
|
16 |
-
Llama-2-13B,
|
17 |
-
Llama-2-70B-Chat,24.38,
|
18 |
-
Llama-2-7B,
|
19 |
-
Mistral-7B,1.27,
|
20 |
-
Qwen-14B-Chat,41.
|
21 |
-
Qwen-72B-Chat,64.79,
|
22 |
-
Qwen-7B-Chat,36.
|
23 |
-
Yi-34B-Chat,64.
|
24 |
-
Claude-3-Opus
|
25 |
-
gemma_2b,25.6,
|
26 |
-
gemma_7b,27.3,
|
27 |
-
Meta-Llama-3-70B-Instruct
|
28 |
-
Meta-Llama-3-8B-Instruct
|
29 |
-
Qwen1.5-14B-Base,49.1,49.
|
30 |
-
Qwen1.5-14B-Chat,38.
|
|
|
1 |
+
name,zero_self_con,zero_cot_self_con,few_self_con,few_cot_self_con
|
2 |
+
Baichuan-13B-Chat,11.13,28.61,13.22,33.97
|
3 |
+
Chatglm2-6B,23.12,24.08,30.46,35.9
|
4 |
+
Chatglm3-6B,32.6,35.4,28.3,40.9
|
5 |
+
Chinese-Alpaca-2-13B,22.69,24.59,40.52,40.73
|
6 |
+
Chinese-Llama-2-13B,17.98,17.83,31.66,36.24
|
7 |
+
Devops-Model-14B-Chat,42.7,53.57,57.25,54.29
|
8 |
+
Ernie-Bot-4.0,45.99,48.98,46.0,54.0
|
9 |
+
Glm3-Turbo,43.0,,,
|
10 |
+
Glm4,50.0,,,
|
11 |
+
Gpt-3.5-Turbo,36.83,39.25,39.77,42.15
|
12 |
+
Gpt-4,,62.11,,65.68
|
13 |
+
Internlm-7B,27.81,19.95,24.18,35.35
|
14 |
+
Internlm2-Chat-20B,44.6,47.0,62.2,38.3
|
15 |
+
Internlm2-Chat-7B,38.8,44.6,46.0,35.8
|
16 |
+
Llama-2-13B,27.16,29.99,36.15,39.02
|
17 |
+
Llama-2-70B-Chat,24.38,43.63,44.65,48.84
|
18 |
+
Llama-2-7B,23.47,29.26,30.03,31.93
|
19 |
+
Mistral-7B,1.27,42.05,30.72,46.44
|
20 |
+
Qwen-14B-Chat,41.44,47.98,49.92,58.85
|
21 |
+
Qwen-72B-Chat,64.79,65.72,70.19,68.38
|
22 |
+
Qwen-7B-Chat,36.5,33.51,40.59,31.46
|
23 |
+
Yi-34B-Chat,64.58,65.51,70.92,47.97
|
24 |
+
Claude-3-Opus,51.4,,,
|
25 |
+
gemma_2b,25.6,28.3,19.1,35.5
|
26 |
+
gemma_7b,27.3,35.4,17.3,44.5
|
27 |
+
Meta-Llama-3-70B-Instruct,31.1,37.4,51.10000000000001,36.900000000000006
|
28 |
+
Meta-Llama-3-8B-Instruct,31.1,34.3,36.0,37.1
|
29 |
+
Qwen1.5-14B-Base,49.1,49.9,62.5,41.3
|
30 |
+
Qwen1.5-14B-Chat,38.9,50.5,55.2,52.7
|