Spaces:
Running
Running
{ | |
"method_name": "EAP (CF)", | |
"results": [ | |
{ | |
"model_id": "gemma2", | |
"scores": { | |
"mcqa": { | |
"edge_counts": [ | |
40.0, | |
76.0, | |
176.0, | |
332.0, | |
962.0, | |
3256.0, | |
6866.0, | |
14441.0, | |
36027.0, | |
74218.0 | |
], | |
"faithfulness": [ | |
0.05782312925170068, | |
0.06462585034013606, | |
0.13435374149659865, | |
0.19727891156462585, | |
0.3521471088435374, | |
0.9591836734693877, | |
1.2857142857142858, | |
1.435374149659864, | |
1.5510204081632653, | |
1.0 | |
] | |
}, | |
"arc_easy": { | |
"edge_counts": [ | |
33.0, | |
85.0, | |
231.0, | |
451.0, | |
1054.0, | |
3305.0, | |
7024.0, | |
14506.0, | |
36031.0, | |
74218.0 | |
], | |
"faithfulness": [ | |
0.04447852760736196, | |
0.06441717791411043, | |
0.21779141104294478, | |
0.3159509202453988, | |
0.5076687116564417, | |
0.8374233128834356, | |
1.0245398773006136, | |
1.138036809815951, | |
1.334355828220859, | |
1.0 | |
] | |
}, | |
"ioi": { | |
"edge_counts": [ | |
1193.0, | |
2592.0, | |
7327.0, | |
15198.0, | |
30956.0, | |
78020.0, | |
156182.0, | |
311361.0, | |
768765.0, | |
1592881.0 | |
], | |
"faithfulness": [ | |
0.08709677419354839, | |
0.16612903225806452, | |
0.37661290322580643, | |
0.5518145161290322, | |
0.8741935483870967, | |
1.0354838709677419, | |
1.3612903225806452, | |
1.3612903225806452, | |
1.4645161290322581, | |
1.0 | |
] | |
} | |
} | |
}, | |
{ | |
"model_id": "qwen2.5", | |
"scores": { | |
"ioi": { | |
"edge_counts": [ | |
110.0, | |
246.0, | |
653.0, | |
1455.0, | |
3114.0, | |
8393.0, | |
17273.0, | |
34695.0, | |
87690.0, | |
179749.0 | |
], | |
"faithfulness": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.01557632398753894, | |
0.021806853582554516, | |
0.018691588785046728, | |
1.0 | |
] | |
}, | |
"mcqa": { | |
"edge_counts": [ | |
128.0, | |
262.0, | |
724.0, | |
1586.0, | |
3413.0, | |
8813.0, | |
17768.0, | |
35535.0, | |
86556.0, | |
179749.0 | |
], | |
"faithfulness": [ | |
0.27681388012618297, | |
0.3994479495268139, | |
0.5441640378548895, | |
0.5291798107255521, | |
0.48738170347003157, | |
0.5977917981072555, | |
0.7523659305993691, | |
0.6876971608832808, | |
0.667192429022082, | |
1.0 | |
] | |
} | |
} | |
}, | |
{ | |
"model_id": "gpt2", | |
"scores": { | |
"ioi": { | |
"edge_counts": [ | |
28.17708396911621, | |
74.19965362548828, | |
317.2265625, | |
881.1848754882812, | |
2312.229248046875, | |
7085.25, | |
15016.50390625, | |
30456.0, | |
76492.484375, | |
152985.0 | |
], | |
"faithfulness": [ | |
0.0, | |
0.22449694731820746, | |
0.5048270710448762, | |
0.7968936313069007, | |
0.976166532167683, | |
1.2088630716953397, | |
1.232255211567972, | |
1.2651923955277595, | |
1.3237581777016123, | |
1.0 | |
] | |
} | |
} | |
}, | |
{ | |
"model_id": "llama3", | |
"scores": { | |
"arithmetic_subtraction": { | |
"edge_counts": [ | |
1101.0, | |
2521.0, | |
7330.0, | |
15268.0, | |
31128.0, | |
78568.0, | |
157165.0, | |
313324.0, | |
773309.0, | |
1592881.0 | |
], | |
"faithfulness": [ | |
0.21634615384615385, | |
0.3076923076923077, | |
0.4110576923076923, | |
0.4467147435897436, | |
0.49854767628205127, | |
0.5422676282051282, | |
0.5584935897435898, | |
0.5737179487179487, | |
0.577323717948718, | |
1.0 | |
] | |
}, | |
"arithmetic_addition": { | |
"edge_counts": [ | |
1105.0, | |
2636.0, | |
7488.0, | |
15424.0, | |
31195.0, | |
78423.0, | |
156792.0, | |
312466.0, | |
776673.0, | |
1592881.0 | |
], | |
"faithfulness": [ | |
0.08, | |
0.13411764705882354, | |
0.18352941176470589, | |
0.22588235294117648, | |
0.2847058823529412, | |
0.3588235294117647, | |
0.37529411764705883, | |
0.3841176470588235, | |
0.3988235294117647, | |
1.0 | |
] | |
}, | |
"mcqa": { | |
"edge_counts": [ | |
1204.0, | |
2743.0, | |
7462.0, | |
15300.0, | |
31172.0, | |
78615.0, | |
157496.0, | |
314048.0, | |
767228.0, | |
1592881.0 | |
], | |
"faithfulness": [ | |
0.014056224899598393, | |
0.04618473895582329, | |
0.1144578313253012, | |
0.19678714859437751, | |
0.19076305220883535, | |
0.3644578313253012, | |
0.5144327309236948, | |
0.7590361445783133, | |
0.7570281124497992, | |
1.0 | |
] | |
}, | |
"arc_challenge": { | |
"edge_counts": [ | |
1080.0, | |
2633.0, | |
6870.0, | |
14850.0, | |
30338.0, | |
77983.0, | |
156741.0, | |
306677.0, | |
781379.0, | |
1592881.0 | |
], | |
"faithfulness": [ | |
-0.0024390243902439024, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.014634146341463415, | |
0.007317073170731708, | |
1.0 | |
] | |
} | |
} | |
} | |
] | |
} |