Spaces:
Running
Running
{ | |
"method_name": "NAP (CF)", | |
"results": [ | |
{ | |
"model_id": "gemma2", | |
"scores": { | |
"arc_easy": { | |
"edge_counts": [ | |
28.17708396911621, | |
74.19965362548828, | |
317.2265625, | |
881.1848754882812, | |
2312.229248046875, | |
7085.25, | |
15016.50390625, | |
30456.0, | |
76492.484375, | |
152985.0 | |
], | |
"faithfulness": [ | |
0.004601226993865031, | |
0.006134969325153374, | |
0.013803680981595092, | |
0.02607361963190184, | |
0.04754601226993865, | |
0.12423312883435583, | |
0.24233128834355827, | |
0.5751533742331288, | |
1.6411042944785277, | |
1.0 | |
] | |
}, | |
"mcqa": { | |
"edge_counts": [ | |
44.14409637451172, | |
142.76388549804688, | |
553.3840942382812, | |
1318.2525634765625, | |
2904.385498046875, | |
7578.75, | |
15298.49609375, | |
30597.0078125, | |
76492.4765625, | |
152985.0 | |
], | |
"faithfulness": [ | |
0.0, | |
-0.003401360544217687, | |
-0.003401360544217687, | |
-0.0017006802721088435, | |
0.0017006802721088435, | |
0.02040816326530612, | |
0.022108843537414966, | |
0.10204081632653061, | |
0.24489795918367346, | |
1.0 | |
] | |
}, | |
"ioi": { | |
"edge_counts": [ | |
78.42621612548828, | |
195.36111450195312, | |
587.4566040039062, | |
1273.60595703125, | |
2791.59375, | |
7614.0, | |
15298.5, | |
30596.99609375, | |
76492.484375, | |
152985.0 | |
], | |
"faithfulness": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.003215434083601286, | |
0.01929260450160772, | |
0.07395498392282958, | |
0.1607717041800643, | |
1.0 | |
] | |
} | |
} | |
}, | |
{ | |
"model_id": "llama3", | |
"scores": { | |
"arithmetic_subtraction": { | |
"edge_counts": [ | |
210.320068359375, | |
596.0830078125, | |
3139.23779296875, | |
11288.572265625, | |
34563.7265625, | |
117960.8671875, | |
271754.5625, | |
614539.5, | |
1639406.875, | |
3281985.0 | |
], | |
"faithfulness": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.003205128205128205, | |
0.00641025641025641, | |
0.01282051282051282, | |
0.041666666666666664, | |
0.08012820512820513, | |
1.0 | |
] | |
}, | |
"arc_easy": { | |
"edge_counts": [ | |
22.194580078125, | |
59.185546875, | |
248.390869140625, | |
507.3515625, | |
9259.2734375, | |
77213.6328125, | |
270803.25, | |
646249.5625, | |
1640992.125, | |
3281985.0 | |
], | |
"faithfulness": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.02, | |
1.0 | |
] | |
}, | |
"arc_challenge": { | |
"edge_counts": [ | |
31.70654296875, | |
90.89208984375, | |
232.5361328125, | |
792.73681640625, | |
9893.4697265625, | |
64371.125, | |
143329.125, | |
628492.0, | |
1640992.375, | |
3281985.0 | |
], | |
"faithfulness": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.012195121951219513, | |
1.0 | |
] | |
}, | |
"ioi": { | |
"edge_counts": [ | |
361.45458984375, | |
1048.4296875, | |
4175.08056640625, | |
11753.64453125, | |
31984.65625, | |
109082.09375, | |
260973.203125, | |
612637.0, | |
1640992.375, | |
3281985.0 | |
], | |
"faithfulness": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.009230769230769232, | |
0.015384615384615385, | |
0.015384615384615385, | |
1.0 | |
] | |
}, | |
"arithmetic_addition": { | |
"edge_counts": [ | |
161.703369140625, | |
450.23291015625, | |
2742.869384765625, | |
10559.25390625, | |
34246.625, | |
117802.3125, | |
275559.8125, | |
616442.0625, | |
1639407.0, | |
3281985.0 | |
], | |
"faithfulness": [ | |
0.0, | |
-0.002352941176470588, | |
0.0, | |
0.0, | |
0.0, | |
0.004705882352941176, | |
0.011764705882352941, | |
0.0, | |
0.0, | |
1.0 | |
] | |
}, | |
"mcqa": { | |
"edge_counts": [ | |
700.714599609375, | |
2084.1767578125, | |
9259.166015625, | |
23591.84765625, | |
54921.44140625, | |
153634.5, | |
320587.96875, | |
655762.5625, | |
1640992.375, | |
3281985.0 | |
], | |
"faithfulness": [ | |
0.0, | |
0.0, | |
0.0019305019305019305, | |
0.0019305019305019305, | |
0.0, | |
-0.007722007722007722, | |
-0.02702702702702703, | |
-0.07722007722007722, | |
-0.05791505791505792, | |
1.0 | |
] | |
} | |
} | |
}, | |
{ | |
"model_id": "qwen2.5", | |
"scores": { | |
"ioi": { | |
"edge_counts": [ | |
32.8046875, | |
88.68080139160156, | |
348.3046875, | |
1010.625, | |
3039.563720703125, | |
11605.576171875, | |
29601.451171875, | |
70466.984375, | |
185914.984375, | |
372913.0 | |
], | |
"faithfulness": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.006230529595015576, | |
0.024922118380062305, | |
0.04984423676012461, | |
0.06853582554517133, | |
0.08099688473520249, | |
1.0 | |
] | |
}, | |
"mcqa": { | |
"edge_counts": [ | |
21.98995590209961, | |
100.21651458740234, | |
516.140625, | |
1844.390625, | |
5357.140625, | |
17507.634765625, | |
37182.30859375, | |
74582.3984375, | |
186456.53125, | |
372913.0 | |
], | |
"faithfulness": [ | |
-0.0031645569620253164, | |
-0.0031645569620253164, | |
-0.0031645569620253164, | |
-0.0031645569620253164, | |
-0.0031645569620253164, | |
0.012658227848101266, | |
0.012658227848101266, | |
0.03481012658227848, | |
0.056962025316455694, | |
1.0 | |
] | |
} | |
} | |
}, | |
{ | |
"model_id": "gpt2", | |
"scores": { | |
"ioi": { | |
"edge_counts": [ | |
3.28125, | |
10.35546875, | |
31.35416603088379, | |
83.15755462646484, | |
279.3997497558594, | |
1350.0208740234375, | |
4442.87890625, | |
12654.0947265625, | |
34932.49609375, | |
69865.0 | |
], | |
"faithfulness": [ | |
-3.380787646439662e-08, | |
0.0, | |
-1.5450199544229257e-05, | |
-6.119225640055789e-06, | |
0.00011721190770206309, | |
0.0020236718694058528, | |
0.015380555318712599, | |
0.03909438029925786, | |
0.04813088759942643, | |
1.0 | |
] | |
} | |
} | |
} | |
] | |
} |