Spaces:
Running
Running
{ | |
"method_name": "IFR", | |
"results": [ | |
{ | |
"model_id": "gemma2", | |
"scores": { | |
"ioi": { | |
"edge_counts": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
60.0, | |
697.0, | |
2012.0, | |
27442.0, | |
74218.0 | |
], | |
"faithfulness": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
1.0 | |
] | |
}, | |
"arc_easy": { | |
"edge_counts": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
34.0, | |
569.0, | |
1843.0, | |
23583.0, | |
74218.0 | |
], | |
"faithfulness": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
1.0 | |
] | |
}, | |
"mcqa": { | |
"edge_counts": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
48.0, | |
282.0, | |
1912.0, | |
36317.0, | |
74218.0 | |
], | |
"faithfulness": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.18877551020408162, | |
1.0 | |
] | |
} | |
} | |
}, | |
{ | |
"model_id": "llama3", | |
"scores": { | |
"arc_easy": { | |
"edge_counts": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
81121.0, | |
678541.0, | |
1592881.0 | |
], | |
"faithfulness": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.24895833333333334, | |
1.0 | |
] | |
}, | |
"arithmetic_addition": { | |
"edge_counts": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
19009.0, | |
245201.0, | |
728253.0, | |
1592881.0 | |
], | |
"faithfulness": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.9435294117647058, | |
1.3129411764705883, | |
1.0 | |
] | |
}, | |
"arithmetic_subtraction": { | |
"edge_counts": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
1719.0, | |
104811.0, | |
210163.0, | |
726283.0, | |
1592881.0 | |
], | |
"faithfulness": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.39503205128205127, | |
0.9198717948717948, | |
1.2788461538461537, | |
1.0 | |
] | |
}, | |
"mcqa": { | |
"edge_counts": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
11420.0, | |
249100.0, | |
687383.0, | |
1592881.0 | |
], | |
"faithfulness": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.07228915662650602, | |
0.7018072289156626, | |
1.0 | |
] | |
} | |
} | |
}, | |
{ | |
"model_id": "gpt2", | |
"scores": { | |
"ioi": { | |
"edge_counts": [ | |
0.0, | |
0.0, | |
1.0, | |
5.0, | |
18.0, | |
61.0, | |
431.0, | |
2217.0, | |
11133.0, | |
32491.0 | |
], | |
"faithfulness": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
9.699480413473062e-05, | |
0.002604457555290081, | |
0.005239274585697969, | |
0.007735479313229455, | |
0.8286768164229479, | |
1.0 | |
] | |
} | |
} | |
}, | |
{ | |
"model_id": "qwen2.5", | |
"scores": { | |
"mcqa": { | |
"edge_counts": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
6114.0, | |
78382.0, | |
179749.0 | |
], | |
"faithfulness": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.6593059936908517, | |
1.0 | |
] | |
}, | |
"ioi": { | |
"edge_counts": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
18.0, | |
18.0, | |
8519.0, | |
77692.0, | |
179749.0 | |
], | |
"faithfulness": [ | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.0, | |
0.20249221183800623, | |
1.0 | |
] | |
} | |
} | |
} | |
] | |
} |