Aaron Mueller
updated filtering, add F= tab
1d8e193
{
"method_name": "IFR",
"results": [
{
"model_id": "gemma2",
"scores": {
"ioi": {
"edge_counts": [
0.0,
0.0,
0.0,
0.0,
0.0,
60.0,
697.0,
2012.0,
27442.0,
74218.0
],
"faithfulness": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
1.0
]
},
"arc_easy": {
"edge_counts": [
0.0,
0.0,
0.0,
0.0,
0.0,
34.0,
569.0,
1843.0,
23583.0,
74218.0
],
"faithfulness": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
1.0
]
},
"mcqa": {
"edge_counts": [
0.0,
0.0,
0.0,
0.0,
0.0,
48.0,
282.0,
1912.0,
36317.0,
74218.0
],
"faithfulness": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.18877551020408162,
1.0
]
}
}
},
{
"model_id": "llama3",
"scores": {
"arc_easy": {
"edge_counts": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
81121.0,
678541.0,
1592881.0
],
"faithfulness": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.24895833333333334,
1.0
]
},
"arithmetic_addition": {
"edge_counts": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
19009.0,
245201.0,
728253.0,
1592881.0
],
"faithfulness": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.9435294117647058,
1.3129411764705883,
1.0
]
},
"arithmetic_subtraction": {
"edge_counts": [
0.0,
0.0,
0.0,
0.0,
0.0,
1719.0,
104811.0,
210163.0,
726283.0,
1592881.0
],
"faithfulness": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.39503205128205127,
0.9198717948717948,
1.2788461538461537,
1.0
]
},
"mcqa": {
"edge_counts": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
11420.0,
249100.0,
687383.0,
1592881.0
],
"faithfulness": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.07228915662650602,
0.7018072289156626,
1.0
]
}
}
},
{
"model_id": "gpt2",
"scores": {
"ioi": {
"edge_counts": [
0.0,
0.0,
1.0,
5.0,
18.0,
61.0,
431.0,
2217.0,
11133.0,
32491.0
],
"faithfulness": [
0.0,
0.0,
0.0,
0.0,
9.699480413473062e-05,
0.002604457555290081,
0.005239274585697969,
0.007735479313229455,
0.8286768164229479,
1.0
]
}
}
},
{
"model_id": "qwen2.5",
"scores": {
"mcqa": {
"edge_counts": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
6114.0,
78382.0,
179749.0
],
"faithfulness": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.6593059936908517,
1.0
]
},
"ioi": {
"edge_counts": [
0.0,
0.0,
0.0,
0.0,
0.0,
18.0,
18.0,
8519.0,
77692.0,
179749.0
],
"faithfulness": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.20249221183800623,
1.0
]
}
}
}
]
}