Aaron Mueller
updated filtering, add F= tab
1d8e193
{
"method_name": "EAP (OA)",
"results": [
{
"model_id": "gpt2",
"scores": {
"ioi": {
"edge_counts": [
3.0,
8.0,
18.0,
66.0,
119.0,
616.0,
2699.0,
6055.0,
14988.0,
32491.0
],
"faithfulness": [
0.0,
0.0,
0.0,
-0.02605376953427554,
-0.027599262798968967,
0.018682773460249002,
0.3035174289407458,
0.905198792680162,
1.2374291327745524,
1.0
]
}
}
},
{
"model_id": "qwen2.5",
"scores": {
"mcqa": {
"edge_counts": [
19.0,
81.0,
203.0,
360.0,
872.0,
7146.0,
17450.0,
35719.0,
87921.0,
179749.0
],
"faithfulness": [
0.0,
-0.0031645569620253164,
-0.006329113924050633,
-0.015822784810126583,
0.03164556962025317,
0.09651898734177215,
0.189873417721519,
0.13132911392405064,
0.07278481012658228,
1.0
]
},
"ioi": {
"edge_counts": [
7.0,
62.0,
163.0,
393.0,
868.0,
7103.0,
17098.0,
35450.0,
88180.0,
179749.0
],
"faithfulness": [
0.0,
0.0,
0.0,
0.16043613707165108,
0.22429906542056074,
0.38317757009345793,
0.5595794392523364,
0.6433021806853583,
0.6448598130841121,
1.0
]
}
}
}
]
}