Spaces:

mib-bench
/

leaderboard

Running

App Files Files Community

Aaron Mueller committed on Jan 30

Commit

fe05167

1 Parent(s): 2d87944

renaming tracks

Browse files

Files changed (2) hide show

app.py +2 -2
eval-results-mib-subgraph/baselines/UGS.json +93 -0

app.py CHANGED Viewed

@@ -791,7 +791,7 @@ with demo:
         # with gr.TabItem("Subgraph", elem_id="subgraph", id=0):
         #     leaderboard = init_leaderboard_mib_subgraph(LEADERBOARD_DF_MIB_SUBGRAPH, "Subgraph")
-        with gr.TabItem("Subgraph", elem_id="subgraph", id=0):
             with gr.Tabs() as subgraph_tabs:
                 with gr.TabItem("F+", id=0):
                     # Add description for filters
@@ -840,7 +840,7 @@ with demo:
                     print(f"Leaderboard is {leaderboard}")
         # Then modify the Causal Graph tab section
-        with gr.TabItem("Causal Graph", elem_id="causalgraph", id=1):
             with gr.Tabs() as causalgraph_tabs:
                 with gr.TabItem("Detailed View", id=0):
                     leaderboard_detailed = init_leaderboard_mib_causalgraph(

         # with gr.TabItem("Subgraph", elem_id="subgraph", id=0):
         #     leaderboard = init_leaderboard_mib_subgraph(LEADERBOARD_DF_MIB_SUBGRAPH, "Subgraph")
+        with gr.TabItem("Circuit Localization", elem_id="subgraph", id=0):
             with gr.Tabs() as subgraph_tabs:
                 with gr.TabItem("F+", id=0):
                     # Add description for filters
                     print(f"Leaderboard is {leaderboard}")
         # Then modify the Causal Graph tab section
+        with gr.TabItem("Causal Variable Localization", elem_id="causalgraph", id=1):
             with gr.Tabs() as causalgraph_tabs:
                 with gr.TabItem("Detailed View", id=0):
                     leaderboard_detailed = init_leaderboard_mib_causalgraph(

eval-results-mib-subgraph/baselines/UGS.json ADDED Viewed

	@@ -0,0 +1,93 @@

+{
+    "method_name": "UGS",
+    "results": [
+        {
+            "model_id": "qwen2.5",
+            "scores": {
+                "ioi": {
+                    "edge_counts": [
+                        164.0,
+                        349.0,
+                        888.0,
+                        1766.0,
+                        3575.0,
+                        8977.0,
+                        17961.0,
+                        35949.0,
+                        89874.0,
+                        179749.0
+                    ],
+                    "faithfulness": [
+                        0.8161993769470405,
+                        1.0623052959501558,
+                        1.1557632398753894,
+                        1.1806853582554517,
+                        1.071651090342679,
+                        1.0093457943925233,
+                        0.9875389408099688,
+                        0.9470404984423676,
+                        0.9719626168224299,
+                        1.0
+                    ]
+                },
+                "mcqa": {
+                    "edge_counts": [
+                        86.0,
+                        212.0,
+                        704.0,
+                        1632.0,
+                        3449.0,
+                        8871.0,
+                        17814.0,
+                        35720.0,
+                        89874.0,
+                        179749.0
+                    ],
+                    "faithfulness": [
+                        0.37104430379746833,
+                        0.4506526898734177,
+                        0.6471518987341772,
+                        0.7231012658227848,
+                        0.9113924050632911,
+                        1.0,
+                        1.5917721518987342,
+                        1.7183544303797469,
+                        1.009493670886076,
+                        1.0
+                    ]
+                }
+            }
+        },
+        {
+            "model_id": "gpt2",
+            "scores": {
+                "ioi": {
+                    "edge_counts": [
+                        27.0,
+                        57.0,
+                        159.0,
+                        322.0,
+                        640.0,
+                        1608.0,
+                        3244.0,
+                        6498.0,
+                        16245.0,
+                        32491.0
+                    ],
+                    "faithfulness": [
+                        0.10013020765541497,
+                        0.33153985647745055,
+                        1.0775680479866294,
+                        0.960686341813994,
+                        1.0155814417206641,
+                        1.0182404988203417,
+                        0.9613478605327729,
+                        0.9464708735339975,
+                        0.9555035267362492,
+                        1.0
+                    ]
+                }
+            }
+        }
+    ]
+}