Aaron Mueller committed on
Commit
fe05167
·
1 Parent(s): 2d87944

renaming tracks

Browse files
app.py CHANGED
@@ -791,7 +791,7 @@ with demo:
791
 
792
  # with gr.TabItem("Subgraph", elem_id="subgraph", id=0):
793
  # leaderboard = init_leaderboard_mib_subgraph(LEADERBOARD_DF_MIB_SUBGRAPH, "Subgraph")
794
- with gr.TabItem("Subgraph", elem_id="subgraph", id=0):
795
  with gr.Tabs() as subgraph_tabs:
796
  with gr.TabItem("F+", id=0):
797
  # Add description for filters
@@ -840,7 +840,7 @@ with demo:
840
  print(f"Leaderboard is {leaderboard}")
841
 
842
  # Then modify the Causal Graph tab section
843
- with gr.TabItem("Causal Graph", elem_id="causalgraph", id=1):
844
  with gr.Tabs() as causalgraph_tabs:
845
  with gr.TabItem("Detailed View", id=0):
846
  leaderboard_detailed = init_leaderboard_mib_causalgraph(
 
791
 
792
  # with gr.TabItem("Subgraph", elem_id="subgraph", id=0):
793
  # leaderboard = init_leaderboard_mib_subgraph(LEADERBOARD_DF_MIB_SUBGRAPH, "Subgraph")
794
+ with gr.TabItem("Circuit Localization", elem_id="subgraph", id=0):
795
  with gr.Tabs() as subgraph_tabs:
796
  with gr.TabItem("F+", id=0):
797
  # Add description for filters
 
840
  print(f"Leaderboard is {leaderboard}")
841
 
842
  # Then modify the Causal Graph tab section
843
+ with gr.TabItem("Causal Variable Localization", elem_id="causalgraph", id=1):
844
  with gr.Tabs() as causalgraph_tabs:
845
  with gr.TabItem("Detailed View", id=0):
846
  leaderboard_detailed = init_leaderboard_mib_causalgraph(
eval-results-mib-subgraph/baselines/UGS.json ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "method_name": "UGS",
3
+ "results": [
4
+ {
5
+ "model_id": "qwen2.5",
6
+ "scores": {
7
+ "ioi": {
8
+ "edge_counts": [
9
+ 164.0,
10
+ 349.0,
11
+ 888.0,
12
+ 1766.0,
13
+ 3575.0,
14
+ 8977.0,
15
+ 17961.0,
16
+ 35949.0,
17
+ 89874.0,
18
+ 179749.0
19
+ ],
20
+ "faithfulness": [
21
+ 0.8161993769470405,
22
+ 1.0623052959501558,
23
+ 1.1557632398753894,
24
+ 1.1806853582554517,
25
+ 1.071651090342679,
26
+ 1.0093457943925233,
27
+ 0.9875389408099688,
28
+ 0.9470404984423676,
29
+ 0.9719626168224299,
30
+ 1.0
31
+ ]
32
+ },
33
+ "mcqa": {
34
+ "edge_counts": [
35
+ 86.0,
36
+ 212.0,
37
+ 704.0,
38
+ 1632.0,
39
+ 3449.0,
40
+ 8871.0,
41
+ 17814.0,
42
+ 35720.0,
43
+ 89874.0,
44
+ 179749.0
45
+ ],
46
+ "faithfulness": [
47
+ 0.37104430379746833,
48
+ 0.4506526898734177,
49
+ 0.6471518987341772,
50
+ 0.7231012658227848,
51
+ 0.9113924050632911,
52
+ 1.0,
53
+ 1.5917721518987342,
54
+ 1.7183544303797469,
55
+ 1.009493670886076,
56
+ 1.0
57
+ ]
58
+ }
59
+ }
60
+ },
61
+ {
62
+ "model_id": "gpt2",
63
+ "scores": {
64
+ "ioi": {
65
+ "edge_counts": [
66
+ 27.0,
67
+ 57.0,
68
+ 159.0,
69
+ 322.0,
70
+ 640.0,
71
+ 1608.0,
72
+ 3244.0,
73
+ 6498.0,
74
+ 16245.0,
75
+ 32491.0
76
+ ],
77
+ "faithfulness": [
78
+ 0.10013020765541497,
79
+ 0.33153985647745055,
80
+ 1.0775680479866294,
81
+ 0.960686341813994,
82
+ 1.0155814417206641,
83
+ 1.0182404988203417,
84
+ 0.9613478605327729,
85
+ 0.9464708735339975,
86
+ 0.9555035267362492,
87
+ 1.0
88
+ ]
89
+ }
90
+ }
91
+ }
92
+ ]
93
+ }