Spaces:
Running
Running
jasonshaoshun
committed on
Commit
·
f585ea0
1
Parent(s):
5bcfeb8
debug
Browse files
- app.py +27 -3
- src/populate.py +11 -14
app.py
CHANGED
@@ -74,7 +74,15 @@ except Exception:
|
|
74 |
|
75 |
LEADERBOARD_DF_MIB_SUBGRAPH = get_leaderboard_df_mib_subgraph(EVAL_RESULTS_MIB_SUBGRAPH_PATH, EVAL_REQUESTS_PATH, COLS_MIB_SUBGRAPH, BENCHMARK_COLS_MIB_SUBGRAPH)
|
76 |
|
77 |
-
LEADERBOARD_DF_MIB_CAUSALGRAPH = get_leaderboard_df_mib_causalgraph(EVAL_RESULTS_MIB_CAUSALGRAPH_PATH, EVAL_REQUESTS_PATH, COLS_MIB_CAUSALGRAPH, BENCHMARK_COLS_MIB_CAUSALGRAPH)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
|
79 |
# LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
|
80 |
# LEADERBOARD_DF_MULTIMODAL = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS_MULTIMODAL, BENCHMARK_COLS_MULTIMODAL)
|
@@ -202,9 +210,25 @@ with demo:
|
|
202 |
|
203 |
with gr.TabItem("Subgraph", elem_id="subgraph", id=0):
|
204 |
leaderboard = init_leaderboard_mib_subgraph(LEADERBOARD_DF_MIB_SUBGRAPH, "Subgraph")
|
205 |
-
|
|
|
206 |
with gr.TabItem("Causal Graph", elem_id="causalgraph", id=1):
|
207 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
208 |
|
209 |
# with gr.Row():
|
210 |
# with gr.Accordion("📙 Citation", open=False):
|
|
|
74 |
|
75 |
LEADERBOARD_DF_MIB_SUBGRAPH = get_leaderboard_df_mib_subgraph(EVAL_RESULTS_MIB_SUBGRAPH_PATH, EVAL_REQUESTS_PATH, COLS_MIB_SUBGRAPH, BENCHMARK_COLS_MIB_SUBGRAPH)
|
76 |
|
77 |
+
# LEADERBOARD_DF_MIB_CAUSALGRAPH = get_leaderboard_df_mib_causalgraph(EVAL_RESULTS_MIB_CAUSALGRAPH_PATH, EVAL_REQUESTS_PATH, COLS_MIB_CAUSALGRAPH, BENCHMARK_COLS_MIB_CAUSALGRAPH)
|
78 |
+
|
79 |
+
# In app.py, modify the LEADERBOARD initialization
|
80 |
+
LEADERBOARD_DF_MIB_CAUSALGRAPH_DETAILED, LEADERBOARD_DF_MIB_CAUSALGRAPH_AGGREGATED, LEADERBOARD_DF_MIB_CAUSALGRAPH_AVERAGED = get_leaderboard_df_mib_causalgraph(
|
81 |
+
EVAL_RESULTS_MIB_CAUSALGRAPH_PATH,
|
82 |
+
EVAL_REQUESTS_PATH,
|
83 |
+
COLS_MIB_CAUSALGRAPH,
|
84 |
+
BENCHMARK_COLS_MIB_CAUSALGRAPH
|
85 |
+
)
|
86 |
|
87 |
# LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
|
88 |
# LEADERBOARD_DF_MULTIMODAL = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS_MULTIMODAL, BENCHMARK_COLS_MULTIMODAL)
|
|
|
210 |
|
211 |
with gr.TabItem("Subgraph", elem_id="subgraph", id=0):
|
212 |
leaderboard = init_leaderboard_mib_subgraph(LEADERBOARD_DF_MIB_SUBGRAPH, "Subgraph")
|
213 |
+
|
214 |
+
# Then modify the Causal Graph tab section
|
215 |
with gr.TabItem("Causal Graph", elem_id="causalgraph", id=1):
|
216 |
+
with gr.Tabs() as causalgraph_tabs:
|
217 |
+
with gr.TabItem("Detailed View", id=0):
|
218 |
+
leaderboard_detailed = init_leaderboard_mib_causalgraph(
|
219 |
+
LEADERBOARD_DF_MIB_CAUSALGRAPH_DETAILED,
|
220 |
+
"Causal Graph"
|
221 |
+
)
|
222 |
+
with gr.TabItem("Aggregated View", id=1):
|
223 |
+
leaderboard_aggregated = init_leaderboard_mib_causalgraph(
|
224 |
+
LEADERBOARD_DF_MIB_CAUSALGRAPH_AGGREGATED,
|
225 |
+
"Causal Graph"
|
226 |
+
)
|
227 |
+
with gr.TabItem("Intervention Averaged", id=2):
|
228 |
+
leaderboard_averaged = init_leaderboard_mib_causalgraph(
|
229 |
+
LEADERBOARD_DF_MIB_CAUSALGRAPH_AVERAGED,
|
230 |
+
"Causal Graph"
|
231 |
+
)
|
232 |
|
233 |
# with gr.Row():
|
234 |
# with gr.Accordion("📙 Citation", open=False):
|
src/populate.py
CHANGED
@@ -110,26 +110,23 @@ def create_intervention_averaged_df(df: pd.DataFrame) -> pd.DataFrame:
|
|
110 |
return averaged_df
|
111 |
|
112 |
|
113 |
-
def get_leaderboard_df_mib_causalgraph(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
|
114 |
-
"""Creates
|
115 |
-
print(f"
|
116 |
raw_data = get_raw_eval_results_mib_causalgraph(results_path, requests_path)
|
117 |
-
print(f"
|
118 |
-
|
119 |
-
if not raw_data:
|
120 |
-
print("Warning: raw_data is empty")
|
121 |
-
return pd.DataFrame()
|
122 |
|
123 |
# Convert each result to dict format for detailed df
|
124 |
all_data_json = [v.to_dict() for v in raw_data]
|
125 |
-
print(f"Length of all_data_json: {len(all_data_json)}")
|
126 |
-
print(f"First entry of all_data_json: {all_data_json[0] if all_data_json else None}")
|
127 |
-
|
128 |
detailed_df = pd.DataFrame.from_records(all_data_json)
|
129 |
-
print(f"Shape of detailed_df: {detailed_df.shape}")
|
130 |
-
print(f"Columns in detailed_df: {detailed_df.columns.tolist()}")
|
131 |
|
132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
|
134 |
|
135 |
def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
|
|
|
110 |
return averaged_df
|
111 |
|
112 |
|
113 |
+
def get_leaderboard_df_mib_causalgraph(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
|
114 |
+
"""Creates three dataframes from all the MIB causal graph experiment results"""
|
115 |
+
print(f"results_path is {results_path}, requests_path is {requests_path}")
|
116 |
raw_data = get_raw_eval_results_mib_causalgraph(results_path, requests_path)
|
117 |
+
print(f"raw_data is {raw_data}")
|
|
|
|
|
|
|
|
|
118 |
|
119 |
# Convert each result to dict format for detailed df
|
120 |
all_data_json = [v.to_dict() for v in raw_data]
|
|
|
|
|
|
|
121 |
detailed_df = pd.DataFrame.from_records(all_data_json)
|
|
|
|
|
122 |
|
123 |
+
# Create aggregated df
|
124 |
+
aggregated_df = aggregate_methods(detailed_df)
|
125 |
+
|
126 |
+
# Create intervention-averaged df
|
127 |
+
intervention_averaged_df = create_intervention_averaged_df(aggregated_df)
|
128 |
+
|
129 |
+
return detailed_df, aggregated_df, intervention_averaged_df
|
130 |
|
131 |
|
132 |
def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
|