Spaces:
Running
Running
jasonshaoshun
committed on
Commit
·
ad60993
1
Parent(s):
76717d0
fix: resolve inconsistent variable naming in causal graph (IOI and ravel)
Browse files
- app.py +1 -1
- src/leaderboard/read_evals.py +7 -8
app.py
CHANGED
@@ -214,7 +214,7 @@ LEADERBOARD_DF_MIB_SUBGRAPH_FEQ = get_leaderboard_df_mib_subgraph(EVAL_RESULTS_M
|
|
214 |
metric_type="CMD")
|
215 |
|
216 |
# In app.py, modify the LEADERBOARD initialization
|
217 |
-
|
218 |
EVAL_RESULTS_MIB_CAUSALGRAPH_PATH
|
219 |
)
|
220 |
|
|
|
214 |
metric_type="CMD")
|
215 |
|
216 |
# In app.py, modify the LEADERBOARD initialization
|
217 |
+
LEADERBOARD_DF_MIB_CAUSALGRAPH_AGGREGATED, LEADERBOARD_DF_MIB_CAUSALGRAPH_AVERAGED = get_leaderboard_df_mib_causalgraph(
|
218 |
EVAL_RESULTS_MIB_CAUSALGRAPH_PATH
|
219 |
)
|
220 |
|
src/leaderboard/read_evals.py
CHANGED
@@ -401,13 +401,12 @@ def get_raw_eval_results_mib_causalgraph(results_path: str) -> Tuple[pd.DataFram
|
|
401 |
with open(json_file, 'r') as f:
|
402 |
data = json.load(f)
|
403 |
|
404 |
-
#
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
raw_data = data
|
411 |
except Exception as e:
|
412 |
print(f"Error reading {json_file}: {e}")
|
413 |
|
@@ -513,7 +512,7 @@ def get_raw_eval_results_mib_causalgraph(results_path: str) -> Tuple[pd.DataFram
|
|
513 |
# intervention_averaged_mean_df = create_intervention_averaged_df(detailed_df_mean)
|
514 |
|
515 |
# return detailed_df_highest, detailed_df_mean, intervention_averaged_highest_df
|
516 |
-
return detailed_df_highest,
|
517 |
|
518 |
|
519 |
|
|
|
401 |
with open(json_file, 'r') as f:
|
402 |
data = json.load(f)
|
403 |
|
404 |
+
# Check if this is the consolidated format by examining a sample key
|
405 |
+
sample_key = next(iter(data), None)
|
406 |
+
if sample_key and isinstance(sample_key, str) and '(' in sample_key and ')' in sample_key:
|
407 |
+
raw_data = data
|
408 |
+
print(f"Found consolidated data file: {json_file}")
|
409 |
+
|
|
|
410 |
except Exception as e:
|
411 |
print(f"Error reading {json_file}: {e}")
|
412 |
|
|
|
512 |
# intervention_averaged_mean_df = create_intervention_averaged_df(detailed_df_mean)
|
513 |
|
514 |
# return detailed_df_highest, detailed_df_mean, intervention_averaged_highest_df
|
515 |
+
return detailed_df_highest, detailed_df_mean
|
516 |
|
517 |
|
518 |
|