jasonshaoshun committed on
Commit
cefacdb
·
1 Parent(s): f585ea0
Files changed (2) hide show
  1. src/leaderboard/read_evals.py +30 -0
  2. src/populate.py +21 -1
src/leaderboard/read_evals.py CHANGED
@@ -259,6 +259,36 @@ class EvalResult_MIB_CAUSALGRAPH:
259
 
260
  return data_dict
261
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
  def get_raw_eval_results_mib_causalgraph(results_path: str, requests_path: str) -> List[EvalResult_MIB_CAUSALGRAPH]:
263
  model_result_filepaths = []
264
 
 
259
 
260
  return data_dict
261
 
262
+
263
+ # def get_raw_eval_results_mib_causalgraph(results_path: str, requests_path: str) -> List[EvalResult_MIB_CAUSALGRAPH]:
264
+ # """Extract evaluation results for MIB causalgraph"""
265
+ # model_result_filepaths = []
266
+
267
+ # for root, dirnames, files in os.walk(results_path):
268
+ # if len(files) == 0 or any([not f.endswith(".json") for f in files]):
269
+ # continue
270
+
271
+ # try:
272
+ # files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
273
+ # except dateutil.parser._parser.ParserError:
274
+ # files = [files[-1]]
275
+
276
+ # for file in files:
277
+ # model_result_filepaths.append(os.path.join(root, file))
278
+
279
+ # eval_results = []
280
+ # for filepath in model_result_filepaths:
281
+ # try:
282
+ # eval_result = EvalResult_MIB_CAUSALGRAPH("", "", {})
283
+ # result = eval_result.init_from_json_file(filepath)
284
+ # result.to_dict() # Verify conversion works
285
+ # eval_results.append(result)
286
+ # except Exception as e:
287
+ # print(f"Error processing {filepath}: {e}")
288
+ # continue
289
+
290
+ # return eval_results
291
+
292
  def get_raw_eval_results_mib_causalgraph(results_path: str, requests_path: str) -> List[EvalResult_MIB_CAUSALGRAPH]:
293
  model_result_filepaths = []
294
 
src/populate.py CHANGED
@@ -1,7 +1,7 @@
1
  import json
2
  import os
3
-
4
  import pandas as pd
 
5
 
6
  from src.display.formatting import has_no_nan_values, make_clickable_model
7
  from src.display.utils import AutoEvalColumn, AutoEvalColumnMultimodal, EvalQueueColumn
@@ -109,6 +109,26 @@ def create_intervention_averaged_df(df: pd.DataFrame) -> pd.DataFrame:
109
 
110
  return averaged_df
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
  def get_leaderboard_df_mib_causalgraph(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
114
  """Creates three dataframes from all the MIB causal graph experiment results"""
 
1
  import json
2
  import os
 
3
  import pandas as pd
4
+ from typing import List, Dict, Tuple
5
 
6
  from src.display.formatting import has_no_nan_values, make_clickable_model
7
  from src.display.utils import AutoEvalColumn, AutoEvalColumnMultimodal, EvalQueueColumn
 
109
 
110
  return averaged_df
111
 
112
+ # def get_leaderboard_df_mib_causalgraph(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
113
+ # """Creates a dataframe from all the MIB causal graph experiment results"""
114
+ # print(f"results_path is {results_path}, requests_path is {requests_path}")
115
+ # raw_data = get_raw_eval_results_mib_causalgraph(results_path, requests_path)
116
+ # print(f"raw_data is {raw_data}")
117
+
118
+ # # Convert each result to dict format for detailed df
119
+ # all_data_json = [v.to_dict() for v in raw_data]
120
+ # detailed_df = pd.DataFrame.from_records(all_data_json)
121
+ # print(f"detailed_df is: {detailed_df}")
122
+
123
+ # # Create and print other views for debugging/reference
124
+ # aggregated_df = aggregate_methods(detailed_df)
125
+ # print(f"aggregated_df is: {aggregated_df}")
126
+
127
+ # intervention_averaged_df = create_intervention_averaged_df(aggregated_df)
128
+ # print(f"intervention_averaged_df is: {intervention_averaged_df}")
129
+
130
+ # # Only return detailed_df for display
131
+ # return detailed_df
132
 
133
  def get_leaderboard_df_mib_causalgraph(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
134
  """Creates three dataframes from all the MIB causal graph experiment results"""