Spaces:
Running
Running
jasonshaoshun
committed on
Commit
·
02e508b
1
Parent(s):
a100ebc
debug
Browse files
- src/leaderboard/read_evals.py +9 -3
- src/populate.py +11 -11
src/leaderboard/read_evals.py
CHANGED
@@ -258,12 +258,14 @@ class EvalResult_MIB_CAUSALGRAPH:
|
|
258 |
data_dict[col_name] = round(np.mean(scores), 3)
|
259 |
|
260 |
return data_dict
|
261 |
-
|
262 |
def get_raw_eval_results_mib_causalgraph(results_path: str, requests_path: str) -> List[EvalResult_MIB_CAUSALGRAPH]:
|
263 |
-
"""Extract evaluation results for MIB causalgraph"""
|
264 |
model_result_filepaths = []
|
265 |
|
|
|
266 |
for root, dirnames, files in os.walk(results_path):
|
|
|
|
|
267 |
if len(files) == 0 or any([not f.endswith(".json") for f in files]):
|
268 |
continue
|
269 |
|
@@ -275,17 +277,21 @@ def get_raw_eval_results_mib_causalgraph(results_path: str, requests_path: str)
|
|
275 |
for file in files:
|
276 |
model_result_filepaths.append(os.path.join(root, file))
|
277 |
|
|
|
|
|
278 |
eval_results = []
|
279 |
for filepath in model_result_filepaths:
|
280 |
try:
|
281 |
eval_result = EvalResult_MIB_CAUSALGRAPH("", "", {})
|
282 |
result = eval_result.init_from_json_file(filepath)
|
283 |
-
|
|
|
284 |
eval_results.append(result)
|
285 |
except Exception as e:
|
286 |
print(f"Error processing {filepath}: {e}")
|
287 |
continue
|
288 |
|
|
|
289 |
return eval_results
|
290 |
|
291 |
|
|
|
258 |
data_dict[col_name] = round(np.mean(scores), 3)
|
259 |
|
260 |
return data_dict
|
261 |
+
|
262 |
def get_raw_eval_results_mib_causalgraph(results_path: str, requests_path: str) -> List[EvalResult_MIB_CAUSALGRAPH]:
|
|
|
263 |
model_result_filepaths = []
|
264 |
|
265 |
+
print(f"Scanning directory: {results_path}")
|
266 |
for root, dirnames, files in os.walk(results_path):
|
267 |
+
print(f"Current directory: {root}")
|
268 |
+
print(f"Found files: {files}")
|
269 |
if len(files) == 0 or any([not f.endswith(".json") for f in files]):
|
270 |
continue
|
271 |
|
|
|
277 |
for file in files:
|
278 |
model_result_filepaths.append(os.path.join(root, file))
|
279 |
|
280 |
+
print(f"Found json files: {model_result_filepaths}")
|
281 |
+
|
282 |
eval_results = []
|
283 |
for filepath in model_result_filepaths:
|
284 |
try:
|
285 |
eval_result = EvalResult_MIB_CAUSALGRAPH("", "", {})
|
286 |
result = eval_result.init_from_json_file(filepath)
|
287 |
+
print(f"Processed file {filepath}")
|
288 |
+
print(f"Got result: {result}")
|
289 |
eval_results.append(result)
|
290 |
except Exception as e:
|
291 |
print(f"Error processing {filepath}: {e}")
|
292 |
continue
|
293 |
|
294 |
+
print(f"Total results processed: {len(eval_results)}")
|
295 |
return eval_results
|
296 |
|
297 |
|
src/populate.py
CHANGED
@@ -112,23 +112,23 @@ def create_intervention_averaged_df(df: pd.DataFrame) -> pd.DataFrame:
|
|
112 |
|
113 |
def get_leaderboard_df_mib_causalgraph(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
|
114 |
"""Creates a dataframe from all the MIB causal graph experiment results"""
|
115 |
-
print(f"
|
116 |
raw_data = get_raw_eval_results_mib_causalgraph(results_path, requests_path)
|
117 |
-
print(f"raw_data
|
|
|
|
|
|
|
|
|
118 |
|
119 |
# Convert each result to dict format for detailed df
|
120 |
all_data_json = [v.to_dict() for v in raw_data]
|
121 |
-
|
122 |
-
print(f"
|
123 |
-
|
124 |
-
# Create and print other views for debugging/reference
|
125 |
-
aggregated_df = aggregate_methods(detailed_df)
|
126 |
-
print(f"aggregated_df is: {aggregated_df}")
|
127 |
|
128 |
-
|
129 |
-
print(f"
|
|
|
130 |
|
131 |
-
# Only return detailed_df for display
|
132 |
return detailed_df
|
133 |
|
134 |
|
|
|
112 |
|
113 |
def get_leaderboard_df_mib_causalgraph(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
|
114 |
"""Creates a dataframe from all the MIB causal graph experiment results"""
|
115 |
+
print(f"Starting get_leaderboard_df_mib_causalgraph with path: {results_path}")
|
116 |
raw_data = get_raw_eval_results_mib_causalgraph(results_path, requests_path)
|
117 |
+
print(f"Length of raw_data: {len(raw_data) if raw_data else 0}")
|
118 |
+
|
119 |
+
if not raw_data:
|
120 |
+
print("Warning: raw_data is empty")
|
121 |
+
return pd.DataFrame()
|
122 |
|
123 |
# Convert each result to dict format for detailed df
|
124 |
all_data_json = [v.to_dict() for v in raw_data]
|
125 |
+
print(f"Length of all_data_json: {len(all_data_json)}")
|
126 |
+
print(f"First entry of all_data_json: {all_data_json[0] if all_data_json else None}")
|
|
|
|
|
|
|
|
|
127 |
|
128 |
+
detailed_df = pd.DataFrame.from_records(all_data_json)
|
129 |
+
print(f"Shape of detailed_df: {detailed_df.shape}")
|
130 |
+
print(f"Columns in detailed_df: {detailed_df.columns.tolist()}")
|
131 |
|
|
|
132 |
return detailed_df
|
133 |
|
134 |
|