File size: 9,326 Bytes
a100ebc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4780a48
 
 
 
 
a100ebc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b624a39
a100ebc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b624a39
a100ebc
 
 
 
b624a39
a100ebc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2817fcb
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
import json
import os
import pandas as pd
import numpy as np
from typing import List, Dict, Any, Tuple
from collections import defaultdict

def average_counterfactuals(json_files: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Average counterfactual scores for each intervention at every layer.

    Args:
        json_files: Parsed result files. Each has a 'method_name' and a
            'results' list; every result holds a 'model_id' and per-task
            layer data, where each intervention carries a list of
            'counterfactual_scores' dicts with a 'score' value.

    Returns:
        New structures of the same shape, with each intervention's
        'counterfactual_scores' list collapsed into a single
        'average_score' float (0.0 when the list is empty or the mean
        is NaN). Inputs are not mutated.
    """
    processed_jsons = []

    for json_file in json_files:
        new_json = {
            'method_name': json_file['method_name'],
            'results': []
        }

        for result in json_file['results']:
            new_result = {
                'model_id': result['model_id'],
                'task_scores': {}
            }

            for task, scores in result['task_scores'].items():
                new_scores = []

                for layer_data in scores:
                    new_layer_data = {
                        'layer': layer_data['layer'],
                        'layer_scores': []
                    }

                    for intervention_data in layer_data['layer_scores']:
                        cf_scores = [cf['score'] for cf in intervention_data['counterfactual_scores']]
                        # Guard the empty case explicitly: np.mean([]) emits a
                        # RuntimeWarning and returns NaN. Cast to float so the
                        # output holds plain Python floats, not np.float64.
                        avg_score = float(np.mean(cf_scores)) if cf_scores else 0.0
                        if np.isnan(avg_score):
                            avg_score = 0.0
                        new_layer_data['layer_scores'].append({
                            'intervention': intervention_data['intervention'],
                            'average_score': avg_score
                        })

                    new_scores.append(new_layer_data)

                new_result['task_scores'][task] = new_scores

            new_json['results'].append(new_result)

        processed_jsons.append(new_json)

    return processed_jsons

def find_layer_averages(json_files: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Average per-layer scores so each intervention gets one score per task.

    Expects the output format of ``average_counterfactuals`` (each layer
    entry has 'layer_scores' with 'intervention' and 'average_score').

    Args:
        json_files: Structures with per-layer, per-intervention averages.

    Returns:
        New structures where each task maps directly to a list of
        ``{'intervention': [...], 'average_score': float}`` entries, the
        score averaged over all layers that reported that intervention.
    """
    processed_jsons = []

    for json_file in json_files:
        new_json = {
            'method_name': json_file['method_name'],
            'results': []
        }

        for result in json_file['results']:
            new_result = {
                'model_id': result['model_id'],
                'task_scores': {}
            }

            for task, scores in result['task_scores'].items():
                # Group by intervention first. Key on the tuple of
                # intervention parts rather than a '_'-joined string:
                # joining and re-splitting corrupts intervention names
                # that themselves contain underscores.
                intervention_scores = defaultdict(list)
                for layer_data in scores:
                    for intervention_data in layer_data['layer_scores']:
                        intervention_key = tuple(intervention_data['intervention'])
                        intervention_scores[intervention_key].append(intervention_data['average_score'])

                # Average across layers for each intervention.
                new_result['task_scores'][task] = [
                    {
                        'intervention': list(intervention),
                        'average_score': float(np.mean(layer_scores)) if layer_scores else 0.0
                    }
                    for intervention, layer_scores in intervention_scores.items()
                ]

            new_json['results'].append(new_result)

        processed_jsons.append(new_json)

    return processed_jsons

def create_summary_dataframe(json_files: List[Dict[str, Any]]) -> pd.DataFrame:
    """Build a summary DataFrame: methods as rows, MODEL_TASK_INTERVENTION as columns.

    Duplicate method names are disambiguated with an occurrence-counter
    suffix ("m" appearing twice yields rows "m_1" and "m_2").

    Args:
        json_files: Structures in the output format of
            ``find_layer_averages`` (each task maps to a list of
            intervention/average_score entries).

    Returns:
        DataFrame whose cells are scores formatted as "%.3f" strings.
    """
    data = {}
    method_counters = defaultdict(int)

    for json_file in json_files:
        method_name = json_file['method_name']
        # Count every occurrence and always suffix with the counter so
        # repeated method names produce distinct row labels.
        method_counters[method_name] += 1
        unique_method_name = f"{method_name}_{method_counters[method_name]}"

        method_scores = []
        for result in json_file['results']:
            model = result['model_id']
            for task, scores in result['task_scores'].items():
                for score_data in scores:
                    intervention = '_'.join(score_data['intervention'])
                    column = f"{model}_{task}_{intervention}"
                    method_scores.append((column, f"{score_data['average_score']:.3f}"))

        # Sort by column name so column ordering is deterministic.
        method_scores.sort(key=lambda item: item[0])
        data[unique_method_name] = dict(method_scores)

    return pd.DataFrame.from_dict(data, orient='index')


# averaged_cf = average_counterfactuals(json_files)
# layer_averaged = find_layer_averages(averaged_cf)
# detailed_df = create_summary_dataframe(layer_averaged)

def aggregate_methods(df: pd.DataFrame) -> pd.DataFrame:
    """Collapse duplicate-suffixed rows to one row per base method, keeping the max.

    Rows produced by ``create_summary_dataframe`` carry a "_<n>" suffix
    ("m_1", "m_2"); this strips the suffix and, for each column, keeps the
    maximum score across the duplicates.

    Args:
        df: DataFrame whose cells are "%.3f" score strings.

    Returns:
        DataFrame indexed by base method name, cells re-formatted as
        "%.3f" strings of the max (rounded to 2 decimals first, matching
        the original pipeline's display convention).
    """
    df_copy = df.copy()

    # Strip only a trailing "_<digits>" duplicate suffix. rsplit-style
    # partitioning keeps underscores inside the method name itself intact
    # ("my_method_2" -> "my_method", not "my").
    def _base_name(name: str) -> str:
        stem, sep, suffix = name.rpartition('_')
        if sep and suffix.isdigit():
            return stem
        return name

    df_copy.index = [_base_name(name) for name in df_copy.index]

    # Coerce the "%.3f" strings to floats; non-strings default to 0.0.
    def _extract_score(score_str):
        if isinstance(score_str, str):
            return float(score_str)
        return 0.0

    # Series.map via apply instead of DataFrame.applymap, which is
    # deprecated in recent pandas releases.
    numeric_df = df_copy.apply(lambda col: col.map(_extract_score))

    # Group by base method name and take the max score per column.
    aggregated_df = numeric_df.groupby(level=0).max().round(2)

    # Convert back to the "%.3f" string format used elsewhere.
    return aggregated_df.apply(lambda col: col.map(lambda x: f"{x:.3f}"))

def create_intervention_averaged_df(df: pd.DataFrame) -> pd.DataFrame:
    """Average each method's scores over interventions per model/task.

    Input columns are named "model_task_intervention"; the output has one
    column per "model_task" prefix, plus a trailing 'Average' column, with
    rows sorted by that average in descending order.

    Args:
        df: DataFrame whose cells are score strings (leading token is the
            numeric value).

    Returns:
        Numeric DataFrame of per-model/task means (rounded to 2 decimals)
        with an overall 'Average' column.
    """
    working = df.copy()

    # A pre-existing 'Average' column is derived data — exclude it.
    if 'Average' in working.columns:
        working = working.drop('Average', axis=1)

    # Cells are strings such as "0.500"; the first whitespace-separated
    # token is the numeric score. Non-strings fall back to 0.0.
    def _to_number(cell):
        return float(cell.split()[0]) if isinstance(cell, str) else 0.0

    numeric = working.apply(lambda col: col.map(_to_number))

    # Bucket columns by their "model_task" prefix (first two '_' fields).
    groups: Dict[str, list] = {}
    for column in numeric.columns:
        prefix = '_'.join(column.split('_')[:2])
        groups.setdefault(prefix, []).append(column)

    # Mean over each bucket's intervention columns, rounded for display.
    averaged = pd.DataFrame({
        prefix: numeric[cols].mean(axis=1).round(2)
        for prefix, cols in groups.items()
    })

    # Overall average across model/task columns, then best-first ordering.
    averaged['Average'] = averaged.mean(axis=1).round(2)
    return averaged.sort_values('Average', ascending=False)

def process_json_folder(folder_path: str) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Load every .json file in *folder_path* and build three summary views.

    Args:
        folder_path: Directory containing the result JSON files.

    Returns:
        A tuple of three DataFrames:
        1. detailed — all results including duplicates, layer-averaged;
        2. aggregated — max score per base method name;
        3. intervention-averaged — means across interventions per model/task.
    """
    loaded = []
    for name in os.listdir(folder_path):
        if not name.endswith('.json'):
            continue
        with open(os.path.join(folder_path, name), 'r') as handle:
            loaded.append(json.load(handle))

    # Pipeline: collapse counterfactuals, then layers, then tabulate.
    with_cf_averaged = average_counterfactuals(loaded)
    with_layers_averaged = find_layer_averages(with_cf_averaged)
    detailed_df = create_summary_dataframe(with_layers_averaged)
    aggregated_df = aggregate_methods(detailed_df)
    intervention_averaged_df = create_intervention_averaged_df(aggregated_df)

    return detailed_df, aggregated_df, intervention_averaged_df

# Example usage:
if __name__ == "__main__":
    # Point this at the directory holding the result JSON files.
    folder_path = "./json_files"
    detailed_df, aggregated_df, intervention_averaged_df = process_json_folder(folder_path)

    # Uncomment to inspect each view:
    # print("Detailed Results (including duplicates):")
    # print(detailed_df)
    # print("\nAggregated Results (max scores per method):")
    # print(aggregated_df)
    # print("\nIntervention-Averaged Results:")
    # print(intervention_averaged_df)