atticusg committed · verified
Commit 9ebccf5 · Parent(s): aefedd7

Update app.py

Files changed (1):
  1. app.py +3 -100
app.py CHANGED
@@ -79,7 +79,7 @@ except Exception:
 
 
 LEADERBOARD_DF_MIB_SUBGRAPH = get_leaderboard_df_mib(EVAL_RESULTS_MIB_SUBGRAPH_PATH, EVAL_REQUESTS_PATH, COLS_MIB, BENCHMARK_COLS_MIB)
-LEADERBOARD_DF_MIB_CAUSALGRAPH = get_leaderboard_df_mib(EVAL_RESULTS_MIB_CAUSALGRAPH_PATH, EVAL_REQUESTS_PATH, COLS_MIB, BENCHMARK_COLS_MIB)
+# LEADERBOARD_DF_MIB_CAUSALGRAPH = get_leaderboard_df_mib_causal(EVAL_RESULTS_MIB_CAUSALGRAPH_PATH, EVAL_REQUESTS_PATH, COLS_MIB, BENCHMARK_COLS_MIB)
 
 # LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
 # LEADERBOARD_DF_MULTIMODAL = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS_MULTIMODAL, BENCHMARK_COLS_MULTIMODAL)
@@ -116,103 +116,6 @@ def init_leaderboard_mib(dataframe, track):
         interactive=False,
     )
 
-
-def calculate_best_layer_scores(task_data):
-    """
-    Calculate the best scores across layers for output token and location
-
-    Args:
-        task_data: Dictionary containing task scores for different layers
-
-    Returns:
-        Dictionary with best scores and corresponding layer
-    """
-    output_token_scores = [layer_data['output_token'] for layer_data in task_data.values()]
-    output_location_scores = [layer_data['output_location'] for layer_data in task_data.values()]
-
-    best_output_token = max(output_token_scores)
-    best_output_location = max(output_location_scores)
-
-    # Find the layer with the best combined performance
-    layer_scores = [(layer, layer_data['output_token'] + layer_data['output_location'])
-                    for layer, layer_data in task_data.items()]
-    best_layer = max(layer_scores, key=lambda x: x[1])[0]
-
-    return {
-        'output_token': best_output_token,
-        'output_location': best_output_location,
-        'best_layer': int(best_layer)
-    }
-
-def process_single_method(json_data):
-    """
-    Process results for a single method into summary rows
-
-    Args:
-        json_data: Dictionary containing results for one method
-
-    Returns:
-        List of summary rows for the method
-    """
-    summary_rows = []
-    method_name = json_data['method_name']
-
-    for model_result in json_data['results']:
-        model_id = model_result['model_id']
-        task_data = model_result['task_scores']['MCQA']
-
-        best_scores = calculate_best_layer_scores(task_data)
-
-        summary_row = {
-            'Method': method_name,
-            'Model': model_id,
-            'Best Output Token Score': best_scores['output_token'],
-            'Best Output Location Score': best_scores['output_location'],
-            'Best Layer': best_scores['best_layer']
-        }
-        summary_rows.append(summary_row)
-
-    return summary_rows
-
-def init_leaderboard_mib_causal(json_data_list, track):
-    """
-    Creates a leaderboard summary for causal intervention results from multiple methods
-
-    Args:
-        json_data_list: List of dictionaries containing results for different methods
-        track: Track identifier (currently unused but maintained for compatibility)
-
-    Returns:
-        Leaderboard object containing processed and formatted results
-    """
-    # Process all methods
-    all_summary_data = []
-    for method_data in json_data_list:
-        method_summary = process_single_method(method_data)
-        all_summary_data.extend(method_summary)
-
-    # Convert to DataFrame
-    results_df = pd.DataFrame(all_summary_data)
-
-    # Sort by best score (using output token score as primary metric)
-    results_df = results_df.sort_values('Best Output Token Score', ascending=False)
-
-    # Round numeric columns to 3 decimal places
-    numeric_cols = ['Best Output Token Score', 'Best Output Location Score']
-    results_df[numeric_cols] = results_df[numeric_cols].round(3)
-
-    return Leaderboard(
-        value=results_df,
-        datatype=['text', 'text', 'number', 'number', 'number'],
-        select_columns=SelectColumns(
-            default_selection=['Method', 'Model', 'Best Output Token Score', 'Best Output Location Score', 'Best Layer'],
-            cant_deselect=['Method', 'Model'],
-            label="Select Metrics to Display:",
-        ),
-        search_columns=['Method', 'Model'],
-        interactive=False,
-    )
-
 def init_leaderboard(dataframe, track):
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
@@ -280,8 +183,8 @@ with demo:
             leaderboard = init_leaderboard_mib(LEADERBOARD_DF_MIB_SUBGRAPH, "Subgraph")
             # leaderboard = init_leaderboard_mib(LEADERBOARD_DF, "mib")
 
-        with gr.TabItem("Causal Graph", elem_id="causalgraph", id=1):
-            leaderboard = init_leaderboard_mib_causal(LEADERBOARD_DF_MIB_CAUSALGRAPH, "Causal Graph")
+        # with gr.TabItem("Causal Graph", elem_id="causalgraph", id=1):
+        #     leaderboard = init_leaderboard_mib_causal(LEADERBOARD_DF_MIB_CAUSALGRAPH, "Causal Graph")
 
     # with gr.Row():
     #     with gr.Accordion("📙 Citation", open=False):
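
For readers tracking what this commit removes: the deleted `calculate_best_layer_scores` helper can be exercised standalone. Below is a minimal, functionally equivalent sketch run on a hypothetical `task_data` payload; the layer keys and score values are invented for illustration and are not real MIB results.

```python
# Condensed, behavior-equivalent version of the deleted helper.
def calculate_best_layer_scores(task_data):
    """Best per-metric scores across layers, plus the layer whose combined
    (output_token + output_location) score is highest."""
    best_output_token = max(d['output_token'] for d in task_data.values())
    best_output_location = max(d['output_location'] for d in task_data.values())
    best_layer = max(
        task_data,
        key=lambda layer: task_data[layer]['output_token']
        + task_data[layer]['output_location'],
    )
    return {
        'output_token': best_output_token,
        'output_location': best_output_location,
        'best_layer': int(best_layer),
    }

# Hypothetical scores for three layers of one model on the MCQA task.
task_data = {
    '0':  {'output_token': 0.41, 'output_location': 0.38},
    '8':  {'output_token': 0.73, 'output_location': 0.69},
    '16': {'output_token': 0.68, 'output_location': 0.75},
}
print(calculate_best_layer_scores(task_data))
# -> {'output_token': 0.73, 'output_location': 0.75, 'best_layer': 16}
```

Note that the per-metric maxima and the best combined layer need not coincide: here layer 8 wins on output token, but layer 16 has the best combined score and is reported as `best_layer`.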
 
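The deleted `process_single_method`/`init_leaderboard_mib_causal` path also implied a per-method JSON schema, which this sketch reverse-engineers from the keys the removed code accessed (`method_name`, `results[].model_id`, `results[].task_scores.MCQA`). Every name and number below is an illustrative placeholder, not data from the actual results files.

```python
import pandas as pd

# Hypothetical per-method results payload matching the implied schema.
json_data = {
    'method_name': 'example-method',
    'results': [
        {
            'model_id': 'example-model',
            'task_scores': {
                'MCQA': {
                    '4': {'output_token': 0.52, 'output_location': 0.49},
                    '9': {'output_token': 0.61, 'output_location': 0.66},
                },
            },
        },
    ],
}

rows = []
for model_result in json_data['results']:
    task = model_result['task_scores']['MCQA']
    # Same reduction as the deleted calculate_best_layer_scores helper.
    best_layer = max(
        task, key=lambda k: task[k]['output_token'] + task[k]['output_location']
    )
    rows.append({
        'Method': json_data['method_name'],
        'Model': model_result['model_id'],
        'Best Output Token Score': max(d['output_token'] for d in task.values()),
        'Best Output Location Score': max(d['output_location'] for d in task.values()),
        'Best Layer': int(best_layer),
    })

# Sorting and rounding mirror what the removed leaderboard builder did
# before handing the DataFrame to the Leaderboard widget.
df = pd.DataFrame(rows).sort_values('Best Output Token Score', ascending=False)
print(df.round(3))
```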