Spaces:
Running
Running
jasonshaoshun
committed on
Commit
·
76717d0
1
Parent(s):
ae2cd7a
fix: resolve inconsistent variable naming in causal graph (IOI and ravel)
Browse files
- app.py +1 -1
- src/about.py +1 -1
- src/leaderboard/read_evals.py +8 -6
app.py
CHANGED
@@ -323,7 +323,7 @@ def init_leaderboard_mib_causalgraph(dataframe, track):
|
|
323 |
"arithmetic_addition": "Arithmetic (+)",
|
324 |
"arithmetic_subtraction": "Arithmetic (-)",
|
325 |
"ARC_easy": "ARC (Easy)",
|
326 |
-
"
|
327 |
}
|
328 |
|
329 |
target_variables_mapping = {
|
|
|
323 |
"arithmetic_addition": "Arithmetic (+)",
|
324 |
"arithmetic_subtraction": "Arithmetic (-)",
|
325 |
"ARC_easy": "ARC (Easy)",
|
326 |
+
"RAVEL": "RAVEL"
|
327 |
}
|
328 |
|
329 |
target_variables_mapping = {
|
src/about.py
CHANGED
@@ -85,7 +85,7 @@ class TaskMIB_Causalgraph:
|
|
85 |
class TasksMib_Causalgraph(Enum):
|
86 |
task0 = TaskMIB_Causalgraph("ioi", ["Qwen2ForCausalLM", "GPT2ForCausalLM", "GPT2LMHeadModel", "Gemma2ForCausalLM", "LlamaForCausalLM"], "ioi_task", ["average_score"], ["output_token", "output_position"])
|
87 |
task1 = TaskMIB_Causalgraph("mcqa", ["Qwen2ForCausalLM", "GPT2ForCausalLM", "GPT2LMHeadModel", "Gemma2ForCausalLM", "LlamaForCausalLM"], "4_answer_MCQA", ["average_score"], ["answer_pointer", "answer"])
|
88 |
-
task2 = TaskMIB_Causalgraph("ravel", ["Qwen2ForCausalLM", "GPT2ForCausalLM", "GPT2LMHeadModel", "Gemma2ForCausalLM", "LlamaForCausalLM"], "
|
89 |
task3 = TaskMIB_Causalgraph("arc_easy", ["Qwen2ForCausalLM", "GPT2ForCausalLM", "GPT2LMHeadModel", "Gemma2ForCausalLM", "LlamaForCausalLM"], "ARC_easy", ["average_score"], ["answer_pointer", "answer"])
|
90 |
|
91 |
@classmethod
|
|
|
85 |
class TasksMib_Causalgraph(Enum):
|
86 |
task0 = TaskMIB_Causalgraph("ioi", ["Qwen2ForCausalLM", "GPT2ForCausalLM", "GPT2LMHeadModel", "Gemma2ForCausalLM", "LlamaForCausalLM"], "ioi_task", ["average_score"], ["output_token", "output_position"])
|
87 |
task1 = TaskMIB_Causalgraph("mcqa", ["Qwen2ForCausalLM", "GPT2ForCausalLM", "GPT2LMHeadModel", "Gemma2ForCausalLM", "LlamaForCausalLM"], "4_answer_MCQA", ["average_score"], ["answer_pointer", "answer"])
|
88 |
+
task2 = TaskMIB_Causalgraph("ravel", ["Qwen2ForCausalLM", "GPT2ForCausalLM", "GPT2LMHeadModel", "Gemma2ForCausalLM", "LlamaForCausalLM"], "RAVEL", ["average_score"], ["Continent", "Language", "Country", "Language"])
|
89 |
task3 = TaskMIB_Causalgraph("arc_easy", ["Qwen2ForCausalLM", "GPT2ForCausalLM", "GPT2LMHeadModel", "Gemma2ForCausalLM", "LlamaForCausalLM"], "ARC_easy", ["average_score"], ["answer_pointer", "answer"])
|
90 |
|
91 |
@classmethod
|
src/leaderboard/read_evals.py
CHANGED
@@ -387,6 +387,7 @@ def get_raw_eval_results_mib_causalgraph(results_path: str) -> Tuple[pd.DataFram
|
|
387 |
for root, _, files in os.walk(results_path):
|
388 |
for file in files:
|
389 |
if file.endswith('.json'):
|
|
|
390 |
json_files.append(os.path.join(root, file))
|
391 |
|
392 |
if not json_files:
|
@@ -400,12 +401,13 @@ def get_raw_eval_results_mib_causalgraph(results_path: str) -> Tuple[pd.DataFram
|
|
400 |
with open(json_file, 'r') as f:
|
401 |
data = json.load(f)
|
402 |
|
403 |
-
# Check if this is the consolidated format by examining a sample key
|
404 |
-
sample_key = next(iter(data), None)
|
405 |
-
if sample_key and isinstance(sample_key, str) and '(' in sample_key and ')' in sample_key:
|
406 |
-
|
407 |
-
|
408 |
-
|
|
|
409 |
except Exception as e:
|
410 |
print(f"Error reading {json_file}: {e}")
|
411 |
|
|
|
387 |
for root, _, files in os.walk(results_path):
|
388 |
for file in files:
|
389 |
if file.endswith('.json'):
|
390 |
+
# print(f"Found JSON file: {file} in {root}")
|
391 |
json_files.append(os.path.join(root, file))
|
392 |
|
393 |
if not json_files:
|
|
|
401 |
with open(json_file, 'r') as f:
|
402 |
data = json.load(f)
|
403 |
|
404 |
+
# # Check if this is the consolidated format by examining a sample key
|
405 |
+
# sample_key = next(iter(data), None)
|
406 |
+
# if sample_key and isinstance(sample_key, str) and '(' in sample_key and ')' in sample_key:
|
407 |
+
# raw_data = data
|
408 |
+
# print(f"Found consolidated data file: {json_file}")
|
409 |
+
# break
|
410 |
+
raw_data = data
|
411 |
except Exception as e:
|
412 |
print(f"Error reading {json_file}: {e}")
|
413 |
|