Spaces:

mib-bench
/

leaderboard

Running

jasonshaoshun committed 14 days ago

Commit

76717d0

1 Parent(s): ae2cd7a

fix: resolve inconsistent variable naming in causal graph (IOI and ravel)

Files changed (3) hide show

app.py CHANGED Viewed

@@ -323,7 +323,7 @@ def init_leaderboard_mib_causalgraph(dataframe, track):
         "arithmetic_addition": "Arithmetic (+)",
         "arithmetic_subtraction": "Arithmetic (-)",
         "ARC_easy": "ARC (Easy)",
-        "RAVEL_task": "RAVEL"
     }
     target_variables_mapping = {

         "arithmetic_addition": "Arithmetic (+)",
         "arithmetic_subtraction": "Arithmetic (-)",
         "ARC_easy": "ARC (Easy)",
+        "RAVEL": "RAVEL"
     }
     target_variables_mapping = {

src/about.py CHANGED Viewed

@@ -85,7 +85,7 @@ class TaskMIB_Causalgraph:
 class TasksMib_Causalgraph(Enum):
     task0 = TaskMIB_Causalgraph("ioi", ["Qwen2ForCausalLM", "GPT2ForCausalLM", "GPT2LMHeadModel", "Gemma2ForCausalLM", "LlamaForCausalLM"], "ioi_task", ["average_score"], ["output_token", "output_position"])
     task1 = TaskMIB_Causalgraph("mcqa", ["Qwen2ForCausalLM", "GPT2ForCausalLM", "GPT2LMHeadModel", "Gemma2ForCausalLM", "LlamaForCausalLM"], "4_answer_MCQA", ["average_score"], ["answer_pointer", "answer"])
-    task2 = TaskMIB_Causalgraph("ravel", ["Qwen2ForCausalLM", "GPT2ForCausalLM", "GPT2LMHeadModel", "Gemma2ForCausalLM", "LlamaForCausalLM"], "RAVEL_task", ["average_score"], ["Continent", "Language", "Country", "Language"])
     task3 = TaskMIB_Causalgraph("arc_easy", ["Qwen2ForCausalLM", "GPT2ForCausalLM", "GPT2LMHeadModel", "Gemma2ForCausalLM", "LlamaForCausalLM"], "ARC_easy", ["average_score"], ["answer_pointer", "answer"])
     @classmethod

 class TasksMib_Causalgraph(Enum):
     task0 = TaskMIB_Causalgraph("ioi", ["Qwen2ForCausalLM", "GPT2ForCausalLM", "GPT2LMHeadModel", "Gemma2ForCausalLM", "LlamaForCausalLM"], "ioi_task", ["average_score"], ["output_token", "output_position"])
     task1 = TaskMIB_Causalgraph("mcqa", ["Qwen2ForCausalLM", "GPT2ForCausalLM", "GPT2LMHeadModel", "Gemma2ForCausalLM", "LlamaForCausalLM"], "4_answer_MCQA", ["average_score"], ["answer_pointer", "answer"])
+    task2 = TaskMIB_Causalgraph("ravel", ["Qwen2ForCausalLM", "GPT2ForCausalLM", "GPT2LMHeadModel", "Gemma2ForCausalLM", "LlamaForCausalLM"], "RAVEL", ["average_score"], ["Continent", "Language", "Country", "Language"])
     task3 = TaskMIB_Causalgraph("arc_easy", ["Qwen2ForCausalLM", "GPT2ForCausalLM", "GPT2LMHeadModel", "Gemma2ForCausalLM", "LlamaForCausalLM"], "ARC_easy", ["average_score"], ["answer_pointer", "answer"])
     @classmethod

src/leaderboard/read_evals.py CHANGED Viewed

@@ -387,6 +387,7 @@ def get_raw_eval_results_mib_causalgraph(results_path: str) -> Tuple[pd.DataFram
     for root, _, files in os.walk(results_path):
         for file in files:
             if file.endswith('.json'):
                 json_files.append(os.path.join(root, file))
     if not json_files:
@@ -400,12 +401,13 @@ def get_raw_eval_results_mib_causalgraph(results_path: str) -> Tuple[pd.DataFram
             with open(json_file, 'r') as f:
                 data = json.load(f)
-                # Check if this is the consolidated format by examining a sample key
-                sample_key = next(iter(data), None)
-                if sample_key and isinstance(sample_key, str) and '(' in sample_key and ')' in sample_key:
-                    raw_data = data
-                    print(f"Found consolidated data file: {json_file}")
-                    break
         except Exception as e:
             print(f"Error reading {json_file}: {e}")

     for root, _, files in os.walk(results_path):
         for file in files:
             if file.endswith('.json'):
+                # print(f"Found JSON file: {file} in {root}")
                 json_files.append(os.path.join(root, file))
     if not json_files:
             with open(json_file, 'r') as f:
                 data = json.load(f)
+                # # Check if this is the consolidated format by examining a sample key
+                # sample_key = next(iter(data), None)
+                # if sample_key and isinstance(sample_key, str) and '(' in sample_key and ')' in sample_key:
+                #     raw_data = data
+                #     print(f"Found consolidated data file: {json_file}")
+                #     break
+                raw_data = data
         except Exception as e:
             print(f"Error reading {json_file}: {e}")