new metrics
src/leaderboard/processor.py CHANGED

@@ -29,8 +29,10 @@ def calculate_integral_score(row: pd.Series) -> float:
     for test_type in TEST_TYPES:
         metric_col = f"{test_type}_accuracy"
         if metric_col in row and pd.notna(row[metric_col]):
+            print(f"Found accuracy metric for {test_type}: {row[metric_col]}")
             integral_score *= row[metric_col]
             metric_count += 1
+    print(f"Metric count: {metric_count}")

     # If no accuracy metrics were found at all, the score remains 1.0 before penalties.
     # The library returns 0.0 in this case (`return integral_score if count > 0 else 0.0`)
@@ -42,8 +44,8 @@ def calculate_integral_score(row: pd.Series) -> float:
     micro_error_col = "micro_avg_error_ratio"
     if micro_error_col in row and pd.notna(row[micro_error_col]):
         # Micro error is stored as %, convert back to ratio
-
-        integral_score *= (1.0 -
+        micro_error_ratio = row[micro_error_col] / 100.0
+        integral_score *= (1.0 - micro_error_ratio)

     # Runtime Penalty
     avg_runtime_ms = None # Initialize
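For readers following the scoring change: the hunks above compose the score multiplicatively, so each available `{test_type}_accuracy` scales the running product, and the micro-average error (stored as a percent) is converted back to a ratio and folded in as a `(1.0 - error)` factor. Below is a minimal, self-contained sketch of how the surrounding function might fit together; the function name, the example TEST_TYPES values, and everything around the runtime penalty are illustrative assumptions, not the repository's actual code.

import pandas as pd

# Illustrative reconstruction only; the TEST_TYPES values, the function name,
# and the runtime-penalty handling are assumptions not shown in the diff.
TEST_TYPES = ["code_generation", "bug_fixing"]  # assumed test types

def calculate_integral_score_sketch(row: pd.Series) -> float:
    integral_score = 1.0
    metric_count = 0

    # Multiply in every accuracy metric that is present and non-null.
    for test_type in TEST_TYPES:
        metric_col = f"{test_type}_accuracy"
        if metric_col in row and pd.notna(row[metric_col]):
            integral_score *= row[metric_col]
            metric_count += 1

    # Micro-average error is stored as a percentage; convert it back to a
    # ratio and apply it as a (1 - error) factor.
    micro_error_col = "micro_avg_error_ratio"
    if micro_error_col in row and pd.notna(row[micro_error_col]):
        micro_error_ratio = row[micro_error_col] / 100.0
        integral_score *= (1.0 - micro_error_ratio)

    # Runtime penalty: the formula sits outside the changed hunks, so it is
    # omitted here; the diff only shows avg_runtime_ms being initialized.
    avg_runtime_ms = None  # Initialize

    # As quoted in the diff comment: with no accuracy metrics, return 0.0.
    return integral_score if metric_count > 0 else 0.0

# Example: accuracies of 0.9 and 0.8 with a 5% micro-average error give
# 0.9 * 0.8 * (1 - 0.05) ≈ 0.684.
row = pd.Series({
    "code_generation_accuracy": 0.9,
    "bug_fixing_accuracy": 0.8,
    "micro_avg_error_ratio": 5.0,  # stored as a percent
})
print(calculate_integral_score_sketch(row))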