new metrics
src/leaderboard/processor.py CHANGED

@@ -29,8 +29,10 @@ def calculate_integral_score(row: pd.Series) -> float:
     for test_type in TEST_TYPES:
         metric_col = f"{test_type}_accuracy"
         if metric_col in row and pd.notna(row[metric_col]):
+            print(f"Found accuracy metric for {test_type}: {row[metric_col]}")
             integral_score *= row[metric_col]
             metric_count += 1
+    print(f"Metric count: {metric_count}")

     # If no accuracy metrics were found at all, the score remains 1.0 before penalties.
     # The library returns 0.0 in this case (`return integral_score if count > 0 else 0.0`)
@@ -42,8 +44,8 @@ def calculate_integral_score(row: pd.Series) -> float:
     micro_error_col = "micro_avg_error_ratio"
     if micro_error_col in row and pd.notna(row[micro_error_col]):
         # Micro error is stored as %, convert back to ratio
-
-        integral_score *= (1.0 -
+        micro_error_ratio = row[micro_error_col] / 100.0
+        integral_score *= (1.0 - micro_error_ratio)

     # Runtime Penalty
     avg_runtime_ms = None # Initialize
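For readers following the scoring change: the hunks above compose the score multiplicatively, so each available `{test_type}_accuracy` scales the running product, and the micro-average error (stored as a percent) is converted back to a ratio and folded in as a `(1.0 - error)` factor. Below is a minimal, self-contained sketch of how the surrounding function might fit together; the function name, the example TEST_TYPES values, and everything around the runtime penalty are illustrative assumptions, not the repository's actual code.

import pandas as pd

# Illustrative reconstruction only; the TEST_TYPES values, the function name,
# and the runtime-penalty handling are assumptions not shown in the diff.
TEST_TYPES = ["code_generation", "bug_fixing"]  # assumed test types

def calculate_integral_score_sketch(row: pd.Series) -> float:
    integral_score = 1.0
    metric_count = 0

    # Multiply in every accuracy metric that is present and non-null.
    for test_type in TEST_TYPES:
        metric_col = f"{test_type}_accuracy"
        if metric_col in row and pd.notna(row[metric_col]):
            integral_score *= row[metric_col]
            metric_count += 1

    # Micro-average error is stored as a percentage; convert it back to a
    # ratio and apply it as a (1 - error) factor.
    micro_error_col = "micro_avg_error_ratio"
    if micro_error_col in row and pd.notna(row[micro_error_col]):
        micro_error_ratio = row[micro_error_col] / 100.0
        integral_score *= (1.0 - micro_error_ratio)

    # Runtime penalty: the formula sits outside the changed hunks, so it is
    # omitted here; the diff only shows avg_runtime_ms being initialized.
    avg_runtime_ms = None  # Initialize

    # As quoted in the diff comment: with no accuracy metrics, return 0.0.
    return integral_score if metric_count > 0 else 0.0

# Example: accuracies of 0.9 and 0.8 with a 5% micro-average error give
# 0.9 * 0.8 * (1 - 0.05) ≈ 0.684.
row = pd.Series({
    "code_generation_accuracy": 0.9,
    "bug_fixing_accuracy": 0.8,
    "micro_avg_error_ratio": 5.0,  # stored as a percent
})
print(calculate_integral_score_sketch(row))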