Spaces:
Running
Running
<html lang="en"> | |
<head> | |
<meta charset="UTF-8"> | |
<meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
<title>Checker-V1.1: Dynamic-SUPERB Validator</title> | |
<style> | |
body { | |
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; | |
margin: 0; | |
padding: 20px; | |
background: linear-gradient(120deg, #f8fafc 0%, #e9ecef 100%); | |
color: #212529; | |
} | |
/* Menu Styles */ | |
.menu-container { | |
position: fixed; | |
top: 15px; | |
left: 15px; | |
z-index: 1000; | |
} | |
.menu-icon { | |
font-size: 28px; | |
cursor: pointer; | |
user-select: none; | |
color: #667eea; | |
background: white; | |
padding: 8px 12px; | |
border-radius: 8px; | |
box-shadow: 0 2px 8px rgba(0,0,0,0.15); | |
transition: all 0.2s ease; | |
} | |
.menu-icon:hover { | |
background: #667eea; | |
color: white; | |
transform: scale(1.05); | |
} | |
.menu-list { | |
display: none; | |
position: absolute; | |
top: 50px; | |
left: 0; | |
background: white; | |
border: 1px solid #dee2e6; | |
border-radius: 8px; | |
box-shadow: 0 4px 16px rgba(0,0,0,0.15); | |
min-width: 220px; | |
font-size: 14px; | |
overflow: hidden; | |
} | |
.menu-list.show { | |
display: block; | |
animation: slideDown 0.2s ease; | |
} | |
@keyframes slideDown { | |
from { opacity: 0; transform: translateY(-10px); } | |
to { opacity: 1; transform: translateY(0); } | |
} | |
.menu-list a { | |
display: block; | |
padding: 12px 16px; | |
color: #495057; | |
text-decoration: none; | |
border-bottom: 1px solid #f8f9fa; | |
transition: background-color 0.2s ease; | |
} | |
.menu-list a:last-child { | |
border-bottom: none; | |
} | |
.menu-list a:hover { | |
background-color: #667eea; | |
color: white; | |
} | |
.menu-list a::before { | |
content: "→ ";
margin-right: 8px; | |
opacity: 0; | |
transition: opacity 0.2s ease; | |
} | |
.menu-list a:hover::before { | |
opacity: 1; | |
} | |
.container { | |
max-width: 800px; | |
margin: 0 auto; | |
background: white; | |
border-radius: 8px; | |
box-shadow: 0 2px 10px rgba(0,0,0,0.1); | |
overflow: hidden; | |
} | |
.header { | |
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); | |
color: white; | |
padding: 30px; | |
text-align: center; | |
} | |
.header h1 { | |
margin: 0; | |
font-size: 2.5em; | |
font-weight: 300; | |
} | |
.controls { | |
padding: 20px 30px; | |
border-bottom: 1px solid #dee2e6; | |
background: #f8f9fa; | |
} | |
.upload-section { | |
background: #f5f7fa; | |
border-radius: 10px; | |
padding: 24px; | |
margin-bottom: 24px; | |
border: 1.5px solid #e0e3e8; | |
} | |
label { | |
display: block; | |
margin-bottom: 5px; | |
font-weight: 600; | |
color: #495057; | |
} | |
input[type="file"] { | |
width: 100%; | |
margin: 10px 0; | |
padding: 12px; | |
border: 2px solid #dee2e6; | |
border-radius: 6px; | |
background: white; | |
} | |
.btn { | |
background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); | |
color: white; | |
padding: 12px 28px; | |
border: none; | |
border-radius: 6px; | |
cursor: pointer; | |
font-size: 1.08em; | |
font-weight: 600; | |
margin: 8px 12px 0 0; | |
transition: transform 0.2s; | |
} | |
.btn:hover { | |
transform: translateY(-1px); | |
} | |
.btn:disabled { | |
opacity: 0.7; | |
cursor: not-allowed; | |
} | |
.btn-download { | |
background: #e9ecef; | |
color: #495057; | |
} | |
.validation-panel { | |
margin: 24px 0; | |
border-radius: 12px; | |
overflow: hidden; | |
box-shadow: 0 2px 8px rgba(80,80,160,0.04); | |
} | |
.validation-header { | |
padding: 16px 24px; | |
font-weight: 600; | |
font-size: 1.1em; | |
} | |
.validation-content { | |
padding: 20px 24px; | |
background: white; | |
} | |
.error-list { | |
margin: 0; | |
padding-left: 24px; | |
} | |
.error-list li { | |
margin-bottom: 8px; | |
padding: 6px 0; | |
border-bottom: 1px solid #eee; | |
} | |
.success { background: #d4edda; color: #155724; } | |
.error { background: #f8d7da; color: #721c24; } | |
.warning { background: #fff3cd; color: #856404; } | |
.stats { | |
padding: 15px 30px; | |
background: #e9ecef; | |
font-size: 14px; | |
color: #495057; | |
} | |
</style> | |
</head> | |
<body> | |
<!-- Menu container: a hamburger icon (#menuIcon) that toggles the drop-down list (#menuList) of project links -->
<div class="menu-container">
  <div class="menu-icon" id="menuIcon">☰</div>
  <div class="menu-list" id="menuList">
    <a href="index.html">Leaderboard</a>
    <!-- External links open in a new tab; rel="noopener noreferrer" keeps the opened page from reaching back via window.opener -->
    <a href="https://github.com/dynamic-superb/dynamic-superb" target="_blank" rel="noopener noreferrer">GitHub Repository</a>
    <a href="https://arxiv.org/abs/2411.05361" target="_blank" rel="noopener noreferrer">Paper on arXiv</a>
  </div>
</div>
<div class="container">
  <div class="header">
    <h1>Dynamic-SUPERB Submission Validator</h1>
  </div>
  <div class="controls">
    <div class="upload-section">
      <label for="submissionFile">Upload Model CSV:</label>
      <input type="file" id="submissionFile" accept=".csv">
      <!-- Explicit type="button": these trigger in-page actions and must never act as submit buttons -->
      <button id="validateBtn" class="btn" type="button" onclick="validateSubmission()">Validate</button>
      <!-- Hidden until a validation run produces errors; the script toggles style.display -->
      <button id="downloadBtn" class="btn btn-download" type="button" style="display:none;" onclick="downloadErrors()">Download Error Report</button>
    </div>
    <!-- Validation output is injected here by the script; aria-live lets assistive tech announce the update -->
    <div id="validationResult" aria-live="polite"></div>
  </div>
  <div class="stats">
    Dynamic-SUPERB Submission Validator
  </div>
</div>
<script> | |
// Menu functionality: wire the hamburger icon to its drop-down list.
const menuIcon = document.getElementById('menuIcon');
const menuList = document.getElementById('menuList');

// A click on the icon flips the 'show' class on the list. Propagation is
// stopped so the click does not also reach the document-level handler below.
menuIcon.addEventListener('click', (event) => {
  event.stopPropagation();
  menuList.classList.toggle('show');
});

// A click anywhere outside both the icon and the list hides the list.
document.addEventListener('click', ({ target }) => {
  const clickedInsideMenu = menuIcon.contains(target) || menuList.contains(target);
  if (clickedInsideMenu) {
    return;
  }
  menuList.classList.remove('show');
});

// NOTE(review): not referenced anywhere else in the visible script.
let errorReport = [];
/**
 * Split a single CSV line into trimmed field values.
 *
 * Commas inside double-quoted sections do not split the field, the quote
 * characters that delimit a section are dropped, and a doubled quote ("")
 * inside a quoted section yields one literal quote character.
 *
 * @param {string} line - One line of CSV text (without its line terminator).
 * @returns {string[]} The fields in order; always contains at least one entry.
 */
function parseCSVLine(line) {
  const result = [];
  let current = '';
  let inQuotes = false;
  for (let i = 0; i < line.length; i++) {
    const char = line[i];
    if (char === '"') {
      if (inQuotes && line[i + 1] === '"') {
        // Escaped quote inside a quoted section: keep one literal quote
        // and skip the second character of the pair.
        current += '"';
        i++;
      } else {
        inQuotes = !inQuotes;
      }
    } else if (char === ',' && !inQuotes) {
      result.push(current.trim());
      current = '';
    } else {
      current += char;
    }
  }
  // The last field has no trailing comma, so flush it explicitly.
  result.push(current.trim());
  return result;
}
/**
 * Produce the canonical lookup key for a task name: surrounding whitespace
 * removed, lower-cased, and every run of underscores/whitespace collapsed
 * to a single space.
 *
 * @param {string} name - Task name as written in a CSV cell.
 * @returns {string} The normalized key.
 */
function normalizeTaskName(name) {
  const separatorRuns = /[_\s]+/g;
  const lowered = name.trim().toLowerCase();
  return lowered.replace(separatorRuns, ' ');
}
/**
 * Validates a submission CSV against the reference table fetched from
 * 'data.csv'.
 *
 * The reference supplies (a) the set of known tasks with their allowed
 * metrics and (b) the model column names that already exist. A submission is
 * expected to have "Task Name" and "Metric" as its first two columns and one
 * column per model after that.
 */
class SimplifiedDynamicSUPERBValidator {
  constructor() {
    // normalized task name -> { originalName, metrics: string[], narMetrics: string[] }
    this.reference = new Map();
    // Model column names found in the reference header.
    this.existingModelNames = new Set();
    // Set when the reference could not be loaded; checked by validate().
    this.loadError = null;
    // Keep the promise so validate() can wait for the reference instead of
    // racing against it.
    this.ready = this.loadReference();
  }

  /**
   * Fetch and parse 'data.csv' into this.reference / this.existingModelNames.
   *
   * Never rejects: a failure is logged and recorded in this.loadError so the
   * un-awaited call in the constructor cannot cause an unhandled rejection.
   *
   * @returns {Promise<void>}
   */
  async loadReference() {
    try {
      const response = await fetch('data.csv');
      if (!response.ok) {
        // Without this check an HTTP error page would be parsed as CSV.
        throw new Error(`HTTP ${response.status} while fetching data.csv`);
      }
      const csvText = await response.text();
      const lines = csvText.trim().split('\n');
      const headers = parseCSVLine(lines[0]);

      // Find HigherBetter and Taxonomy column indices in REFERENCE data only
      const higherBetterIndex = headers.findIndex(h => h.trim().toLowerCase() === 'higherbetter');
      const taxonomyIndex = headers.findIndex(h => h.trim().toLowerCase() === 'taxonomy');

      // Model columns are the ones strictly between HigherBetter and Taxonomy.
      const modelStartIndex = higherBetterIndex + 1;
      const modelEndIndex = taxonomyIndex;
      for (let i = modelStartIndex; i < modelEndIndex; i++) {
        this.existingModelNames.add(headers[i].trim());
      }

      for (let i = 1; i < lines.length; i++) {
        const row = parseCSVLine(lines[i]);
        if (row.length < 4) continue;
        const taskName = normalizeTaskName(row[0]);
        const metric = row[1];
        // Rows whose metric is literally 'X' are not part of the reference.
        if (metric === 'X') continue;

        if (!this.reference.has(taskName)) {
          this.reference.set(taskName, {
            originalName: row[0],
            metrics: [],
            narMetrics: []
          });
        }
        const task = this.reference.get(taskName);
        if (!task.metrics.includes(metric)) task.metrics.push(metric);
        // Track NAR metrics specifically; de-duplicated so a repeated
        // reference row cannot produce repeated "missing NAR" errors later.
        if (metric.includes('NAR') && !task.narMetrics.includes(metric)) {
          task.narMetrics.push(metric);
        }
      }

      // Sort metrics with NAR first, alphabetical within each group. The
      // comparator is symmetric even when both names contain 'NAR'.
      const narFirst = (a, b) => {
        const aIsNar = a.includes('NAR');
        const bIsNar = b.includes('NAR');
        if (aIsNar !== bIsNar) return aIsNar ? -1 : 1;
        return a.localeCompare(b);
      };
      this.reference.forEach(task => {
        task.metrics.sort(narFirst);
      });
      this.loadError = null;
    } catch (error) {
      this.loadError = error;
      console.error('Error loading reference:', error);
    }
  }

  /**
   * Validate an uploaded submission file.
   *
   * @param {Blob} file - The CSV file chosen by the user.
   * @returns {Promise<{isValid: boolean, errors: Array<{line: (number|string), task: string, error: string}>,
   *   warnings: string[], stats: {total: number, valid: number, invalid: number},
   *   tasks: Map, modelNames: string[]}>}
   * @throws {Error} If the reference data failed to load or the file cannot be read.
   */
  async validate(file) {
    // Wait for the reference; validating against an empty reference would
    // wrongly flag every task as unknown.
    await this.ready;
    if (this.loadError) {
      throw new Error(`Reference data (data.csv) could not be loaded: ${this.loadError.message}`);
    }

    const csvText = await this.readFile(file);
    const lines = csvText.trim().split('\n');
    const header = parseCSVLine(lines[0]);
    console.log('Submission CSV Headers:', header);

    // SIMPLIFIED: Model columns are everything after "Metric" (index 1)
    const modelStartIndex = 2; // After "Task Name" and "Metric"
    const submissionModelNames = header.slice(modelStartIndex);
    console.log('Found models in submission:', submissionModelNames);
    console.log('Model count:', submissionModelNames.length);

    const result = {
      isValid: true,
      errors: [],
      warnings: [],
      stats: { total: 0, valid: 0, invalid: 0 },
      tasks: new Map(),
      modelNames: submissionModelNames
    };

    // An empty file or a header without model columns must not pass.
    if (submissionModelNames.length === 0) {
      result.isValid = false;
      result.errors.push({
        line: 'Header',
        task: 'Model Names',
        error: 'No model columns found (expected: Task Name, Metric, then one column per model)'
      });
    }

    // Check for model name overlaps
    const overlappingModels = submissionModelNames.filter(modelName =>
      this.existingModelNames.has(modelName.trim())
    );
    if (overlappingModels.length > 0) {
      result.isValid = false;
      result.errors.push({
        line: 'Header',
        task: 'Model Names',
        error: `Model name overlap detected: ${overlappingModels.join(', ')} already exist in data.csv`
      });
    }

    // Validate each row (reported line numbers are 1-based file lines)
    for (let i = 1; i < lines.length; i++) {
      // Blank lines carry no data; skip them rather than flag them.
      if (lines[i].trim() === '') continue;

      result.stats.total++;
      const row = parseCSVLine(lines[i]);
      const errors = [];

      // Basic structure check - must have at least Task Name, Metric, and one model column
      if (row.length < 3) {
        result.errors.push({
          line: i + 1,
          task: row[0] || 'Unknown',
          error: 'Invalid row format (minimum: Task Name, Metric, and at least one model score)'
        });
        result.isValid = false;
        result.stats.invalid++;
        continue;
      }

      const taskName = normalizeTaskName(row[0]);
      const metric = row[1];

      // Skip X metrics (counted in stats.total, but neither valid nor invalid)
      if (metric === 'X') {
        result.warnings.push(`Line ${i + 1}: Skipped metric "X"`);
        continue;
      }

      // Task existence check
      if (!this.reference.has(taskName)) {
        errors.push(`Unknown task: "${row[0]}"`);
      } else {
        const refTask = this.reference.get(taskName);

        // Metric check
        if (!refTask.metrics.includes(metric)) {
          errors.push(`Invalid metric: "${metric}" for task "${row[0]}"`);
        }

        // Track metrics per task for NAR check
        if (!result.tasks.has(taskName)) {
          result.tasks.set(taskName, { metrics: new Map() });
        }
        const taskData = result.tasks.get(taskName);
        if (!taskData.metrics.has(metric)) {
          taskData.metrics.set(metric, []);
        }

        // Track which models have this metric ('-' and empty cells count as not reported)
        for (let j = 0; j < submissionModelNames.length; j++) {
          const modelValue = row[modelStartIndex + j];
          if (modelValue && modelValue !== '-' && modelValue !== '') {
            taskData.metrics.get(metric).push(submissionModelNames[j]);
          }
        }
      }

      // Record errors
      if (errors.length > 0) {
        result.isValid = false;
        result.stats.invalid++;
        errors.forEach(error => {
          result.errors.push({
            line: i + 1,
            task: row[0],
            error: error
          });
        });
      } else {
        result.stats.valid++;
      }
    }

    // Check for missing NAR metrics for each model. Only tasks that appear
    // in the submission are checked.
    result.tasks.forEach((taskData, taskName) => {
      if (!this.reference.has(taskName)) return;
      const refTask = this.reference.get(taskName);
      if (refTask.narMetrics.length === 0) return; // No NAR metrics for this task

      // For each NAR metric in reference
      refTask.narMetrics.forEach(narMetric => {
        // Models that reported a value for this NAR metric
        const reportedModels = taskData.metrics.has(narMetric)
          ? taskData.metrics.get(narMetric)
          : [];
        // Find models missing this NAR metric
        const missingModels = submissionModelNames.filter(model =>
          !reportedModels.includes(model)
        );
        if (missingModels.length > 0) {
          result.isValid = false;
          result.errors.push({
            line: 'Multiple',
            task: refTask.originalName,
            error: `Missing NAR metric "${narMetric}" for models: ${missingModels.join(', ')}`
          });
        }
      });
    });

    return result;
  }

  /**
   * Read a file as text.
   *
   * @param {Blob} file - File to read.
   * @returns {Promise<string>} Resolves with the file's text content.
   */
  readFile(file) {
    return new Promise((resolve, reject) => {
      const reader = new FileReader();
      reader.onload = () => resolve(reader.result);
      // Reject with an Error-like object so callers can show a message.
      reader.onerror = () => reject(reader.error || new Error('Failed to read file'));
      reader.readAsText(file);
    });
  }
}
// UI Handling
// Single shared validator; its constructor starts loading the reference data.
const validator = new SimplifiedDynamicSUPERBValidator();
// Errors from the most recent failed validation; consumed by downloadErrors().
let currentErrors = [];

/**
 * Click handler for the "Validate" button: validates the selected file and
 * renders the outcome (success panel, error list, warnings) into
 * #validationResult. Shows the download button when errors were found.
 *
 * All text that originates from the uploaded file is HTML-escaped before it
 * is placed into innerHTML.
 *
 * @returns {Promise<void>}
 */
async function validateSubmission() {
  const fileInput = document.getElementById('submissionFile');
  const resultDiv = document.getElementById('validationResult');
  const downloadBtn = document.getElementById('downloadBtn');
  const validateBtn = document.getElementById('validateBtn');

  // Task names, metric names and messages come from the uploaded CSV, so
  // they must never be interpreted as markup.
  const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => htmlEntities[ch]);

  if (!fileInput.files.length) {
    resultDiv.innerHTML = `<div class="validation-panel error">
      <div class="validation-header">Error</div>
      <div class="validation-content">Please select a CSV file</div>
    </div>`;
    return;
  }

  validateBtn.disabled = true;
  validateBtn.textContent = 'Validating...';
  downloadBtn.style.display = 'none';
  currentErrors = [];

  try {
    const result = await validator.validate(fileInput.files[0]);
    let html = '';
    if (result.isValid) {
      html = `<div class="validation-panel success">
        <div class="validation-header">Validation Successful!</div>
        <div class="validation-content">
          <p><strong>Total Rows:</strong> ${result.stats.total}</p>
          <p><strong>Valid Rows:</strong> ${result.stats.valid}</p>
          <p><strong>Models:</strong> ${result.modelNames.length}</p>
          <p>✓ All NAR metrics are properly reported for all models!</p>
          <p>✓ No model name conflicts detected!</p>
          <p>✓ All tasks and metrics are valid!</p>
        </div>
      </div>`;
    } else {
      currentErrors = result.errors;
      downloadBtn.style.display = 'inline-block';
      html = `<div class="validation-panel error">
        <div class="validation-header">Validation Failed</div>
        <div class="validation-content">
          <p><strong>Total Rows:</strong> ${result.stats.total}</p>
          <p><strong>Valid Rows:</strong> ${result.stats.valid}</p>
          <p><strong>Errors Found:</strong> ${result.errors.length}</p>
          <ul class="error-list">${result.errors.map(e => `
            <li><strong>Line ${escapeHtml(e.line)}:</strong> ${escapeHtml(e.error)} (${escapeHtml(e.task)})</li>
          `).join('')}</ul>
        </div>
      </div>`;
    }
    if (result.warnings.length > 0) {
      html += `<div class="validation-panel warning">
        <div class="validation-header">Warnings</div>
        <div class="validation-content">
          <ul class="error-list">${result.warnings.map(w => `
            <li>${escapeHtml(w)}</li>
          `).join('')}</ul>
        </div>
      </div>`;
    }
    resultDiv.innerHTML = html;
  } catch (error) {
    // The rejection value may not carry a message (e.g. a raw event object).
    const message = error && error.message
      ? error.message
      : 'The file could not be read or validated';
    resultDiv.innerHTML = `<div class="validation-panel error">
      <div class="validation-header">Error</div>
      <div class="validation-content">${escapeHtml(message)}</div>
    </div>`;
  } finally {
    // Always hand the button back, whatever happened above.
    validateBtn.disabled = false;
    validateBtn.textContent = 'Validate';
  }
}
/**
 * Click handler for the "Download Error Report" button: serializes
 * currentErrors to CSV (columns Line, Task, Error) and triggers a browser
 * download named 'validation_errors.csv'. Does nothing when there are no
 * errors.
 */
function downloadErrors() {
  if (currentErrors.length === 0) return;

  // Quote a field and double any embedded quotes. The error messages
  // themselves contain double quotes, so plain wrapping would break the CSV.
  const quoteField = (value) => `"${String(value).replace(/"/g, '""')}"`;

  const rows = [
    ['Line', 'Task', 'Error'],
    ...currentErrors.map(e => [e.line, quoteField(e.task), quoteField(e.error)])
  ];
  const csvContent = rows.map(row => row.join(',')).join('\n');

  const blob = new Blob([csvContent], { type: 'text/csv' });
  const url = URL.createObjectURL(blob);

  // A temporary anchor is the way to give the download a file name.
  const a = document.createElement('a');
  a.href = url;
  a.download = 'validation_errors.csv';
  document.body.appendChild(a);
  a.click();
  document.body.removeChild(a);
  URL.revokeObjectURL(url);
}
</script> | |
</body> | |
</html> | |