#!/usr/bin/env python3
"""
Validation script for the updated Napolab data structure.
"""
import pandas as pd

from data_loader import NapolabDataLoader
from manage_data import validate_yaml_structure
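
# NOTE (assumption): the exact schema lives in data_loader.py / manage_data.py, which are
# not shown in this file. From the calls below, the loaded YAML is expected to expose at least:
#   - datasets:          {dataset_id: {name, tasks, ...}}
#   - benchmark_results: {dataset_id: {model_name: {accuracy, ...}}}
#   - model_metadata:    {model_name: {architecture, parameters, ...}}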


def main():
    """Validate the updated data structure."""
    print("Validating Updated Napolab Data Structure")
    print("=" * 50)
    print("Data Source: Master's thesis 'Lessons learned from the evaluation of Portuguese language models'")
    print("  by Ruan Chaves Rodrigues (2023) - University of Malta")
    print("  Available at: https://www.um.edu.mt/library/oar/handle/123456789/120557")
    print("=" * 50)

    # Load data
    data_loader = NapolabDataLoader()
    data = data_loader.data

    # Validate structure
    print("\n1. Validating YAML structure...")
    if validate_yaml_structure(data):
        print("YAML structure is valid!")
    else:
        print("YAML structure has issues!")
        return
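
    # NOTE (assumption): validate_yaml_structure() (from manage_data.py, not shown here) is
    # taken to return a truthy value on success; the early return above keeps the remaining
    # checks from running against a structurally invalid file.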

    # Check datasets
    print("\n2. Checking datasets...")
    datasets = data_loader.get_datasets()
    print(f"Found {len(datasets)} datasets:")
    for name, info in datasets.items():
        print(f"  - {name}: {info['name']} ({', '.join(info['tasks'])})")

    # Check benchmark results
    print("\n3. Checking benchmark results...")
    benchmark_results = data_loader.get_benchmark_results()
    print(f"Found {len(benchmark_results)} benchmark datasets:")
    for dataset_name, models in benchmark_results.items():
        print(f"  - {dataset_name}: {len(models)} models")

    # Check model metadata
    print("\n4. Checking model metadata...")
    model_metadata = data_loader.get_model_metadata()
    print(f"Found {len(model_metadata)} models:")

    # Group models by architecture
    architectures = {}
    for model_name, metadata in model_metadata.items():
        arch = metadata['architecture']
        if arch not in architectures:
            architectures[arch] = []
        architectures[arch].append(model_name)

    for arch, models in architectures.items():
        print(f"  - {arch}: {len(models)} models")
        for model in models[:3]:  # Show the first 3 models per architecture
            print(f"    * {model}")
        if len(models) > 3:
            print(f"    ... and {len(models) - 3} more")

    # Test data access functions
    print("\n5. Testing data access functions...")

    # Test getting available models for a dataset
    test_dataset = list(benchmark_results.keys())[0]
    models = data_loader.get_available_models_for_dataset(test_dataset)
    print(f"  Available models for {test_dataset}: {len(models)} models")

    # Test getting model info
    if models:
        test_model = models[0]
        model_info = data_loader.get_model_info(test_model)
        if model_info:
            print(f"  Model {test_model}: {model_info['parameters']:,} parameters")

    # Create a summary table
    print("\n6. Creating summary table...")
    summary_data = []
    for dataset_name, models in benchmark_results.items():
        for model_name, metrics in models.items():
            if model_name in model_metadata:
                summary_data.append({
                    'Dataset': dataset_name,
                    'Model': model_name,
                    'Architecture': model_metadata[model_name]['architecture'],
                    'Parameters': model_metadata[model_name]['parameters'],
                    'Performance': metrics.get('accuracy', 0)
                })

    if summary_data:
        df = pd.DataFrame(summary_data)
        print(f"Summary: {len(df)} model-dataset combinations")
        print(f"  Average performance: {df['Performance'].mean():.3f}")
        print(f"  Best performance: {df['Performance'].max():.3f}")
        print(f"  Model-dataset pairs with >0.9 performance: {(df['Performance'] > 0.9).sum()}")
print("\nβ Validation completed successfully!") | |
print("π The updated data structure is ready to use!") | |
if __name__ == "__main__": | |
main() |
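
# Usage sketch (the script's filename is not given here; "validate_data.py" is an assumed name):
#   python validate_data.py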