Spaces:
Sleeping
Sleeping
""" | |
Data loader for Napolab Leaderboard | |
Loads datasets, benchmark results, and model metadata from YAML configuration files. | |
""" | |
import yaml | |
import os | |
from pathlib import Path | |
from typing import Dict, Any, Optional | |
class NapolabDataLoader:
    """Loads and manages Napolab data from a YAML configuration file.

    The parsed document is kept in ``self.data``. Loading is best-effort:
    any failure is printed and ``self.data`` is replaced by an empty
    structure, so the accessor methods keep working and return empty
    results instead of raising.
    """

    # Top-level sections placed in the fallback structure when loading fails.
    _DEFAULT_SECTIONS = (
        'datasets',
        'benchmark_results',
        'model_metadata',
        'additional_models',
    )

    def __init__(self, data_file: str = "data.yaml"):
        """
        Initialize the data loader and immediately load the data.

        Args:
            data_file: Path to the YAML data file, resolved relative to the
                directory that contains this module.
        """
        self.data_file = data_file
        self.data = None
        self.load_data()

    @classmethod
    def _empty_data(cls) -> Dict[str, Any]:
        """Build a fresh fallback structure with every known section empty."""
        return {section: {} for section in cls._DEFAULT_SECTIONS}

    def _section(self, key: str) -> Dict[str, Any]:
        """Return the top-level section ``key``, or ``{}`` when unavailable.

        A section counts as unavailable when ``self.data`` is not a mapping,
        when the key is absent, or when the key is present with a null value
        (e.g. ``datasets:`` with nothing after it in the YAML file).
        """
        if not isinstance(self.data, dict):
            return {}
        section = self.data.get(key)
        return {} if section is None else section

    def load_data(self) -> None:
        """Load data from the YAML file.

        On any failure (missing file, unreadable file, invalid YAML, or a
        document whose top level is not a mapping) the error is printed and
        ``self.data`` falls back to an empty structure.
        """
        try:
            # Resolve the data file relative to the directory of this script.
            script_dir = Path(__file__).parent
            data_path = script_dir / self.data_file
            if not data_path.exists():
                raise FileNotFoundError(f"Data file not found: {data_path}")
            with open(data_path, 'r', encoding='utf-8') as file:
                loaded = yaml.safe_load(file)
            # safe_load yields None for an empty document and may yield a
            # list or scalar; every accessor below needs a mapping.
            if not isinstance(loaded, dict):
                raise ValueError(
                    f"Expected a mapping at the top level of {data_path}, "
                    f"got {type(loaded).__name__}"
                )
            self.data = loaded
        except Exception as e:
            # Deliberately broad: loading is best-effort and must not crash
            # the importing application.
            print(f"Error loading data from {self.data_file}: {e}")
            # Fallback to empty data structure
            self.data = self._empty_data()

    def get_datasets(self) -> Dict[str, Any]:
        """Get all datasets information."""
        return self._section('datasets')

    def get_benchmark_results(self) -> Dict[str, Any]:
        """Get all benchmark results."""
        return self._section('benchmark_results')

    def get_model_metadata(self) -> Dict[str, Any]:
        """Get all model metadata."""
        return self._section('model_metadata')

    def get_additional_models(self) -> Dict[str, Any]:
        """Get additional models for the Model Hub."""
        return self._section('additional_models')

    def get_dataset_info(self, dataset_name: str) -> Optional[Dict[str, Any]]:
        """Get information for a specific dataset, or None if it is unknown."""
        return self._section('datasets').get(dataset_name)

    def get_benchmark_for_dataset(self, dataset_name: str) -> Optional[Dict[str, Any]]:
        """Get benchmark results for a specific dataset, or None if it has none."""
        return self._section('benchmark_results').get(dataset_name)

    def get_model_info(self, model_name: str) -> Optional[Dict[str, Any]]:
        """Get metadata for a specific model, or None if it is unknown."""
        return self._section('model_metadata').get(model_name)

    def get_available_datasets(self) -> list:
        """Get list of available dataset names."""
        return list(self._section('datasets'))

    def get_available_models_for_dataset(self, dataset_name: str) -> list:
        """Get list of models that have benchmark results for a dataset.

        Returns an empty list when the dataset has no benchmark entry.
        """
        benchmark = self.get_benchmark_for_dataset(dataset_name)
        return list(benchmark.keys()) if benchmark else []

    def get_all_models(self) -> list:
        """Get list of all model names present in the model metadata."""
        return list(self._section('model_metadata'))

    def validate_data(self) -> bool:
        """Validate the loaded data structure.

        Returns:
            True when ``self.data`` is a mapping containing the keys
            'datasets', 'benchmark_results' and 'model_metadata'; otherwise
            False, after printing what is wrong.
        """
        if not isinstance(self.data, dict):
            print("Loaded data is not a mapping")
            return False
        required_keys = ['datasets', 'benchmark_results', 'model_metadata']
        for key in required_keys:
            if key not in self.data:
                print(f"Missing required key: {key}")
                return False
        return True

    def reload_data(self) -> None:
        """Reload data from the YAML file."""
        self.load_data()

    def export_data(self, output_file: str = "exported_data.yaml") -> None:
        """Export the current data to a YAML file.

        Args:
            output_file: Destination path, passed to ``open`` as given.

        Errors are printed rather than raised, matching the best-effort
        behaviour of ``load_data``.
        """
        try:
            with open(output_file, 'w', encoding='utf-8') as file:
                yaml.dump(self.data, file, default_flow_style=False, allow_unicode=True, sort_keys=False)
            print(f"Data exported to {output_file}")
        except Exception as e:
            print(f"Error exporting data: {e}")
# Global data loader instance | |
# Shared loader used by the module-level helpers below. Constructing it calls
# load_data(), so the default "data.yaml" is read when this module is imported.
data_loader: NapolabDataLoader = NapolabDataLoader()
# Convenience functions for backward compatibility | |
def get_napolab_datasets() -> Dict[str, Any]:
    """Return all dataset entries from the shared ``data_loader``.

    Module-level wrapper kept for backward compatibility.
    """
    datasets = data_loader.get_datasets()
    return datasets
def get_sample_benchmark_results() -> Dict[str, Any]:
    """Return all benchmark results from the shared ``data_loader``.

    Module-level wrapper kept for backward compatibility.
    """
    results = data_loader.get_benchmark_results()
    return results
def get_model_metadata() -> Dict[str, Any]:
    """Return all model metadata from the shared ``data_loader``.

    Module-level wrapper kept for backward compatibility.
    """
    metadata = data_loader.get_model_metadata()
    return metadata
def get_additional_models() -> Dict[str, Any]:
    """Return the additional models from the shared ``data_loader``.

    Module-level wrapper kept for backward compatibility.
    """
    extra_models = data_loader.get_additional_models()
    return extra_models