Spaces:
Sleeping
Sleeping
File size: 4,386 Bytes
0855f92 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
#!/usr/bin/env python3
"""
Example Usage of Napolab Leaderboard Data Management
This script demonstrates how to use the YAML-based data management system.
"""
from data_loader import NapolabDataLoader
from manage_data import validate_yaml_structure, add_dataset, add_benchmark_result, add_model_metadata, export_data
import yaml
def example_usage() -> None:
    """Walk through the full data-management workflow end to end.

    Demonstrates loading the YAML-backed Napolab data, validating its
    structure, adding a dataset / model / benchmark result, exporting the
    updated data, and reading values back through the loader's accessors.

    Side effects:
        Prints progress to stdout and writes ``example_updated_data.yaml``
        to the current working directory.
    """
    print("🚀 Napolab Leaderboard Data Management Example")
    print("=" * 50)

    # 1. Load existing data from the bundled YAML file.
    print("\n1. Loading existing data...")
    data_loader = NapolabDataLoader()
    data = data_loader.data
    print(f"✅ Loaded {len(data['datasets'])} datasets")
    print(f"✅ Loaded {len(data['model_metadata'])} models")

    # 2. Validate the data structure; bail out early if it is malformed,
    # since the add_* helpers below assume a well-formed document.
    print("\n2. Validating data structure...")
    if validate_yaml_structure(data):
        print("✅ Data structure is valid!")
    else:
        print("❌ Data structure has issues!")
        return

    # 3. Add a new dataset entry.
    print("\n3. Adding a new dataset...")
    data = add_dataset(
        data=data,
        dataset_name="example_dataset",
        name="Example Dataset",
        description="An example dataset for demonstration",
        tasks=["Classification", "Sentiment Analysis"],
        url="https://huggingface.co/datasets/example",
    )

    # 4. Add metadata for a new model.
    print("\n4. Adding a new model...")
    data = add_model_metadata(
        data=data,
        model_name="example-model",
        parameters=125000000,
        architecture="BERT Large",
        base_model="bert-large-uncased",
        task="Classification",
        huggingface_url="https://huggingface.co/example/model",
    )

    # 5. Record benchmark metrics for the new model on the new dataset.
    print("\n5. Adding benchmark results...")
    data = add_benchmark_result(
        data=data,
        dataset_name="example_dataset",
        model_name="example-model",
        metrics={
            "accuracy": 0.89,
            "f1": 0.88,
            "precision": 0.90,
            "recall": 0.87,
        },
    )

    # 6. Persist the updated document to disk.
    print("\n6. Exporting updated data...")
    export_data(data, "example_updated_data.yaml")

    # 7. Read values back through the loader's accessor methods.
    print("\n7. Demonstrating data access...")

    # Dataset lookup (uses the original loader, not the mutated copy).
    dataset_info = data_loader.get_dataset_info("assin")
    if dataset_info:
        print(f"📊 ASSIN dataset: {dataset_info['name']}")
        print(f"   Tasks: {', '.join(dataset_info['tasks'])}")

    # Models available for a given dataset.
    models = data_loader.get_available_models_for_dataset("assin")
    print(f"🤖 Available models for ASSIN: {len(models)} models")

    # Model metadata lookup.
    model_info = data_loader.get_model_info("mdeberta-v3-base-assin-similarity")
    if model_info:
        print(f"🔧 Model parameters: {model_info['parameters']:,}")
        print(f"   Architecture: {model_info['architecture']}")

    print("\n✅ Example completed successfully!")
    print("📝 Check 'example_updated_data.yaml' for the updated data")
def demonstrate_yaml_structure() -> None:
    """Print a minimal example of the expected YAML document layout.

    Builds an in-memory dict mirroring the three top-level sections
    (``datasets``, ``benchmark_results``, ``model_metadata``) and dumps
    it as YAML to stdout so users can see the target schema.
    """
    print("\n📋 YAML Data Structure Example:")
    print("-" * 30)

    example_data = {
        'datasets': {
            'my_dataset': {
                'name': 'My Dataset',
                'description': 'A custom dataset',
                'tasks': ['Classification'],
                'url': 'https://huggingface.co/datasets/my_dataset',
            }
        },
        'benchmark_results': {
            'my_dataset': {
                'my-model': {
                    'accuracy': 0.92,
                    'f1': 0.91,
                }
            }
        },
        'model_metadata': {
            'my-model': {
                'parameters': 110000000,
                'architecture': 'BERT Base',
                'base_model': 'bert-base-uncased',
                'task': 'Classification',
                'huggingface_url': 'https://huggingface.co/my-model',
            }
        },
    }

    # block style (default_flow_style=False) keeps the output readable;
    # allow_unicode preserves any non-ASCII names verbatim.
    print(yaml.dump(example_data, default_flow_style=False, allow_unicode=True))
if __name__ == "__main__":
    # Run the full workflow demo, then show the schema example.
    example_usage()
    demonstrate_yaml_structure()