Spaces:
Sleeping
Sleeping
File size: 5,486 Bytes
33d4721 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
from typing import Any, Type
from autotrain.backends.base import AVAILABLE_HARDWARE
def common_args():
    """Return the list of common argument specifications.

    Every specification is a dict that starts with the keys ``"arg"``,
    ``"help"`` and ``"required"`` (in that order), followed by whichever of
    ``"type"``, ``"default"``, ``"action"``, ``"choices"`` and ``"alias"``
    apply to that argument. A new list of new dicts is built on each call.

    NOTE(review): the keys look like ``argparse.add_argument`` keyword
    arguments; confirm against the caller that consumes this list.
    """

    def spec(arg, help_text, required=False, **extras):
        # ``extras`` keeps keyword order, so each call site controls the
        # order of the trailing keys in the resulting dict.
        return {"arg": arg, "help": help_text, "required": required, **extras}

    return [
        spec("--train", "Command to train the model", action="store_true"),
        spec(
            "--deploy",
            "Command to deploy the model (limited availability)",
            action="store_true",
        ),
        spec(
            "--inference",
            "Command to run inference (limited availability)",
            action="store_true",
        ),
        spec("--username", "Hugging Face Hub Username", type=str),
        spec(
            "--backend",
            "Backend to use: default or spaces. Spaces backend requires push_to_hub & username. Advanced users only.",
            type=str,
            default="local",
            choices=AVAILABLE_HARDWARE.keys(),
        ),
        spec(
            "--token",
            "Your Hugging Face API token. Token must have write access to the model hub.",
            type=str,
        ),
        spec(
            "--push-to-hub",
            "Push to hub after training will push the trained model to the Hugging Face model hub.",
            action="store_true",
        ),
        spec("--model", "Base model to use for training", required=True, type=str),
        spec(
            "--project-name",
            "Output directory / repo id for trained model (must be unique on hub)",
            required=True,
            type=str,
        ),
        spec(
            "--data-path",
            "Train dataset to use. When using cli, this should be a directory path containing training and validation data in appropriate formats",
            type=str,
        ),
        spec("--train-split", "Train dataset split to use", type=str, default="train"),
        spec("--valid-split", "Validation dataset split to use", type=str, default=None),
        spec(
            "--batch-size",
            "Training batch size to use",
            type=int,
            default=2,
            alias=["--train-batch-size"],
        ),
        spec("--seed", "Random seed for reproducibility", default=42, type=int),
        spec("--epochs", "Number of training epochs", default=1, type=int),
        spec(
            "--gradient-accumulation",
            "Gradient accumulation steps",
            default=1,
            type=int,
            alias=["--gradient-accumulation"],
        ),
        spec(
            "--disable-gradient-checkpointing",
            "Disable gradient checkpointing",
            action="store_true",
            alias=["--disable-gradient-checkpointing", "--disable-gc"],
        ),
        spec("--lr", "Learning rate", default=5e-4, type=float),
        spec(
            "--log",
            "Use experiment tracking",
            type=str,
            default="none",
            choices=["none", "wandb", "tensorboard"],
        ),
    ]
def python_type_from_schema_field(field_data: dict) -> Type:
    """Convert a JSON schema field description to a Python type.

    Args:
        field_data: One property entry of a JSON schema. Its ``"type"`` key
            is used when present and truthy; otherwise the members of its
            ``"anyOf"`` list are inspected.

    Returns:
        ``str``, ``float``, ``int`` or ``bool`` for the schema types
        ``"string"``, ``"number"``, ``"integer"`` and ``"boolean"``. Any
        other or missing type falls back to ``str``.
    """
    type_map = {
        "string": str,
        "number": float,
        "integer": int,
        "boolean": bool,
    }
    field_type = field_data.get("type")
    if field_type:
        return type_map.get(field_type, str)
    for type_option in field_data.get("anyOf") or []:
        # Members such as {"$ref": ...} have no "type" key; skip them
        # instead of raising KeyError, and skip the "null" alternative.
        option_type = type_option.get("type")
        if option_type is None or option_type == "null":
            continue
        return type_map.get(option_type, str)
    return str
def get_default_value(field_data: dict) -> Any:
    """Return the ``"default"`` entry of a schema field, or ``None``.

    Args:
        field_data: One property entry of a JSON schema.

    Returns:
        The value stored under ``"default"``. ``None`` is returned when the
        key is absent, so a field without a declared default no longer
        raises ``KeyError``.
    """
    return field_data.get("default")
def get_field_info(params_class):
    """Build argument specifications from a params class's JSON schema.

    ``params_class`` must provide ``model_json_schema()``. Each entry under
    the schema's ``"properties"`` becomes one dict with the keys ``"arg"``,
    ``"alias"``, ``"type"``, ``"help"`` and ``"default"``. Fields whose
    resolved type is ``bool`` also get ``"action": "store_true"``.

    Returns:
        A list of those dicts, in the order the properties appear.
    """
    specs = []
    for name, data in params_class.model_json_schema().get("properties", {}).items():
        # Dashed spelling is the primary flag; the raw field name is kept
        # as an alias alongside it.
        dashed = f"--{name.replace('_', '-')}"
        py_type = python_type_from_schema_field(data)
        spec = {
            "arg": dashed,
            "alias": [f"--{name}", dashed],
            "type": py_type,
            "help": data.get("title", ""),
            "default": get_default_value(data),
        }
        if py_type is bool:
            spec["action"] = "store_true"
        specs.append(spec)
    return specs
|