Spaces:
Sleeping
Sleeping
File size: 5,579 Bytes
33d4721 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 |
import os
import albumentations as A
import numpy as np
from sklearn import metrics
from autotrain.trainers.image_regression.dataset import ImageRegressionDataset
# Metric keys, as emitted by trainer.evaluate(), that are surfaced in the
# generated model card (filtered in create_model_card below).
VALID_METRICS = [
    "eval_loss",
    "eval_mse",
    "eval_mae",
    "eval_r2",
    "eval_rmse",
    "eval_explained_variance",
]
MODEL_CARD = """
---
library_name: transformers
tags:
- autotrain
- vision
- image-classification
- image-regression{base_model}
widget:
- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg
example_title: Tiger
- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/teapot.jpg
example_title: Teapot
- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/palace.jpg
example_title: Palace{dataset_tag}
---
# Model Trained Using AutoTrain
- Problem type: Image Regression
## Validation Metrics
{validation_metrics}
"""
def image_regression_metrics(pred):
    """
    Calculate various regression metrics for image regression tasks.

    Args:
        pred (tuple): A tuple of (raw_predictions, labels).
            raw_predictions is either a list of single-element lists (one per
            sample) or a flat list of numpy.float32 scalar values.
            labels is a list of true values.

    Returns:
        dict: A dictionary with the keys 'mse', 'mae', 'r2', 'rmse' and
        'explained_variance' mapped to floats. If a metric fails to compute,
        its value is the sentinel -999.
    """
    raw_predictions, labels = pred

    try:
        # Flatten [[p0], [p1], ...] -> [p0, p1, ...].
        raw_predictions = [r for preds in raw_predictions for r in preds]
    except TypeError as err:
        # A flat list of numpy.float32 scalars is already in the right shape,
        # so that specific failure is expected and ignored. Anything else is a
        # genuine error: re-raise it unchanged (the previous code wrapped it in
        # a bare Exception, losing the original type and traceback).
        # NOTE(review): matching on the exception message is fragile — confirm
        # there is no cheaper structural check for the flat-scalar case.
        if "numpy.float32" not in str(err):
            raise

    metrics_to_calculate = {
        "mse": metrics.mean_squared_error,
        "mae": metrics.mean_absolute_error,
        "r2": metrics.r2_score,
        "rmse": lambda y_true, y_pred: np.sqrt(metrics.mean_squared_error(y_true, y_pred)),
        "explained_variance": metrics.explained_variance_score,
    }

    pred_dict = {}
    for key, func in metrics_to_calculate.items():
        try:
            pred_dict[key] = float(func(labels, raw_predictions))
        except Exception:
            # Deliberate best-effort: one failing metric (e.g. r2 on a
            # constant target) must not abort evaluation; -999 marks failure.
            pred_dict[key] = -999

    return pred_dict
def process_data(train_data, valid_data, image_processor, config):
    """
    Build augmented training and validation datasets from raw data.

    Args:
        train_data (Dataset): Raw training dataset.
        valid_data (Dataset or None): Raw validation dataset; skipped when None.
        image_processor (ImageProcessor): Supplies the target size and the
            normalization mean/std used by the transforms.
        config (dict): Extra parameters forwarded to ImageRegressionDataset.

    Returns:
        tuple: (train_dataset, valid_dataset); valid_dataset is None when no
        validation data was supplied.
    """
    # Resolve the crop/resize target: "shortest_edge" gives a single int,
    # otherwise an explicit (height, width) pair is provided.
    proc_size = image_processor.size
    if "shortest_edge" in proc_size:
        target = proc_size["shortest_edge"]
    else:
        target = (proc_size["height"], proc_size["width"])
    try:
        height, width = target
    except TypeError:
        # Single int: use it for both dimensions.
        height = width = target

    mean, std = image_processor.image_mean, image_processor.image_std
    train_transforms = A.Compose(
        [
            A.RandomResizedCrop(height=height, width=width),
            A.RandomRotate90(),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(p=0.2),
            A.Normalize(mean=mean, std=std),
        ]
    )
    val_transforms = A.Compose(
        [
            A.Resize(height=height, width=width),
            A.Normalize(mean=mean, std=std),
        ]
    )

    train_dataset = ImageRegressionDataset(train_data, train_transforms, config)
    if valid_data is None:
        return train_dataset, None
    return train_dataset, ImageRegressionDataset(valid_data, val_transforms, config)
def create_model_card(config, trainer):
    """
    Generate a model card string from the configuration and trainer.

    Args:
        config (object): Configuration object providing valid_split, data_path,
            project_name, and model.
        trainer (object): Trainer used to evaluate the model when a validation
            split is configured.

    Returns:
        str: The formatted model card containing dataset information,
        validation metrics, and base model details.
    """
    if config.valid_split is None:
        eval_scores = "No validation metrics available"
    else:
        raw_scores = trainer.evaluate()
        # Keep only whitelisted metrics and drop the "eval_" prefix.
        score_lines = [
            f"{metric[len('eval_'):]}: {value}"
            for metric, value in raw_scores.items()
            if metric in VALID_METRICS
        ]
        eval_scores = "\n\n".join(score_lines)

    # Auto-generated or local datasets get no dataset tag in the card.
    is_local_data = (
        config.data_path == f"{config.project_name}/autotrain-data"
        or os.path.isdir(config.data_path)
    )
    dataset_tag = "" if is_local_data else f"\ndatasets:\n- {config.data_path}"

    # A local model directory likewise gets no base_model reference.
    base_model = "" if os.path.isdir(config.model) else f"\nbase_model: {config.model}"

    return MODEL_CARD.format(
        dataset_tag=dataset_tag,
        validation_metrics=eval_scores,
        base_model=base_model,
    )
|