# Spaces: Sleeping  (appears to be hosting-page status text captured with this file; not part of the module)
import os | |
from dataclasses import dataclass | |
import albumentations as A | |
import torch | |
from torchmetrics.detection.mean_ap import MeanAveragePrecision | |
from transformers.image_transforms import center_to_corners_format | |
from autotrain.trainers.object_detection.dataset import ObjectDetectionDataset | |
# Evaluation metric keys that are allowed to appear in the generated model
# card (see `create_model_card`). Besides the loss, the names follow two
# patterns: "eval_map<suffix>" and "eval_mar_<suffix>".
VALID_METRICS = (
    "eval_loss",
    *(f"eval_map{suffix}" for suffix in ("", "_50", "_75", "_small", "_medium", "_large")),
    *(f"eval_mar_{suffix}" for suffix in ("1", "10", "100", "small", "medium", "large")),
)
# Template for the generated model card: YAML front matter between the two
# "---" lines, followed by a short Markdown body.
#
# Placeholders filled by `create_model_card`:
#   {base_model}         - "" or "\nbase_model: <id>"; appended to the end of
#                          the last tag line so an empty value leaves no blank line.
#   {dataset_tag}        - "" or "\ndatasets:\n- <path>"; appended the same way
#                          to the last widget line.
#   {validation_metrics} - pre-formatted metric lines.
#
# Each `example_title` must be indented under its `- src:` entry; at column 0
# it would end the `widget` list and make the front matter invalid YAML.
MODEL_CARD = """
---
library_name: transformers
tags:
- autotrain
- object-detection
- vision{base_model}
widget:
- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg
  example_title: Tiger
- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/teapot.jpg
  example_title: Teapot
- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/palace.jpg
  example_title: Palace{dataset_tag}
---
# Model Trained Using AutoTrain
- Problem type: Object Detection
## Validation Metrics
{validation_metrics}
"""
def collate_fn(batch):
    """
    Merge per-sample dictionaries into one batch dictionary.

    Args:
        batch (list): Samples to merge. Each sample is a dictionary holding
            'pixel_values' and 'labels'; 'pixel_mask' is optional.

    Returns:
        dict: The merged batch with these keys:
            - 'pixel_values' (torch.Tensor): per-sample pixel values stacked
              along a new leading dimension.
            - 'labels' (list): per-sample labels, in batch order.
            - 'pixel_mask' (torch.Tensor): stacked per-sample masks; only
              present when the first sample contains 'pixel_mask'.
    """
    collated = {
        "pixel_values": torch.stack([sample["pixel_values"] for sample in batch]),
        "labels": [sample["labels"] for sample in batch],
    }

    # Only the first sample decides whether masks are collated.
    if "pixel_mask" not in batch[0]:
        return collated

    collated["pixel_mask"] = torch.stack([sample["pixel_mask"] for sample in batch])
    return collated
def process_data(train_data, valid_data, image_processor, config):
    """
    Wrap the raw training/validation data in `ObjectDetectionDataset` objects.

    The training dataset gets a randomized augmentation pipeline followed by
    a resize-and-pad step; the validation dataset only gets the
    resize-and-pad step.

    Args:
        train_data: Training samples, passed to `ObjectDetectionDataset`.
        valid_data: Validation samples, or None to skip the validation dataset.
        image_processor: Image processor; `size["longest_edge"]` is used as the
            target edge length, and the object is forwarded to the datasets.
        config: Configuration, forwarded unchanged to the datasets.

    Returns:
        tuple: `(train_dataset, valid_dataset)`; `valid_dataset` is None when
        `valid_data` is None.
    """
    target_size = image_processor.size["longest_edge"]

    # Final step of both pipelines: scale the longest edge to `target_size`,
    # then pad to `target_size` x `target_size` with gray (128, 128, 128),
    # keeping the image in the top-left corner.
    resize_and_pad = [
        A.LongestMaxSize(max_size=target_size),
        A.PadIfNeeded(target_size, target_size, border_mode=0, value=(128, 128, 128), position="top_left"),
    ]

    # Applied as a unit with probability 0.2.
    bbox_safe_crop = A.Compose(
        [
            A.SmallestMaxSize(max_size=target_size, p=1.0),
            A.RandomSizedBBoxSafeCrop(height=target_size, width=target_size, p=1.0),
        ],
        p=0.2,
    )
    # With probability 0.1, apply one of the blur variants.
    random_blur = A.OneOf(
        [
            A.Blur(blur_limit=7, p=0.5),
            A.MotionBlur(blur_limit=7, p=0.5),
            A.Defocus(radius=(1, 5), alias_blur=(0.1, 0.25), p=0.1),
        ],
        p=0.1,
    )
    augmentations = [
        bbox_safe_crop,
        random_blur,
        A.Perspective(p=0.1),
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.HueSaturationValue(p=0.1),
    ]

    train_transforms = A.Compose(
        augmentations + resize_and_pad,
        bbox_params=A.BboxParams(format="coco", label_fields=["category"], clip=True, min_area=25),
    )
    val_transforms = A.Compose(
        resize_and_pad,
        bbox_params=A.BboxParams(format="coco", label_fields=["category"], clip=True),
    )

    train_data = ObjectDetectionDataset(train_data, train_transforms, image_processor, config)
    if valid_data is not None:
        valid_data = ObjectDetectionDataset(valid_data, val_transforms, image_processor, config)
    # `valid_data` is still None here when no validation data was supplied.
    return train_data, valid_data
def convert_bbox_yolo_to_pascal(boxes, image_size):
    """
    Turn relative YOLO boxes into absolute Pascal VOC boxes.

    YOLO boxes are (x_center, y_center, width, height) in the range [0, 1];
    Pascal VOC boxes are (x_min, y_min, x_max, y_max) in absolute coordinates.

    Args:
        boxes (torch.Tensor): Bounding boxes in YOLO format.
        image_size (Tuple[int, int]): Image size as (height, width).

    Returns:
        torch.Tensor: Bounding boxes in Pascal VOC format.
    """
    # (cx, cy, w, h) -> (x_min, y_min, x_max, y_max), still relative.
    corner_boxes = center_to_corners_format(boxes)

    img_height, img_width = image_size
    # One (x, y, x, y) scale row, broadcast across all boxes.
    scale = torch.tensor([[img_width, img_height, img_width, img_height]])
    return corner_boxes * scale
def object_detection_metrics(evaluation_results, image_processor, threshold=0.0, id2label=None):
    """
    Compute mAP, mAR and their variants for the object detection task.

    Args:
        evaluation_results (EvalPrediction): Predictions and targets from evaluation.
            `label_ids` is iterated batch by batch; each batch is a sequence of
            per-image dicts with keys "orig_size", "boxes" and "class_labels".
            `predictions` is iterated batch by batch; index 1 of a batch holds
            the logits and index 2 the predicted boxes.
        image_processor: Object whose `post_process_object_detection` method
            converts raw model output into per-image predictions.
        threshold (float, optional): Threshold to filter predicted boxes by confidence. Defaults to 0.0.
        id2label (Optional[dict], optional): Mapping from class id to class name. Defaults to None.

    Returns:
        Mapping[str, float]: Metrics as {<metric_name>: <metric_value>}, rounded
        to 4 decimals. When per-class results are available they are reported
        as `map_<class>` and `mar_100_<class>`, where `<class>` is the name from
        `id2label` or the raw class id.
    """

    # Lightweight container exposing the `logits` / `pred_boxes` attributes
    # passed to `post_process_object_detection`. `@dataclass` is required:
    # it generates the keyword `__init__` used below.
    @dataclass
    class ModelOutput:
        logits: torch.Tensor
        pred_boxes: torch.Tensor

    predictions, targets = evaluation_results.predictions, evaluation_results.label_ids

    # For metric computation we need to provide:
    #  - targets in a form of list of dictionaries with keys "boxes", "labels"
    #  - predictions in a form of list of dictionaries with keys "boxes", "scores", "labels"
    image_sizes = []
    post_processed_targets = []
    post_processed_predictions = []

    # Collect targets in the required format for metric computation
    for batch in targets:
        # collect image sizes, we will need them for predictions post processing
        batch_image_sizes = torch.tensor([x["orig_size"] for x in batch])
        image_sizes.append(batch_image_sizes)
        # boxes were converted to YOLO format needed for model training,
        # here we convert them to Pascal VOC format (x_min, y_min, x_max, y_max)
        for image_target in batch:
            boxes = torch.tensor(image_target["boxes"])
            boxes = convert_bbox_yolo_to_pascal(boxes, image_target["orig_size"])
            labels = torch.tensor(image_target["class_labels"])
            post_processed_targets.append({"boxes": boxes, "labels": labels})

    # Collect predictions in the required format for metric computation,
    # model produce boxes in YOLO format, then image_processor convert them to Pascal VOC format
    for batch, target_sizes in zip(predictions, image_sizes):
        batch_logits, batch_boxes = batch[1], batch[2]
        output = ModelOutput(logits=torch.tensor(batch_logits), pred_boxes=torch.tensor(batch_boxes))
        post_processed_output = image_processor.post_process_object_detection(
            output, threshold=threshold, target_sizes=target_sizes
        )
        post_processed_predictions.extend(post_processed_output)

    # Compute metrics
    metric = MeanAveragePrecision(box_format="xyxy", class_metrics=True)
    metric.update(post_processed_predictions, post_processed_targets)
    metrics = metric.compute()

    # Replace list of per class metrics with separate metric for each class.
    # `classes` may not support len() (it then raises TypeError); in that case
    # the per-class entries are left in `metrics` untouched.
    classes = metrics.pop("classes")
    try:
        len(classes)
    except TypeError:
        calc_map_per_class = False
    else:
        calc_map_per_class = True

    if calc_map_per_class:
        map_per_class = metrics.pop("map_per_class")
        mar_100_per_class = metrics.pop("mar_100_per_class")
        for class_id, class_map, class_mar in zip(classes, map_per_class, mar_100_per_class):
            class_name = id2label[class_id.item()] if id2label is not None else class_id.item()
            metrics[f"map_{class_name}"] = class_map
            metrics[f"mar_100_{class_name}"] = class_mar

    # Convert tensor values to plain rounded floats.
    metrics = {k: round(v.item(), 4) for k, v in metrics.items()}
    return metrics
def create_model_card(config, trainer):
    """
    Build the model card text for a finished training run.

    Args:
        config (object): Configuration object; the attributes read here are:
            - valid_split: validation split, or None when there is none.
            - data_path (str): path or identifier of the dataset.
            - project_name (str): name of the project.
            - model (str): path or identifier of the base model.
        trainer (object): Trainer whose `evaluate()` returns a mapping of
            metric name to value; it is only called when `valid_split` is set.

    Returns:
        str: `MODEL_CARD` filled with the dataset tag, the base model tag and
        the validation metrics.
    """
    if config.valid_split is None:
        validation_metrics = "No validation metrics available"
    else:
        prefix_length = len("eval_")
        metric_lines = []
        for metric_name, metric_value in trainer.evaluate().items():
            # Only whitelisted metrics are reported, without their "eval_" prefix.
            if metric_name in VALID_METRICS:
                metric_lines.append(f"{metric_name[prefix_length:]}: {metric_value}")
        validation_metrics = "\n\n".join(metric_lines)

    # No `datasets:` tag for the project's own autotrain data or a local directory.
    is_local_dataset = config.data_path == f"{config.project_name}/autotrain-data" or os.path.isdir(
        config.data_path
    )
    dataset_tag = "" if is_local_dataset else f"\ndatasets:\n- {config.data_path}"

    # No `base_model:` tag when the model was loaded from a local directory.
    base_model = "" if os.path.isdir(config.model) else f"\nbase_model: {config.model}"

    return MODEL_CARD.format(
        dataset_tag=dataset_tag,
        validation_metrics=validation_metrics,
        base_model=base_model,
    )