"""
Helpers for the object detection trainer: batch collation, dataset and
augmentation setup, mAP/mAR metric computation, and model card generation.
"""

import os
from dataclasses import dataclass

import albumentations as A
import torch
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from transformers.image_transforms import center_to_corners_format

from autotrain.trainers.object_detection.dataset import ObjectDetectionDataset


# Evaluation keys that are allowed to appear in the generated model card.
VALID_METRICS = (
    "eval_loss",
    "eval_map",
    "eval_map_50",
    "eval_map_75",
    "eval_map_small",
    "eval_map_medium",
    "eval_map_large",
    "eval_mar_1",
    "eval_mar_10",
    "eval_mar_100",
    "eval_mar_small",
    "eval_mar_medium",
    "eval_mar_large",
)

# Model card template; `base_model` and `dataset_tag` are either empty or start
# with a newline, which is why they are appended directly to the preceding line.
MODEL_CARD = """
---
library_name: transformers
tags:
- autotrain
- object-detection
- vision{base_model}
widget:
- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg
  example_title: Tiger
- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/teapot.jpg
  example_title: Teapot
- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/palace.jpg
  example_title: Palace{dataset_tag}
---

# Model Trained Using AutoTrain

- Problem type: Object Detection

## Validation Metrics
{validation_metrics}
"""


def collate_fn(batch):
    """
    Collates a batch of data for object detection training.

    Args:
        batch (list): A list of dictionaries, where each dictionary contains 'pixel_values',
            'labels', and optionally 'pixel_mask'.

    Returns:
        dict: A dictionary with the following keys:
            - 'pixel_values' (torch.Tensor): A tensor containing stacked pixel values from the batch.
            - 'labels' (list): A list of labels from the batch.
            - 'pixel_mask' (torch.Tensor, optional): A tensor containing stacked pixel masks from
              the batch, if 'pixel_mask' is present in the input batch.
    """
    data = {}
    data["pixel_values"] = torch.stack([x["pixel_values"] for x in batch])
    # Labels stay a plain list: they are not stacked into a tensor.
    data["labels"] = [x["labels"] for x in batch]
    # Only the first sample is inspected to decide whether masks are present.
    if "pixel_mask" in batch[0]:
        data["pixel_mask"] = torch.stack([x["pixel_mask"] for x in batch])
    return data


def process_data(train_data, valid_data, image_processor, config):
    """
    Processes training and validation data for object detection.

    Builds an augmentation pipeline for training and a resize-and-pad pipeline
    for validation, then wraps each split in an `ObjectDetectionDataset`.

    Args:
        train_data (list): List of training data samples.
        valid_data (list or None): List of validation data samples. If None, only training
            data is processed.
        image_processor (object): An image processor object that contains image processing
            configurations. Its `size["longest_edge"]` is used as the target size.
        config (dict): Configuration dictionary containing various settings for data processing.

    Returns:
        tuple: A tuple containing processed training data and validation data (if provided).
            If validation data is not provided, the second element of the tuple is None.
    """
    max_size = image_processor.size["longest_edge"]

    # Shared by both pipelines: resize the longest edge to `max_size`, then pad
    # to a `max_size` x `max_size` canvas with the image anchored top-left.
    basic_transforms = [
        A.LongestMaxSize(max_size=max_size),
        A.PadIfNeeded(max_size, max_size, border_mode=0, value=(128, 128, 128), position="top_left"),
    ]

    train_transforms = A.Compose(
        [
            A.Compose(
                [
                    A.SmallestMaxSize(max_size=max_size, p=1.0),
                    A.RandomSizedBBoxSafeCrop(height=max_size, width=max_size, p=1.0),
                ],
                p=0.2,
            ),
            A.OneOf(
                [
                    A.Blur(blur_limit=7, p=0.5),
                    A.MotionBlur(blur_limit=7, p=0.5),
                    A.Defocus(radius=(1, 5), alias_blur=(0.1, 0.25), p=0.1),
                ],
                p=0.1,
            ),
            A.Perspective(p=0.1),
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(p=0.5),
            A.HueSaturationValue(p=0.1),
            *basic_transforms,
        ],
        bbox_params=A.BboxParams(format="coco", label_fields=["category"], clip=True, min_area=25),
    )
    val_transforms = A.Compose(
        basic_transforms,
        bbox_params=A.BboxParams(format="coco", label_fields=["category"], clip=True),
    )

    train_data = ObjectDetectionDataset(train_data, train_transforms, image_processor, config)
    if valid_data is None:
        return train_data, None

    valid_data = ObjectDetectionDataset(valid_data, val_transforms, image_processor, config)
    return train_data, valid_data


def convert_bbox_yolo_to_pascal(boxes, image_size):
    """
    Convert bounding boxes from YOLO format (x_center, y_center, width, height) in range [0, 1]
    to Pascal VOC format (x_min, y_min, x_max, y_max) in absolute coordinates.

    Args:
        boxes (torch.Tensor): Bounding boxes in YOLO format
        image_size (Tuple[int, int]): Image size in format (height, width)

    Returns:
        torch.Tensor: Bounding boxes in Pascal VOC format (x_min, y_min, x_max, y_max)
    """
    # convert center to corners format
    boxes = center_to_corners_format(boxes)

    # convert to absolute coordinates
    height, width = image_size
    # `new_tensor` creates the scale factors with the dtype and device of `boxes`,
    # so the multiplication also works when `boxes` is not a CPU tensor.
    scale = boxes.new_tensor([[width, height, width, height]])
    boxes = boxes * scale

    return boxes


@torch.no_grad()
def object_detection_metrics(evaluation_results, image_processor, threshold=0.0, id2label=None):
    """
    Compute mean average mAP, mAR and their variants for the object detection task.

    Args:
        evaluation_results (EvalPrediction): Predictions and targets from evaluation.
        image_processor (object): Image processor whose `post_process_object_detection`
            method is used to convert raw model outputs into boxes, scores and labels.
        threshold (float, optional): Threshold to filter predicted boxes by confidence. Defaults to 0.0.
        id2label (Optional[dict], optional): Mapping from class id to class name. Defaults to None.

    Returns:
        Mapping[str, float]: Metrics in a form of dictionary {<metric_name>: <metric_value>}
    """

    @dataclass
    class ModelOutput:
        # Minimal container exposing the two attributes passed to
        # `post_process_object_detection` below.
        logits: torch.Tensor
        pred_boxes: torch.Tensor

    predictions, targets = evaluation_results.predictions, evaluation_results.label_ids

    # For metric computation we need to provide:
    #  - targets in a form of list of dictionaries with keys "boxes", "labels"
    #  - predictions in a form of list of dictionaries with keys "boxes", "scores", "labels"

    image_sizes = []
    post_processed_targets = []
    post_processed_predictions = []

    # Collect targets in the required format for metric computation
    for batch in targets:
        # collect image sizes, we will need them for predictions post processing
        batch_image_sizes = torch.tensor([x["orig_size"] for x in batch])
        image_sizes.append(batch_image_sizes)
        # collect targets in the required format for metric computation
        # boxes were converted to YOLO format needed for model training
        # here we will convert them to Pascal VOC format (x_min, y_min, x_max, y_max)
        for image_target in batch:
            boxes = torch.tensor(image_target["boxes"])
            boxes = convert_bbox_yolo_to_pascal(boxes, image_target["orig_size"])
            labels = torch.tensor(image_target["class_labels"])
            post_processed_targets.append({"boxes": boxes, "labels": labels})

    # Collect predictions in the required format for metric computation,
    # model produce boxes in YOLO format, then image_processor convert them to Pascal VOC format
    for batch, target_sizes in zip(predictions, image_sizes):
        # Index 0 of each prediction batch is skipped; indices 1 and 2 are used
        # as logits and boxes (presumably index 0 is the loss -- TODO confirm).
        batch_logits, batch_boxes = batch[1], batch[2]
        output = ModelOutput(logits=torch.tensor(batch_logits), pred_boxes=torch.tensor(batch_boxes))
        post_processed_output = image_processor.post_process_object_detection(
            output, threshold=threshold, target_sizes=target_sizes
        )
        post_processed_predictions.extend(post_processed_output)

    # Compute metrics
    metric = MeanAveragePrecision(box_format="xyxy", class_metrics=True)
    metric.update(post_processed_predictions, post_processed_targets)
    metrics = metric.compute()

    # Replace list of per class metrics with separate metric for each class
    classes = metrics.pop("classes")
    # `len()` raising TypeError is treated as "no per-class breakdown available"
    # (presumably `classes` is then a 0-d tensor -- TODO confirm). In that case
    # the per-class entries are left in `metrics` and rounded like the rest.
    try:
        len(classes)
        calc_map_per_class = True
    except TypeError:
        calc_map_per_class = False

    if calc_map_per_class:
        map_per_class = metrics.pop("map_per_class")
        mar_100_per_class = metrics.pop("mar_100_per_class")
        for class_id, class_map, class_mar in zip(classes, map_per_class, mar_100_per_class):
            class_name = id2label[class_id.item()] if id2label is not None else class_id.item()
            metrics[f"map_{class_name}"] = class_map
            metrics[f"mar_100_{class_name}"] = class_mar

    # Every remaining value is expected to be a single-element tensor here.
    metrics = {k: round(v.item(), 4) for k, v in metrics.items()}

    return metrics


def create_model_card(config, trainer):
    """
    Generates a model card string based on the provided configuration and trainer.

    Args:
        config (object): Configuration object containing the following attributes:
            - valid_split (optional): Validation split information.
            - data_path (str): Path to the dataset.
            - project_name (str): Name of the project.
            - model (str): Path or identifier of the model.
        trainer (object): Trainer object with an `evaluate` method that returns evaluation metrics.

    Returns:
        str: A formatted model card string containing dataset information, validation metrics,
            and base model details.
    """
    if config.valid_split is not None:
        eval_scores = trainer.evaluate()
        # Keep only whitelisted metrics and drop the "eval_" prefix for display.
        eval_scores = [f"{k[len('eval_'):]}: {v}" for k, v in eval_scores.items() if k in VALID_METRICS]
        eval_scores = "\n\n".join(eval_scores)
    else:
        eval_scores = "No validation metrics available"

    # The dataset tag is omitted for the project's own autotrain-data and for
    # local directories.
    if config.data_path == f"{config.project_name}/autotrain-data" or os.path.isdir(config.data_path):
        dataset_tag = ""
    else:
        dataset_tag = f"\ndatasets:\n- {config.data_path}"

    # The base model tag is omitted when the model is a local directory.
    if os.path.isdir(config.model):
        base_model = ""
    else:
        base_model = f"\nbase_model: {config.model}"

    model_card = MODEL_CARD.format(
        dataset_tag=dataset_tag,
        validation_metrics=eval_scores,
        base_model=base_model,
    )
    return model_card