Spaces:

hardiktiwari
/

tensora-autotrain

Sleeping

App Files Files Community

tensora-autotrain / src /autotrain /trainers /object_detection /utils.py

hardiktiwari

Upload 244 files

33d4721 verified 3 months ago

raw

history blame contribute delete

10.1 kB

	import os
	from dataclasses import dataclass

	import albumentations as A
	import torch
	from torchmetrics.detection.mean_ap import MeanAveragePrecision
	from transformers.image_transforms import center_to_corners_format

	from autotrain.trainers.object_detection.dataset import ObjectDetectionDataset


	# Names of the evaluation metrics that `create_model_card` is allowed to
	# report. Every name is a metric suffix prefixed with "eval_".
	VALID_METRICS = tuple(
	    "eval_" + suffix
	    for suffix in (
	        "loss",
	        "map",
	        "map_50",
	        "map_75",
	        "map_small",
	        "map_medium",
	        "map_large",
	        "mar_1",
	        "mar_10",
	        "mar_100",
	        "mar_small",
	        "mar_medium",
	        "mar_large",
	    )
	)

	# Template for the generated model card. The section between the two `---`
	# markers is YAML front matter, so whitespace there is significant: each
	# `example_title` must be indented under its `- src` list item and no line
	# may begin with a tab. The template is written as explicit string pieces so
	# that its whitespace cannot be altered by re-indenting the source file.
	#
	# Placeholders filled in through `str.format`:
	#   {base_model}         - text appended right after the `- vision` tag
	#   {dataset_tag}        - text appended right after the last widget entry
	#   {validation_metrics} - text placed under "## Validation Metrics"
	MODEL_CARD = (
	    "\n"
	    "---\n"
	    "library_name: transformers\n"
	    "tags:\n"
	    "- autotrain\n"
	    "- object-detection\n"
	    "- vision{base_model}\n"
	    "widget:\n"
	    "- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg\n"
	    "  example_title: Tiger\n"
	    "- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/teapot.jpg\n"
	    "  example_title: Teapot\n"
	    "- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/palace.jpg\n"
	    "  example_title: Palace{dataset_tag}\n"
	    "---\n"
	    "\n"
	    "# Model Trained Using AutoTrain\n"
	    "\n"
	    "- Problem type: Object Detection\n"
	    "\n"
	    "## Validation Metrics\n"
	    "{validation_metrics}\n"
	)


	def collate_fn(batch):
	    """
	    Merge per-sample dictionaries into a single batch for object detection.

	    Args:
	        batch (list): Samples to merge. Every sample is a dictionary holding
	            'pixel_values' and 'labels'; 'pixel_mask' is optional.

	    Returns:
	        dict: The merged batch with these keys:
	            - 'pixel_values' (torch.Tensor): the samples' pixel values combined with `torch.stack`.
	            - 'labels' (list): the samples' labels, kept as a plain list in batch order.
	            - 'pixel_mask' (torch.Tensor): the samples' pixel masks combined with `torch.stack`;
	              only present when the first sample contains a 'pixel_mask' entry.
	    """
	    pixel_values = []
	    labels = []
	    for sample in batch:
	        pixel_values.append(sample["pixel_values"])
	        labels.append(sample["labels"])

	    collated = {"pixel_values": torch.stack(pixel_values), "labels": labels}

	    # Only the first sample is inspected to decide whether masks are present.
	    if "pixel_mask" in batch[0]:
	        masks = [sample["pixel_mask"] for sample in batch]
	        collated["pixel_mask"] = torch.stack(masks)
	    return collated


	def process_data(train_data, valid_data, image_processor, config):
	    """
	    Wrap the training and validation data in `ObjectDetectionDataset` objects.

	    The training dataset receives a randomized augmentation pipeline followed
	    by the shared resize-and-pad steps; the validation dataset receives only
	    the resize-and-pad steps. Both pipelines declare bounding boxes in "coco"
	    format with labels stored in the "category" field.

	    Args:
	        train_data: Training samples, handed to `ObjectDetectionDataset`.
	        valid_data: Validation samples, or None when there is no validation set.
	        image_processor: Image processor whose `size["longest_edge"]` gives the
	            target edge length; it is also handed to `ObjectDetectionDataset`.
	        config: Configuration handed through to `ObjectDetectionDataset`.

	    Returns:
	        tuple: `(train_dataset, valid_dataset)`; `valid_dataset` is None when
	        `valid_data` is None.
	    """
	    target_size = image_processor.size["longest_edge"]

	    # Resize so the longest edge equals `target_size`, then pad up to
	    # `target_size` x `target_size`. The same list is used by both pipelines.
	    basic_transforms = [
	        A.LongestMaxSize(max_size=target_size),
	        A.PadIfNeeded(target_size, target_size, border_mode=0, value=(128, 128, 128), position="top_left"),
	    ]

	    # Applied as a unit with probability 0.2.
	    random_crop = A.Compose(
	        [
	            A.SmallestMaxSize(max_size=target_size, p=1.0),
	            A.RandomSizedBBoxSafeCrop(height=target_size, width=target_size, p=1.0),
	        ],
	        p=0.2,
	    )
	    # One of the blur variants, applied with probability 0.1.
	    random_blur = A.OneOf(
	        [
	            A.Blur(blur_limit=7, p=0.5),
	            A.MotionBlur(blur_limit=7, p=0.5),
	            A.Defocus(radius=(1, 5), alias_blur=(0.1, 0.25), p=0.1),
	        ],
	        p=0.1,
	    )
	    augmentations = [
	        random_crop,
	        random_blur,
	        A.Perspective(p=0.1),
	        A.HorizontalFlip(p=0.5),
	        A.RandomBrightnessContrast(p=0.5),
	        A.HueSaturationValue(p=0.1),
	    ]

	    train_transforms = A.Compose(
	        augmentations + basic_transforms,
	        bbox_params=A.BboxParams(format="coco", label_fields=["category"], clip=True, min_area=25),
	    )
	    val_transforms = A.Compose(
	        basic_transforms,
	        bbox_params=A.BboxParams(format="coco", label_fields=["category"], clip=True),
	    )

	    train_dataset = ObjectDetectionDataset(train_data, train_transforms, image_processor, config)
	    if valid_data is None:
	        return train_dataset, None

	    valid_dataset = ObjectDetectionDataset(valid_data, val_transforms, image_processor, config)
	    return train_dataset, valid_dataset


	def convert_bbox_yolo_to_pascal(boxes, image_size):
	    """
	    Convert bounding boxes from YOLO format (x_center, y_center, width, height) in range [0, 1]
	    to Pascal VOC format (x_min, y_min, x_max, y_max) in absolute coordinates.

	    Args:
	        boxes (torch.Tensor): Bounding boxes in YOLO format
	        image_size (Tuple[int, int]): Image size in format (height, width)

	    Returns:
	        torch.Tensor: Bounding boxes in Pascal VOC format (x_min, y_min, x_max, y_max)
	    """
	    # (x_center, y_center, width, height) -> (x_min, y_min, x_max, y_max), still relative.
	    corners = center_to_corners_format(boxes)

	    height, width = image_size

	    # Create the scale on the same device as the boxes so that boxes living on
	    # an accelerator are not multiplied with a CPU tensor. For CPU boxes (and
	    # for non-tensor input) this is the default device, exactly as before.
	    device = corners.device if isinstance(corners, torch.Tensor) else None
	    scale = torch.tensor([[width, height, width, height]], device=device)

	    # x coordinates are scaled by the width, y coordinates by the height.
	    return corners * scale


	@torch.no_grad()
	def object_detection_metrics(evaluation_results, image_processor, threshold=0.0, id2label=None):
	    """
	    Compute mAP, mAR and their variants for the object detection task.

	    Args:
	        evaluation_results (EvalPrediction): Object exposing `predictions` and `label_ids`,
	            both iterated batch by batch.
	        image_processor: Object whose `post_process_object_detection` turns raw model
	            outputs into per-image prediction dictionaries.
	        threshold (float, optional): Threshold to filter predicted boxes by confidence. Defaults to 0.0.
	        id2label (Optional[dict], optional): Mapping from class id to class name. When None, the
	            class id itself is used in the per-class metric names. Defaults to None.

	    Returns:
	        Mapping[str, float]: Dictionary {<metric_name>: <metric_value>} with every value
	        rounded to 4 decimal places.
	    """

	    @dataclass
	    class ModelOutput:
	        # Minimal stand-in carrying the two fields passed to the image processor.
	        logits: torch.Tensor
	        pred_boxes: torch.Tensor

	    predictions = evaluation_results.predictions
	    targets = evaluation_results.label_ids

	    # The metric expects:
	    # - targets as a list of dictionaries with keys "boxes", "labels"
	    # - predictions as a list of dictionaries with keys "boxes", "scores", "labels"
	    image_sizes = []
	    post_processed_targets = []
	    for batch in targets:
	        # Original image sizes are needed later to post-process the predictions.
	        image_sizes.append(torch.tensor([target["orig_size"] for target in batch]))
	        # Target boxes are stored in YOLO format; the metric is configured for
	        # Pascal VOC (x_min, y_min, x_max, y_max), so convert them here.
	        post_processed_targets.extend(
	            {
	                "boxes": convert_bbox_yolo_to_pascal(torch.tensor(target["boxes"]), target["orig_size"]),
	                "labels": torch.tensor(target["class_labels"]),
	            }
	            for target in batch
	        )

	    # The image processor converts the raw outputs into absolute boxes, one
	    # dictionary per image.
	    post_processed_predictions = []
	    for batch, target_sizes in zip(predictions, image_sizes):
	        # Entries 1 and 2 of each prediction batch are used as logits and boxes
	        # (presumably entry 0 is the loss; verify against the trainer).
	        raw_logits, raw_boxes = batch[1], batch[2]
	        output = ModelOutput(logits=torch.tensor(raw_logits), pred_boxes=torch.tensor(raw_boxes))
	        post_processed_predictions.extend(
	            image_processor.post_process_object_detection(
	                output, threshold=threshold, target_sizes=target_sizes
	            )
	        )

	    # Compute metrics
	    metric = MeanAveragePrecision(box_format="xyxy", class_metrics=True)
	    metric.update(post_processed_predictions, post_processed_targets)
	    metrics = metric.compute()

	    # Expand the per-class lists into one entry per class. When `classes` has
	    # no length the per-class entries are left in place untouched.
	    classes = metrics.pop("classes")
	    try:
	        len(classes)
	    except TypeError:
	        per_class = ()
	    else:
	        per_class = zip(classes, metrics.pop("map_per_class"), metrics.pop("mar_100_per_class"))

	    for class_id, class_map, class_mar in per_class:
	        class_key = class_id.item()
	        class_name = class_key if id2label is None else id2label[class_key]
	        metrics[f"map_{class_name}"] = class_map
	        metrics[f"mar_100_{class_name}"] = class_mar

	    rounded = {}
	    for name, value in metrics.items():
	        rounded[name] = round(value.item(), 4)
	    return rounded


	def create_model_card(config, trainer):
	    """
	    Build the model card text by filling in the `MODEL_CARD` template.

	    Args:
	        config (object): Configuration object; the following attributes are read:
	            - valid_split: when None, no evaluation is run.
	            - data_path (str): dataset path or identifier.
	            - project_name (str): name of the project.
	            - model (str): base model path or identifier.
	        trainer (object): Object whose `evaluate()` returns a mapping of metric
	            names to values. Only called when `config.valid_split` is not None.

	    Returns:
	        str: The formatted model card.
	    """
	    if config.valid_split is None:
	        eval_scores = "No validation metrics available"
	    else:
	        raw_scores = trainer.evaluate()
	        prefix_length = len("eval_")
	        # Keep only the known metrics and drop their "eval_" prefix.
	        eval_scores = "\n\n".join(
	            f"{name[prefix_length:]}: {value}" for name, value in raw_scores.items() if name in VALID_METRICS
	        )

	    # No dataset tag for the project's own "autotrain-data" path or for a
	    # dataset that is a local directory.
	    is_untagged_dataset = config.data_path == f"{config.project_name}/autotrain-data" or os.path.isdir(
	        config.data_path
	    )
	    dataset_tag = "" if is_untagged_dataset else f"\ndatasets:\n- {config.data_path}"

	    # No base model entry when the model is a local directory.
	    base_model = "" if os.path.isdir(config.model) else f"\nbase_model: {config.model}"

	    return MODEL_CARD.format(
	        dataset_tag=dataset_tag,
	        validation_metrics=eval_scores,
	        base_model=base_model,
	    )