import cv2
import numpy as np
import albumentations as albu
from albumentations.augmentations.geometric.resize import LongestMaxSize
def round_pixel_dim(dimension: float) -> int:
    """Rounds a pixel dimension to the nearest integer, breaking exact .5
    ties toward the nearest even value so resizing is deterministic."""
    if abs(round(dimension) - dimension) == 0.5:
        return int(2.0 * round(dimension / 2.0))
    return int(round(dimension))
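# Note: Python's built-in round() already uses banker's rounding
# (round(2.5) == 2, round(3.5) == 4); the helper above makes that
# round-half-to-even behavior explicit for fractional pixel sizes,
# e.g. round_pixel_dim(767.5) == 768.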
def resize_with_padding(image, target_size, stride=32, interpolation=cv2.INTER_LINEAR):
    """Resizes an image so its longest side fits target_size and both output
    dimensions are multiples of stride. Rounding each dimension up to the next
    stride multiple may change the aspect ratio slightly; no pixels are padded."""
    height, width = image.shape[:2]
    max_dimension = max(height, width)

    # Already stride-aligned and small enough: return the image unchanged.
    if height % stride == 0 and width % stride == 0 and max_dimension <= target_size:
        return image

    # Scale so the longest side matches target_size, then snap each dimension
    # up to the next multiple of stride.
    scale = target_size / float(max_dimension)
    new_height, new_width = (round_pixel_dim(dim * scale) for dim in (height, width))
    if new_height % stride != 0:
        new_height = (new_height // stride + 1) * stride
    if new_width % stride != 0:
        new_width = (new_width // stride + 1) * stride

    # cv2.resize expects (width, height) order.
    return cv2.resize(image, (new_width, new_height), interpolation=interpolation)
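# Minimal sanity check (synthetic input, illustrative numbers only): a 500x700
# image scaled to fit a longest side of 1024 comes out as 736x1024, both
# multiples of 32:
#
#   demo = np.zeros((500, 700, 3), dtype=np.uint8)
#   resize_with_padding(demo, target_size=1024).shape  # (736, 1024, 3)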
class PaddedResize(LongestMaxSize):
    """LongestMaxSize variant that also snaps output dims to stride multiples."""

    def apply(self, img: np.ndarray, max_size: int = 1024,
              interpolation: int = cv2.INTER_LINEAR, **params) -> np.ndarray:
        # The parameter must be named max_size: albumentations passes the value
        # given to the constructor under that name, so a differently named
        # keyword would be silently ignored and always fall back to its default.
        return resize_with_padding(img, target_size=max_size, interpolation=interpolation)
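# PaddedResize drops into a pipeline like any other albumentations transform:
#
#   aug = albu.Compose([PaddedResize(max_size=1024, always_apply=True)])
#   resized = aug(image=img)["image"]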
def get_training_augmentations(width=768, height=576):
    """Configures training-time augmentations; outputs are square crops of
    side max(width, height)."""
    target_size = max(width, height)
    transforms = [
        albu.HorizontalFlip(p=0.5),
        albu.ShiftScaleRotate(
            scale_limit=0.5, rotate_limit=90, shift_limit=0.1, p=0.5, border_mode=0),
        albu.PadIfNeeded(min_height=target_size, min_width=target_size, always_apply=True),
        albu.RandomCrop(height=target_size, width=target_size, always_apply=True),
        albu.GaussNoise(p=0.2),
        albu.Perspective(p=0.2),
        albu.OneOf([albu.CLAHE(p=1), albu.RandomGamma(p=1)], p=0.33),
        albu.OneOf([
            albu.Sharpen(p=1),
            albu.Blur(blur_limit=3, p=1),
            albu.MotionBlur(blur_limit=3, p=1)], p=0.33),
        albu.OneOf([
            albu.RandomBrightnessContrast(p=1),
            albu.HueSaturationValue(p=1)], p=0.33),
    ]
    return albu.Compose(transforms)
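# Typical training-time use (variable names are illustrative); albu.Compose
# applies geometric transforms to image and mask together and photometric
# ones to the image only:
#
#   train_aug = get_training_augmentations(width=768, height=576)
#   sample = train_aug(image=image, mask=mask)
#   image_aug, mask_aug = sample["image"], sample["mask"]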
def get_validation_augmentations(width=1920, height=1440, fixed_size=True):
    """Configures validation/inference-time augmentations."""
    if fixed_size:
        transforms = [albu.Resize(height=height, width=width, always_apply=True)]
        return albu.Compose(transforms)

    target_size = max(width, height)
    transforms = [PaddedResize(max_size=target_size, always_apply=True)]
    return albu.Compose(transforms)
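# fixed_size=True warps every image to exactly (height, width), which keeps
# batch shapes uniform; fixed_size=False preserves the aspect ratio and only
# guarantees stride-compatible dimensions via PaddedResize.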
def convert_to_tensor(x, **kwargs):
    """Converts an HWC image array to the CHW float32 layout PyTorch expects;
    single-channel inputs gain an explicit channel axis first."""
    if x.ndim == 2:
        x = np.expand_dims(x, axis=-1)
    return x.transpose(2, 0, 1).astype('float32')
def get_preprocessing_pipeline(preprocessing_fn):
    """Builds the preprocessing pipeline: applies preprocessing_fn to the
    image, then converts image and mask to CHW float32 arrays."""
    transforms = [
        albu.Lambda(image=preprocessing_fn),
        albu.Lambda(image=convert_to_tensor, mask=convert_to_tensor),
    ]
    return albu.Compose(transforms)
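

# End-to-end sketch on synthetic data. The divide-by-255 preprocessing_fn is a
# placeholder for whatever encoder-specific normalizer the training code
# supplies (e.g. one from segmentation_models_pytorch); it is an assumption,
# not part of this module's API.
if __name__ == "__main__":
    image = np.random.randint(0, 256, (576, 768, 3), dtype=np.uint8)
    mask = np.random.randint(0, 2, (576, 768), dtype=np.uint8)

    train_aug = get_training_augmentations(width=768, height=576)
    augmented = train_aug(image=image, mask=mask)

    pipeline = get_preprocessing_pipeline(lambda x, **kwargs: x / 255.0)
    processed = pipeline(image=augmented["image"], mask=augmented["mask"])

    print(processed["image"].shape, processed["image"].dtype)  # (3, 768, 768) float32
    print(processed["mask"].shape, processed["mask"].dtype)    # (1, 768, 768) float32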