Source code for benzina.torch.operations

# -*- coding: utf-8 -*-
try:
    from collections.abc import Sequence
except ImportError:  # interpreters that predate collections.abc
    from collections import Sequence

import numpy as np


class WarpTransform:
    """
    Interface class that represents a warp transformation as a combined
    rotation, scale, skew and translation 3 x 3 matrix. The transformation is
    called for each sample of a batch.
    """

    def __call__(self, index, in_shape, out_shape, rng):
        """
        __call__ needs to be implemented in subclasses

        Args:
            index (int): the index of the sample in the dataset
            in_shape (tuple of ints): the shape of the input sample
            out_shape (tuple of ints): the shape of the output sample
            rng (numpy.random.RandomState): a random number generator seeded
                by the dataloader

        Returns:
            out (tuple of numerics): a flatten, row-major 3 x 3 warp matrix
                returned in a tuple of numerics.

        Raises:
            NotImplementedError: always; this base implementation only
                defines the interface.
        """
        # Raise (rather than return) the exception so that a missing override
        # fails loudly at the call site instead of handing an exception
        # object to the caller as if it were a warp matrix.
        raise NotImplementedError(
            '__call__ needs to be implemented in subclasses')
class OOBTransform:
    """
    Interface class that represents an out of bounds transformation. The
    transformation is called for each sample of a batch.
    """

    def __call__(self, index, in_shape, out_shape, rng):
        """
        __call__ needs to be implemented in subclasses

        Args:
            index (int): the index of the sample in the dataset
            in_shape (tuple of ints): the shape of the input sample
            out_shape (tuple of ints): the shape of the output sample
            rng (numpy.random.RandomState): a random number generator seeded
                by the dataloader

        Returns:
            out (tuple of numerics): a tuple in RGB order containing the RGB
                color to use when no data is available.

        Raises:
            NotImplementedError: always; this base implementation only
                defines the interface.
        """
        # Raise (rather than return) so a missing override fails loudly.
        raise NotImplementedError(
            '__call__ needs to be implemented in subclasses')


class ColorTransform:
    """
    Interface class that represents a color transformation from YCbCr to RGB
    as defined in Benzina's kernel. The transformation is called for each
    sample of a batch.

    ===== ==================================
    Index Description
    ===== ==================================
    0     ITU-R BT.601-6-625 recommendation

          * Kr = 0.299
          * Kg = 0.587
          * Kb = 0.114

          but full scale

          * Y,Cb,Cr in [0, 255]
    1     ITU-R BT.601-6-625 recommendation

          * Kr = 0.299
          * Kg = 0.587
          * Kb = 0.114

          with head/footroom

          * Y in [16,235]
          * Cb,Cr in [16,240]
    2     ITU-R BT.709 recommendation

          * Kr = 0.2126
          * Kg = 0.7152
          * Kb = 0.0722

          with head/footroom

          * Y in [16,235]
          * Cb,Cr in [16,240]
    3     ITU-R BT.2020 recommendation

          * Kr = 0.2627
          * Kg = 0.6780
          * Kb = 0.0593

          with head/footroom

          * Y in [16,235]
          * Cb,Cr in [16,240]
    ===== ==================================
    """

    def __call__(self, index, in_shape, out_shape, rng):
        """
        __call__ needs to be implemented in subclasses

        Args:
            index (int): the index of the sample in the dataset
            in_shape (tuple of ints): the shape of the input sample
            out_shape (tuple of ints): the shape of the output sample
            rng (numpy.random.RandomState): a random number generator seeded
                by the dataloader

        Returns:
            out (tuple of numerics): a tuple containing a single int
                indicating which method to use when converting a sample's
                YCbCr value to RGB.

        Raises:
            NotImplementedError: always; this base implementation only
                defines the interface.
        """
        # Raise (rather than return) so a missing override fails loudly.
        raise NotImplementedError(
            '__call__ needs to be implemented in subclasses')
class NormTransform:
    """
    Interface class that represents a normalization transformation. The
    transformation is called for each sample of a batch.
    """

    def __call__(self, index, in_shape, out_shape, rng):
        """
        __call__ needs to be implemented in subclasses

        Args:
            index (int): the index of the sample in the dataset
            in_shape (tuple of ints): the shape of the input sample
            out_shape (tuple of ints): the shape of the output sample
            rng (numpy.random.RandomState): a random number generator seeded
                by the dataloader

        Returns:
            out (tuple of numerics): a tuple in RGB order containing the
                normalization constant of a sample's RGB channels. Components
                will be multiplied to the respective channels of a sample.

        Raises:
            NotImplementedError: always; this base implementation only
                defines the interface.
        """
        # Raise (rather than return) so a missing override fails loudly.
        raise NotImplementedError(
            '__call__ needs to be implemented in subclasses')
class BiasTransform:
    """
    Interface class that represents a bias transformation. The transformation
    is called for each sample of a batch.
    """

    def __call__(self, index, in_shape, out_shape, rng):
        """
        __call__ needs to be implemented in subclasses

        Args:
            index (int): the index of the sample in the dataset
            in_shape (tuple of ints): the shape of the input sample
            out_shape (tuple of ints): the shape of the output sample
            rng (numpy.random.RandomState): a random number generator seeded
                by the dataloader

        Returns:
            out (tuple of numerics): a tuple in RGB order containing the bias
                of a sample's RGB channels. Components will be subtracted
                from the respective channels of a sample.

        Raises:
            NotImplementedError: always; this base implementation only
                defines the interface.
        """
        # Raise (rather than return) so a missing override fails loudly.
        raise NotImplementedError(
            '__call__ needs to be implemented in subclasses')
class ConstantWarpTransform(WarpTransform):
    """
    Warp transformation that hands back the same matrix for every sample,
    whatever its index.

    Args:
        warp (iterable of numerics, optional): a flatten, row-major 3 x 3 warp
            matrix (default: flatten identity matrix).
    """

    def __init__(self, warp=(1.0, 0.0, 0.0,
                             0.0, 1.0, 0.0,
                             0.0, 0.0, 1.0)):
        # An explicit ``None`` is treated like "use the identity matrix".
        identity = (1.0, 0.0, 0.0,
                    0.0, 1.0, 0.0,
                    0.0, 0.0, 1.0)
        self.warp = tuple(identity if warp is None else warp)

    def __call__(self, index, in_shape, out_shape, rng):
        """Return the stored warp matrix; all arguments are ignored."""
        return self.warp
class ConstantOOBTransform(OOBTransform):
    """
    Out of bounds transformation that hands back the same color for every
    sample, whatever its index.

    Args:
        oob (numeric or iterable of numerics, optional): an iterable in RGB
            order containing the RGB color to use when no data is available
            (default: (0.0, 0.0, 0.0)).
    """

    def __init__(self, oob=(0.0, 0.0, 0.0)):
        if oob is None:
            color = (0.0, 0.0, 0.0)
        elif isinstance(oob, (int, float)):
            # A lone number is replicated over the three channels.
            color = (float(oob),) * 3
        else:
            color = oob
        self.oob = tuple(color)

    def __call__(self, index, in_shape, out_shape, rng):
        """Return the stored color; all arguments are ignored."""
        return self.oob


class ConstantColorTransform(ColorTransform):
    """
    Color transformation that hands back the same conversion method for every
    sample, whatever its index.

    Args:
        index (int, optional): the index of the method to use when converting
            a sample's YCbCr value to RGB (default: 0).
    """

    def __init__(self, index=0):
        if index is None:
            method = (0,)
        elif isinstance(index, int):
            # Wrap a bare int in a one-element tuple.
            method = (int(index),)
        else:
            method = index
        self.index = tuple(method)

    def __call__(self, index, in_shape, out_shape, rng):
        """Return the stored method index tuple; all arguments are ignored."""
        return self.index
class ConstantNormTransform(NormTransform):
    """
    Norm transformation that hands back the same normalization constants for
    every sample, whatever its index.

    Args:
        norm (numeric or iterable of numerics, optional): an iterable in RGB
            order containing the normalization constant of a sample's RGB
            channels. Components will be multiplied to the respective channels
            of a sample (default: (1.0, 1.0, 1.0)).
    """

    def __init__(self, norm=(1.0, 1.0, 1.0)):
        if norm is None:
            factors = (1.0, 1.0, 1.0)
        elif isinstance(norm, (int, float)):
            # A lone number is replicated over the three channels.
            factors = (float(norm),) * 3
        else:
            factors = norm
        self.norm = tuple(factors)

    def __call__(self, index, in_shape, out_shape, rng):
        """Return the stored constants; all arguments are ignored."""
        return self.norm
class ConstantBiasTransform(BiasTransform):
    """
    Bias transformation that hands back the same bias for every sample,
    whatever its index.

    Args:
        bias (numeric or iterable of numerics, optional): an iterable in RGB
            order containing the bias of a sample's RGB channels. Components
            will be subtracted from the respective channels of a sample
            (default: (0.0, 0.0, 0.0)).
    """

    def __init__(self, bias=(0.0, 0.0, 0.0)):
        if bias is None:
            offsets = (0.0, 0.0, 0.0)
        elif isinstance(bias, (int, float)):
            # A lone number is replicated over the three channels.
            offsets = (float(bias),) * 3
        else:
            offsets = bias
        self.bias = tuple(offsets)

    def __call__(self, index, in_shape, out_shape, rng):
        """Return the stored bias; all arguments are ignored."""
        return self.bias
class SimilarityTransform(WarpTransform):
    """
    Similarity warp transformation of the image keeping center invariant.

    A crop of random size, aspect ratio and location is made. This crop can
    then be flipped and/or rotated to finally be resized to output size.

    Args:
        scale (Sequence or float or int, optional): crop area scaling factor
            interval, e.g (a, b), then scale is randomly sampled from the
            range a <= scale <= b. If scale is a number instead of sequence,
            the range of scale will be (scale^-1, scale).
            (default: ``(+1.0, +1.0)``)
        ratio (Sequence or float or int, optional): range of crop aspect
            ratio. If ratio is a number instead of sequence like (min, max),
            the range of aspect ratio will be (ratio^-1, ratio). Will keep
            original aspect ratio by default.
        degrees (Sequence or float or int, optional): range of degrees to
            select from. If degrees is a number instead of sequence like
            (min, max), the range of degrees will be (-degrees, +degrees).
            (default: ``(-0.0, +0.0)``)
        translate (Sequence or float or int, optional): sequence of maximum
            absolute fraction for horizontal and vertical translations. For
            example translate=(a, b), then horizontal shift is randomly
            sampled in the range
            -output_width * a < dx < output_width * a and vertical shift is
            randomly sampled in the range
            -output_height * b < dy < output_height * b. If translate is a
            number instead of sequence, translate will be
            (translate, translate). These translations are applied
            independently from :attr:`random_crop`. (default: ``(0.0, 0.0)``)
        flip_h (float, optional): probability of the image being flipped
            horizontally. (default: ``+0.0``)
        flip_v (float, optional): probability of the image being flipped
            vertically. (default: ``+0.0``)
        resize (bool, optional): resize the cropped image to fit the output
            size. It is forced to ``True`` if :attr:`scale` or :attr:`ratio`
            are specified. (default: ``False``)
        keep_ratio (bool, optional): match the smaller edge to the
            corresponding output edge size, keeping the aspect ratio after
            resize. Has no effect if :attr:`resize` is ``False``.
            (default: ``False``)
        random_crop (bool, optional): randomly crop the image instead of a
            center crop. (default: ``False``)

    Raises:
        ValueError: if a scale or ratio value is not strictly positive, if a
            scalar ``degrees`` is negative, or if a translation fraction is
            outside ``[0, 1]``.
        AssertionError: if a sequence argument is not of length 2.
    """

    def __init__(self,
                 scale=(+1.0, +1.0),
                 ratio=None,
                 degrees=(-0.0, +0.0),
                 translate=(+0.0, +0.0),
                 flip_h=+0.0,
                 flip_v=+0.0,
                 resize=False,
                 keep_ratio=False,
                 random_crop=False):
        if not isinstance(scale, Sequence):
            # Validate before inverting so that 0 reports the documented
            # ValueError instead of a ZeroDivisionError.
            if scale <= 0:
                raise ValueError("scale values should be positive")
            scale = min(scale, 1/scale)
            scale = (scale, 1/scale)
        assert len(scale) == 2, \
            "scale should be a number or a sequence of length 2."
        for s in scale:
            if s <= 0:
                raise ValueError("scale values should be positive")

        if ratio is not None:
            if not isinstance(ratio, Sequence):
                if ratio <= 0:
                    raise ValueError("ratio values should be positive")
                ratio = (ratio, 1/ratio)
            assert len(ratio) == 2, \
                "ratio should be a number or a sequence of length 2."
            for ar in ratio:
                if ar <= 0:
                    raise ValueError("ratio values should be positive")
            # Order the bounds so that they can be used as (low, high).
            ratio = (min(ratio), max(ratio))

        if not isinstance(degrees, Sequence):
            if degrees < 0:
                raise ValueError("If degrees is a single number, it must be "
                                 "positive.")
            degrees = (-degrees, degrees)
        else:
            assert len(degrees) == 2, \
                "degrees should be a number or a sequence of length 2."

        # A single number applies to both axes, as documented.
        if not isinstance(translate, Sequence):
            translate = (translate, translate)
        assert len(translate) == 2, \
            "translate should be a number or a sequence of length 2."
        for t in translate:
            if not (0.0 <= t <= 1.0):
                raise ValueError("translation values should be between 0 and "
                                 "1")

        self.s = scale
        self.ar = ratio
        self.r = degrees
        self.t = translate
        self.fh = float(flip_h)
        self.fv = float(flip_v)
        # Compare as a tuple: a list ``[1.0, 1.0]`` never equals the tuple
        # ``(1.0, 1.0)`` and would otherwise always force a resize.
        self.resize = (resize or tuple(scale) != (1.0, 1.0) or
                       ratio is not None)
        self.keep_ratio = keep_ratio and self.resize
        self.random_crop = random_crop

    def __call__(self, index, in_shape, out_shape, rng):
        """
        Return a random similarity transformation.

        Args:
            index (int): the index of the sample in the dataset (unused)
            in_shape (tuple of ints): the shape of the input sample
            out_shape (tuple of ints): the shape of the output sample
            rng (numpy.random.RandomState): a random number generator seeded
                by the dataloader

        Returns:
            out (tuple of numerics): a flatten, row-major 3 x 3 warp matrix
                returned in a tuple of numerics.
        """
        # Scale is sampled log-uniformly within the configured interval.
        s = np.exp(rng.uniform(low=np.log(self.s[0]), high=np.log(self.s[1])))

        # Per-call copy: the fallback below must disable random cropping for
        # this sample only, never for the transform instance as a whole.
        random_crop = self.random_crop

        if self.ar is not None:
            crop_area = s * in_shape[0] * in_shape[1]
            log_ar_low = np.log(self.ar[0])
            log_ar_high = np.log(self.ar[1])
            # Retry a few aspect ratios until the crop fits in the input.
            for _ in range(10):
                ar = np.exp(rng.uniform(low=log_ar_low, high=log_ar_high))
                crop_w = np.sqrt(crop_area * ar)
                crop_h = np.sqrt(crop_area / ar)

                if 0 < crop_w <= in_shape[0] and 0 < crop_h <= in_shape[1]:
                    break
            else:
                # Fallback to central crop
                in_ar = float(in_shape[0]) / float(in_shape[1])
                if in_ar < self.ar[0]:
                    crop_w = in_shape[0]
                    crop_h = int(round(crop_w / self.ar[0]))
                elif in_ar > self.ar[1]:
                    crop_h = in_shape[1]
                    crop_w = int(round(crop_h * self.ar[1]))
                else:  # whole image
                    crop_w, crop_h = in_shape
                random_crop = False
        elif self.resize:
            # ``s`` scales the area, so each edge is scaled by sqrt(s).
            sqrt_s = np.sqrt(s)
            crop_w = in_shape[0] * sqrt_s
            crop_h = in_shape[1] * sqrt_s
        else:
            crop_w, crop_h = out_shape

        if random_crop:
            # Largest centre offset that keeps the crop inside the input.
            random_crop_tx = max(0, (in_shape[0] - crop_w) / 2)
            random_crop_ty = max(0, (in_shape[1] - crop_h) / 2)
            crop_x = rng.uniform(low=-random_crop_tx, high=random_crop_tx)
            crop_y = rng.uniform(low=-random_crop_ty, high=random_crop_ty)
        else:
            crop_x = 0
            crop_y = 0

        r = rng.uniform(low=self.r[0], high=self.r[1])

        max_tx = self.t[0] * out_shape[0]
        max_ty = self.t[1] * out_shape[1]
        tx = rng.uniform(low=-max_tx, high=max_tx)
        ty = rng.uniform(low=-max_ty, high=max_ty)

        fh = rng.uniform() < self.fh
        fv = rng.uniform() < self.fv

        H = compute_affine_matrix(in_shape, out_shape,
                                  (crop_x, crop_y, crop_w, crop_h),
                                  r, (tx, ty), fh, fv,
                                  self.resize, self.keep_ratio)

        return tuple(H.flatten().tolist())
class RandomResizedCrop(SimilarityTransform):
    """
    Crop to random size, aspect ratio and location.

    A crop of random size, aspect ratio and location is made. This crop is
    finally resized to output size.

    This is popularly used to train the Inception networks.

    Args:
        scale (Sequence or float or int, optional): crop area scaling factor
            interval, e.g (a, b), then scale is randomly sampled from the
            range a <= scale <= b. If scale is a number instead of sequence,
            the range of scale will be (scale^-1, scale).
            (default: ``(+0.08, +1.0)``)
        ratio (Sequence or float or int, optional): range of crop aspect
            ratio. If ratio is a number instead of sequence like (min, max),
            the range of aspect ratio will be (ratio^-1, ratio).
            (default: ``(3./4., 4./3.)``)
    """

    def __init__(self, scale=(+0.08, +1.0), ratio=(3./4., 4./3.)):
        # Always resize, and pick the crop location at random.
        super(RandomResizedCrop, self).__init__(scale=scale,
                                                ratio=ratio,
                                                resize=True,
                                                random_crop=True)
class CenterResizedCrop(SimilarityTransform):
    """
    Crops at the center and resize.

    A crop at the center is made then resized to the output size.

    Args:
        scale (float or int, optional): edges scaling factor.
            (default: ``+1.0``)
        keep_ratio (bool, optional): match the smaller edge to the
            corresponding output edge size, keeping the aspect ratio after
            resize. (default: ``True``)
    """

    def __init__(self, scale=+1.0, keep_ratio=True):
        # The parent expects an *area* factor, hence the squared edge factor;
        # identical bounds make the sampled scale deterministic.
        area_scale = pow(scale, 2)
        super(CenterResizedCrop, self).__init__(
            scale=(area_scale, area_scale),
            resize=True,
            keep_ratio=keep_ratio)
def compute_affine_matrix(in_shape, out_shape,
                          crop=None,
                          degrees=0.0,
                          translate=(0.0, 0.0),
                          flip_h=False,
                          flip_v=False,
                          resize=False,
                          keep_ratio=False):
    """
    Similarity warp transformation of the image keeping center invariant.

    The result is the product of five 3 x 3 matrices, applied left to right:
    input centring (with flips), crop offset, rotation, resize scaling and
    output centring (with translation).

    Args:
        in_shape (Sequence): the shape of the input image
        out_shape (Sequence): the shape of the output image
        crop (Sequence, optional): crop center location, width and height.
            The center location is relative to the center of the image. If
            :attr:`resize` is not ``True``, crop is simply a translation in
            the :attr:`in_shape` space.
        degrees (float or int, optional): degrees to rotate the crop.
            (default: ``(0.0)``)
        translate (Sequence, optional): horizontal and vertical translations.
            (default: ``(0.0, 0.0)``)
        flip_h (bool, optional): flip the image horizontally.
            (default: ``False``)
        flip_v (bool, optional): flip the image vertically.
            (default: ``False``)
        resize (bool, optional): resize the cropped image to fit the output's
            size. (default: ``False``)
        keep_ratio (bool, optional): match the smaller edge to the
            corresponding output edge size, keeping the aspect ratio after
            resize. Has no effect if :attr:`resize` is ``False``.
            (default: ``False``)

    Returns:
        numpy.ndarray: the 3 x 3 transformation matrix.
    """
    if crop is None:
        # No crop given: centred on the image and as large as the input.
        offset_x, offset_y = 0, 0
        width, height = in_shape
    else:
        offset_x, offset_y, width, height = crop

    theta = np.deg2rad(degrees)
    shift_x, shift_y = translate
    # A flip is a -1 factor on the matching axis, +1 otherwise.
    sign_h = 1 - 2 * float(flip_h)
    sign_v = 1 - 2 * float(flip_v)

    # Input centring, carrying the flip signs on its diagonal.
    in_cx = (in_shape[0] - 1) / 2
    in_cy = (in_shape[1] - 1) / 2
    to_input = np.asarray([[sign_h, 0, in_cx],
                           [0, sign_v, in_cy],
                           [0, 0, 1]])

    crop_shift = np.asarray([[1, 0, offset_x],
                             [0, 1, offset_y],
                             [0, 0, 1]])

    rotation = np.asarray([[+np.cos(theta), -np.sin(theta), 0],
                           [+np.sin(theta), +np.cos(theta), 0],
                           [0, 0, 1]])

    zoom_x = 1
    zoom_y = 1
    if resize:
        # Rotate two opposite corners of the crop and measure the extent
        # they span along each axis.
        corners = rotation.dot([[-width / 2, width / 2],
                                [-height / 2, height / 2],
                                [1, 1]])
        corner_a, corner_b = corners.transpose()[:, 0:2]
        width, height = np.absolute(corner_b - corner_a)
        zoom_x = width / out_shape[0]
        zoom_y = height / out_shape[1]
        if keep_ratio:
            # Use the smaller factor on both axes.
            uniform_zoom = min(zoom_x, zoom_y)
            zoom_x = uniform_zoom
            zoom_y = uniform_zoom
    zoom = np.asarray([[zoom_x, 0, 0],
                       [0, zoom_y, 0],
                       [0, 0, 1]])

    # Output centring combined with the requested translation.
    out_dx = shift_x - (out_shape[0] - 1) / 2
    out_dy = shift_y - (out_shape[1] - 1) / 2
    from_output = np.asarray([[1, 0, out_dx],
                              [0, 1, out_dy],
                              [0, 0, 1]])

    # Multiply strictly left to right.
    H = to_input
    for factor in (crop_shift, rotation, zoom, from_output):
        H = H.dot(factor)
    return H