# Source code for benzina.torch.operations
# -*- coding: utf-8 -*-
from collections.abc import Sequence

import numpy as np
class WarpTransform:
    """
    Interface class that represents a warp transformation as a combined rotation,
    scale, skew and translation 3 x 3 matrix. The transformation is called for each
    sample of a batch.
    """
    def __call__(self, index, in_shape, out_shape, rng):
        """
        __call__ needs to be implemented in subclasses

        Args:
            index (int): the index of the sample in the dataset
            in_shape (tuple of ints): the shape of the input sample
            out_shape (tuple of ints): the shape of the output sample
            rng (numpy.random.RandomState): a random number generator seeded by the dataloader

        Returns:
            out (tuple of numerics): a flatten, row-major 3 x 3 warp matrix returned in
            a tuple of numerics.
        """
        # Bug fix: the exception must be raised, not returned — returning it
        # let a non-overriding subclass silently hand an exception instance
        # to the caller instead of failing loudly.
        raise NotImplementedError('__call__ needs to be implemented in subclasses')
class OOBTransform:
    """
    Interface class that represents an out of bounds transformation. The
    transformation is called for each sample of a batch.
    """
    def __call__(self, index, in_shape, out_shape, rng):
        """
        __call__ needs to be implemented in subclasses

        Args:
            index (int): the index of the sample in the dataset
            in_shape (tuple of ints): the shape of the input sample
            out_shape (tuple of ints): the shape of the output sample
            rng (numpy.random.RandomState): a random number generator seeded by the dataloader

        Returns:
            out (tuple of numerics): a tuple in RGB order containing the RGB color to
            use when no data is available.
        """
        # Bug fix: raise instead of returning the exception instance so an
        # unimplemented subclass fails loudly.
        raise NotImplementedError('__call__ needs to be implemented in subclasses')
class ColorTransform:
    """
    Interface class that represents a color transformation from YCbCr to RGB as
    defined in Benzina's kernel. The transformation is called for each sample of
    a batch.

    ===== =================================
    Index Description
    ===== =================================
    0     ITU-R BT.601-6-625 recommendation
          * Kr = 0.299
          * Kg = 0.587
          * Kb = 0.114
          but full scale
          * Y,Cb,Cr in [0, 255]
    1     ITU-R BT.601-6-625 recommendation
          * Kr = 0.299
          * Kg = 0.587
          * Kb = 0.114
          with head/footroom
          * Y in [16,235]
          * Cb,Cr in [16,240]
    2     ITU-R BT.709 recommendation
          * Kr = 0.2126
          * Kg = 0.7152
          * Kb = 0.0722
          with head/footroom
          * Y in [16,235]
          * Cb,Cr in [16,240]
    3     ITU-R BT.2020 recommendation
          * Kr = 0.2627
          * Kg = 0.6780
          * Kb = 0.0593
          with head/footroom
          * Y in [16,235]
          * Cb,Cr in [16,240]
    ===== =================================
    """
    def __call__(self, index, in_shape, out_shape, rng):
        """
        __call__ needs to be implemented in subclasses

        Args:
            index (int): the index of the sample in the dataset
            in_shape (tuple of ints): the shape of the input sample
            out_shape (tuple of ints): the shape of the output sample
            rng (numpy.random.RandomState): a random number generator seeded by the dataloader

        Returns:
            out (tuple of numerics): a tuple containing a single int indicating which
            method to use when converting a sample's YCbCr value to RGB.
        """
        # Bug fix: raise instead of returning the exception instance so an
        # unimplemented subclass fails loudly.
        raise NotImplementedError('__call__ needs to be implemented in subclasses')
class NormTransform:
    """
    Interface class that represents a normalization transformation. The transformation
    is called for each sample of a batch.
    """
    def __call__(self, index, in_shape, out_shape, rng):
        """
        __call__ needs to be implemented in subclasses

        Args:
            index (int): the index of the sample in the dataset
            in_shape (tuple of ints): the shape of the input sample
            out_shape (tuple of ints): the shape of the output sample
            rng (numpy.random.RandomState): a random number generator seeded by the dataloader

        Returns:
            out (tuple of numerics): a tuple in RGB order containing the normalization
            constant of a sample's RGB channels. Components will be multiplied to the
            respective channels of a sample.
        """
        # Bug fix: raise instead of returning the exception instance so an
        # unimplemented subclass fails loudly.
        raise NotImplementedError('__call__ needs to be implemented in subclasses')
class BiasTransform:
    """
    Interface class that represents a bias transformation. The transformation
    is called for each sample of a batch.
    """
    def __call__(self, index, in_shape, out_shape, rng):
        """
        __call__ needs to be implemented in subclasses

        Args:
            index (int): the index of the sample in the dataset
            in_shape (tuple of ints): the shape of the input sample
            out_shape (tuple of ints): the shape of the output sample
            rng (numpy.random.RandomState): a random number generator seeded by the dataloader

        Returns:
            out (tuple of numerics): a tuple in RGB order containing the bias of a
            sample's RGB channels. Components will be subtracted from the respective
            channels of a sample.
        """
        # Bug fix: raise instead of returning the exception instance so an
        # unimplemented subclass fails loudly.
        raise NotImplementedError('__call__ needs to be implemented in subclasses')
class ConstantWarpTransform(WarpTransform):
    """
    Represents a constant warp transformation to be applied on each sample of a
    batch independently of its index.

    Args:
        warp (iterable of numerics, optional): a flatten, row-major 3 x 3 warp matrix
            (default: flatten identity matrix).
    """
    def __init__(self, warp=(1.0, 0.0, 0.0,
                             0.0, 1.0, 0.0,
                             0.0, 0.0, 1.0)):
        if warp is None:
            # Fall back to the flattened 3 x 3 identity matrix.
            self.warp = (1.0, 0.0, 0.0,
                         0.0, 1.0, 0.0,
                         0.0, 0.0, 1.0)
        else:
            self.warp = tuple(warp)

    def __call__(self, index, in_shape, out_shape, rng):
        # The same warp matrix is used for every sample of the batch.
        return self.warp
class ConstantOOBTransform(OOBTransform):
    """
    Represents a constant out of bounds transformation to be applied on each
    sample of a batch independently of its index.

    Args:
        oob (numeric or iterable of numerics, optional): an iterable in RGB order
            containing the RGB color to use when no data is available (default:
            (0.0, 0.0, 0.0)).
    """
    def __init__(self, oob=(0.0, 0.0, 0.0)):
        if oob is None:
            oob = (0.0, 0.0, 0.0)
        if isinstance(oob, (int, float)):
            # A scalar is broadcast over the three RGB channels.
            oob = (float(oob),) * 3
        self.oob = tuple(oob)

    def __call__(self, index, in_shape, out_shape, rng):
        # The same out-of-bounds color is used for every sample of the batch.
        return self.oob
class ConstantColorTransform(ColorTransform):
    """
    Represents a constant color transformation to be applied on each sample of a
    batch independently of its index.

    Args:
        index (int, optional): the index of the method to use when converting
            a sample's YCbCr value to RGB (default: 0).
    """
    def __init__(self, index=0):
        if index is None:
            self.index = (0,)
        elif isinstance(index, int):
            # Wrap the scalar method index in a 1-tuple.
            self.index = (int(index),)
        else:
            self.index = tuple(index)

    def __call__(self, index, in_shape, out_shape, rng):
        # The same conversion method index is used for every sample.
        return self.index
class ConstantNormTransform(NormTransform):
    """
    Represents a constant norm transformation to be applied on each sample of a
    batch independently of its index.

    Args:
        norm (numeric or iterable of numerics, optional): an iterable in RGB order
            containing the normalization constant of a sample's RGB channels.
            Components will be multiplied to the respective channels of a sample
            (default: (1.0, 1.0, 1.0)).
    """
    def __init__(self, norm=(1.0, 1.0, 1.0)):
        if norm is None:
            norm = (1.0, 1.0, 1.0)
        if isinstance(norm, (int, float)):
            # A scalar is broadcast over the three RGB channels.
            norm = (float(norm),) * 3
        self.norm = tuple(norm)

    def __call__(self, index, in_shape, out_shape, rng):
        # The same normalization constants are used for every sample.
        return self.norm
class ConstantBiasTransform(BiasTransform):
    """
    Represents a constant bias transformation to be applied on each sample of a
    batch independently of its index.

    Args:
        bias (numeric or iterable of numerics, optional): an iterable in RGB order
            containing the bias of a sample's RGB channels. Components will be
            subtracted from the respective channels of a sample
            (default: (0.0, 0.0, 0.0)).
    """
    def __init__(self, bias=(0.0, 0.0, 0.0)):
        if bias is None:
            bias = (0.0, 0.0, 0.0)
        if isinstance(bias, (int, float)):
            # A scalar is broadcast over the three RGB channels.
            bias = (float(bias),) * 3
        self.bias = tuple(bias)

    def __call__(self, index, in_shape, out_shape, rng):
        # The same bias is used for every sample of the batch.
        return self.bias
class SimilarityTransform(WarpTransform):
    """
    Similarity warp transformation of the image keeping center invariant.

    A crop of random size, aspect ratio and location is made. This crop can
    then be flipped and/or rotated to finally be resized to output size.

    Args:
        scale (Sequence or float or int, optional): crop area scaling factor
            interval, e.g (a, b), then scale is randomly sampled from the range
            a <= scale <= b. If scale is a number instead of sequence, the
            range of scale will be (scale^-1, scale).
            (default: ``(+1.0, +1.0)``)
        ratio (Sequence or float or int, optional): range of crop aspect ratio.
            If ratio is a number instead of sequence like (min, max), the range
            of aspect ratio will be (ratio^-1, ratio). Will keep original
            aspect ratio by default.
        degrees (Sequence or float or int, optional): range of degrees to
            select from. If degrees is a number instead of sequence like
            (min, max), the range of degrees will be (-degrees, +degrees).
            (default: ``(-0.0, +0.0)``)
        translate (Sequence or float or int, optional): sequence of maximum
            absolute fraction for horizontal and vertical translations. For
            example translate=(a, b), then horizontal shift is randomly sampled
            in the range -output_width * a < dx < output_width * a and vertical
            shift is randomly sampled in the range
            -output_height * b < dy < output_height * b. If translate is a
            number instead of sequence, translate will be
            (translate, translate). These translations are applied
            independently from :attr:`random_crop`. (default: ``(0.0, 0.0)``)
        flip_h (float, optional): probability of the image being flipped
            horizontally. (default: ``+0.0``)
        flip_v (float, optional): probability of the image being flipped
            vertically. (default: ``+0.0``)
        resize (bool, optional): resize the cropped image to fit the output
            size. It is forced to ``True`` if :attr:`scale` or :attr:`ratio`
            are specified. (default: ``False``)
        keep_ratio (bool, optional): match the smaller edge to the
            corresponding output edge size, keeping the aspect ratio after
            resize. Has no effect if :attr:`resize` is ``False``.
            (default: ``False``)
        random_crop (bool, optional): randomly crop the image instead of
            a center crop. (default: ``False``)
    """
    def __init__(self,
                 scale=(+1.0, +1.0),
                 ratio=None,
                 degrees=(-0.0, +0.0),
                 translate=(+0.0, +0.0),
                 flip_h=+0.0,
                 flip_v=+0.0,
                 resize=False,
                 keep_ratio=False,
                 random_crop=False):
        # Normalize a scalar scale to an ordered (min, max) interval.
        if not isinstance(scale, Sequence):
            scale = min(scale, 1 / scale)
            scale = (scale, 1 / scale)
        assert len(scale) == 2, \
            "scale should be a number or a sequence of length 2."
        for s in scale:
            if s <= 0:
                raise ValueError("scale values should be positive")
        if ratio is not None:
            # Normalize a scalar ratio to an ordered (min, max) interval.
            if not isinstance(ratio, Sequence):
                ratio = (ratio, 1 / ratio)
            assert len(ratio) == 2, \
                "ratio should be a number or a sequence of length 2."
            for ar in ratio:
                if ar <= 0:
                    raise ValueError("ratio values should be positive")
            ratio = (min(ratio), max(ratio))
        # Normalize a scalar degrees to a symmetric (-d, +d) interval.
        if not isinstance(degrees, Sequence):
            if degrees < 0:
                raise ValueError("If radians is a single number, it must be "
                                 "positive.")
            degrees = (-degrees, degrees)
        else:
            assert len(degrees) == 2, \
                "degrees should be a number or a sequence of length 2."
        assert isinstance(translate, Sequence) and len(translate) == 2, \
            "translate should be a sequence and it must be of length 2."
        for t in translate:
            if not (0.0 <= t <= 1.0):
                raise ValueError("translation values should be between 0 and "
                                 "1")
        self.s = scale
        self.ar = ratio
        self.r = degrees
        self.t = translate
        self.fh = float(flip_h)
        self.fv = float(flip_v)
        # A non-identity scale or an explicit ratio implies a resize.
        self.resize = resize or scale != (1.0, 1.0) or ratio is not None
        self.keep_ratio = keep_ratio and self.resize
        self.random_crop = random_crop

    def __call__(self, index, in_shape, out_shape, rng):
        """Return a random similarity transformation."""
        # Sample the crop area scale log-uniformly within the configured range.
        s = np.exp(rng.uniform(low=np.log(self.s[0]), high=np.log(self.s[1])))
        # Bug fix: use a local flag instead of mutating self.random_crop.
        # The original assigned self.random_crop = False in the fallback
        # below, permanently disabling random cropping for every later
        # sample once a single sample hit the fallback.
        random_crop = self.random_crop
        if self.ar is not None:
            # Try up to 10 times to sample a crop that fits in the input.
            for _ in range(10):
                ar = np.exp(rng.uniform(low=np.log(self.ar[0]),
                                        high=np.log(self.ar[1])))
                crop_area = s * in_shape[0] * in_shape[1]
                crop_w = np.sqrt(crop_area * ar)
                crop_h = np.sqrt(crop_area / ar)
                if 0 < crop_w <= in_shape[0] and 0 < crop_h <= in_shape[1]:
                    break
            else:
                # Fallback to central crop, clamped to the nearest valid
                # aspect ratio.
                in_ar = float(in_shape[0]) / float(in_shape[1])
                if in_ar < self.ar[0]:
                    crop_w = in_shape[0]
                    crop_h = int(round(crop_w / self.ar[0]))
                elif in_ar > self.ar[1]:
                    crop_h = in_shape[1]
                    crop_w = int(round(crop_h * self.ar[1]))
                else:  # whole image
                    crop_w, crop_h = in_shape
                random_crop = False
        elif self.resize:
            # Uniform scaling of both edges preserves the aspect ratio.
            sqrt_s = np.sqrt(s)
            crop_w = in_shape[0] * sqrt_s
            crop_h = in_shape[1] * sqrt_s
        else:
            crop_w, crop_h = out_shape
        if random_crop:
            # Sample the crop center within the region that keeps the crop
            # inside the input bounds.
            random_crop_tx = max(0, (in_shape[0] - crop_w) / 2)
            random_crop_ty = max(0, (in_shape[1] - crop_h) / 2)
            crop_x = rng.uniform(low=-random_crop_tx, high=random_crop_tx)
            crop_y = rng.uniform(low=-random_crop_ty, high=random_crop_ty)
        else:
            crop_x = 0
            crop_y = 0
        r = rng.uniform(low=self.r[0], high=self.r[1])
        # Translations are expressed as a fraction of the output size.
        max_tx = self.t[0] * out_shape[0]
        max_ty = self.t[1] * out_shape[1]
        tx = rng.uniform(low=-max_tx, high=max_tx)
        ty = rng.uniform(low=-max_ty, high=max_ty)
        # Flip coin tosses against the configured probabilities.
        fh = rng.uniform() < self.fh
        fv = rng.uniform() < self.fv
        H = compute_affine_matrix(in_shape, out_shape,
                                  (crop_x, crop_y, crop_w, crop_h),
                                  r, (tx, ty), fh, fv, self.resize,
                                  self.keep_ratio)
        return tuple(H.flatten().tolist())
class RandomResizedCrop(SimilarityTransform):
    """
    Crop to random size, aspect ratio and location.

    A crop of random size, aspect ratio and location is made, then resized to
    the output size. This transform is popularly used to train the Inception
    networks.

    Args:
        scale (Sequence or float or int, optional): crop area scaling factor
            interval, e.g (a, b), then scale is randomly sampled from the range
            a <= scale <= b. If scale is a number instead of sequence, the
            range of scale will be (scale^-1, scale).
            (default: ``(+0.08, +1.0)``)
        ratio (Sequence or float or int, optional): range of crop aspect ratio.
            If ratio is a number instead of sequence like (min, max), the range
            of aspect ratio will be (ratio^-1, ratio).
            (default: ``(3./4., 4./3.)``)
    """
    def __init__(self,
                 scale=(+0.08, +1.0),
                 ratio=(3./4., 4./3.)):
        # Delegate to SimilarityTransform with resizing and random cropping
        # always enabled.
        SimilarityTransform.__init__(self, scale=scale, ratio=ratio,
                                     resize=True, random_crop=True)
class CenterResizedCrop(SimilarityTransform):
    """
    Crops at the center and resize.

    A crop at the center is made then resized to the output size.

    Args:
        scale (float or int, optional): edges scaling factor.
            (default: ``+1.0``)
        keep_ratio (bool, optional): match the smaller edge to the
            corresponding output edge size, keeping the aspect ratio after
            resize. (default: ``True``)
    """
    def __init__(self,
                 scale=+1.0,
                 keep_ratio=True):
        # scale applies to the edges while SimilarityTransform's scale is an
        # area factor, hence the squaring.
        area_scale = scale ** 2
        SimilarityTransform.__init__(self,
                                     scale=(area_scale, area_scale),
                                     resize=True, keep_ratio=keep_ratio)
def compute_affine_matrix(in_shape,
                          out_shape,
                          crop=None,
                          degrees=0.0,
                          translate=(0.0, 0.0),
                          flip_h=False,
                          flip_v=False,
                          resize=False,
                          keep_ratio=False):
    """
    Similarity warp transformation of the image keeping center invariant.

    Args:
        in_shape (Sequence): the shape of the input image
        out_shape (Sequence): the shape of the output image
        crop (Sequence, optional): crop center location, width and height. The
            center location is relative to the center of the image. If
            :attr:`resize` is not ``True``, crop is simply a translation in the
            :attr:`in_shape` space.
        degrees (float or int, optional): degrees to rotate the crop.
            (default: ``(0.0)``)
        translate (Sequence, optional): horizontal and vertical translations.
            (default: ``(0.0, 0.0)``)
        flip_h (bool, optional): flip the image horizontally.
            (default: ``False``)
        flip_v (bool, optional): flip the image vertically.
            (default: ``False``)
        resize (bool, optional): resize the cropped image to fit the output's
            size. (default: ``False``)
        keep_ratio (bool, optional): match the smaller edge to the
            corresponding output edge size, keeping the aspect ratio after
            resize. Has no effect if :attr:`resize` is ``False``.
            (default: ``False``)

    Returns:
        numpy.ndarray: the 3 x 3 homogeneous warp matrix mapping output
        coordinates to input coordinates.
    """
    # With no explicit crop, use the whole input centered on itself.
    if crop is None:
        cx, cy = 0, 0
        crop_w, crop_h = in_shape
    else:
        cx, cy, crop_w, crop_h = crop
    theta = np.deg2rad(degrees)
    tx, ty = translate
    # Flips are encoded as a -1 sign on the corresponding axis.
    sign_h = 1 - 2 * float(flip_h)
    sign_v = 1 - 2 * float(flip_v)
    #
    # H = T_inshape*T_crop*R*S_resize*T_outshapeT
    #
    # Map the input's center (pixel-center convention) back to its corner.
    T_inshape = np.array([[sign_h, 0, (in_shape[0] - 1) / 2],
                          [0, sign_v, (in_shape[1] - 1) / 2],
                          [0, 0, 1]])
    # Translate to the crop center (relative to the image center).
    T_crop = np.array([[1, 0, cx],
                       [0, 1, cy],
                       [0, 0, 1]])
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    R = np.array([[+cos_t, -sin_t, 0],
                  [+sin_t, +cos_t, 0],
                  [0, 0, 1]])
    sx = 1
    sy = 1
    if resize:
        # Rotate the crop's opposite corners to measure the axis-aligned
        # extent of the rotated crop, then scale it onto the output.
        rotated = R.dot(np.array([[-crop_w / 2, crop_w / 2],
                                  [-crop_h / 2, crop_h / 2],
                                  [1.0, 1.0]]))
        crop_w, crop_h = np.absolute(rotated[0:2, 1] - rotated[0:2, 0])
        sx = crop_w / out_shape[0]
        sy = crop_h / out_shape[1]
        if keep_ratio:
            # Match the smaller edge: a single uniform scale on both axes.
            sx = sy = min(sx, sy)
    S_resize = np.array([[sx, 0, 0],
                         [0, sy, 0],
                         [0, 0, 1]])
    # Shift by the requested translation and recenter on the output.
    T_outshapeT = np.array([[1, 0, tx - (out_shape[0] - 1) / 2],
                            [0, 1, ty - (out_shape[1] - 1) / 2],
                            [0, 0, 1]])
    return T_inshape.dot(T_crop).dot(R).dot(S_resize).dot(T_outshapeT)