# Source code for carpedm.data.preproc

#
# Copyright (c) 2018 Neal Digre.
#
# This software may be modified and distributed under the terms
# of the MIT license. See the LICENSE file for details.

"""Preprocessing methods.

This module provides methods for preprocessing images.

Todo:
    * Tests
        * ``convert_to_grayscale``
        * ``normalize``
        * ``pad_borders_or_shrink``
    * Fix and generalize ``distort_image``
"""
import tensorflow as tf


def convert_to_grayscale(image):
    """Convert an RGB image to grayscale.

    Args:
        image (:obj:`tf.Tensor`): RGB image tensor; it is passed straight
            to ``tf.image.rgb_to_grayscale``.

    Returns:
        :obj:`tf.Tensor`: The result of ``tf.image.rgb_to_grayscale``.
    """
    return tf.image.rgb_to_grayscale(image)


def normalize(image):
    """Rescale pixel values to [-1, 1].

    The image is first converted to ``tf.float32`` with
    ``tf.image.convert_image_dtype``, then shifted by -0.5 and doubled.
    The stated [-1, 1] range therefore holds when the converted image
    lies in [0, 1].

    Args:
        image (:obj:`tf.Tensor`): Image tensor.

    Returns:
        :obj:`tf.Tensor`: ``tf.float32`` image, ``(converted - 0.5) * 2``.
    """
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    centered = tf.subtract(image, 0.5)
    return tf.multiply(centered, 2.0)


def pad_borders_or_shrink(image, char_bbox, line_bbox,
                          shape, maintain_aspect=True):
    """Shrink the image to fit inside ``shape``, then pad it to ``shape``.

    An axis that is larger than the target is shrunk with
    ``tf.image.resize_images`` (its default interpolation); the image is
    never enlarged. With ``maintain_aspect`` both axes share a single
    scale factor, otherwise each oversized axis is shrunk independently.
    Whatever space remains on an axis is then padded with the mean pixel
    value of the resized image, split as evenly as possible between both
    sides (the extra pixel of an odd remainder goes to the bottom/right).

    Bounding boxes are scaled by the fraction of the output that the
    resized image occupies on each axis and shifted by the leading
    padding fraction, in (h, w, h, w) order. This presumably means boxes
    are normalized (y, x, y, x) coordinates -- confirm against callers.

    Args:
        image (:obj:`tf.Tensor`): Image tensor [height, width, channels].
        char_bbox (:obj:`tf.Tensor`): Character bounding box [4].
        line_bbox (:obj:`tf.Tensor`): Line bounding box [4].
        shape (:obj:`tuple` of :obj:`int`): Output shape (height, width).
        maintain_aspect (bool): Maintain the aspect ratio.

    Returns:
        :obj:`tf.Tensor`: Resized and padded image of spatial size
        ``shape``.
        :obj:`tf.Tensor`: Adjusted character bounding boxes.
        :obj:`tf.Tensor`: Adjusted line bounding boxes.
    """
    target_h, target_w = shape[0], shape[1]
    dims = tf.shape(image)
    h_orig, w_orig = dims[0], dims[1]

    if maintain_aspect:
        h_float = tf.to_float(h_orig)
        w_float = tf.to_float(w_orig)
        # A ratio above 1 means that axis overflows the target. Dividing
        # by the larger ratio (floored at 1) shrinks both axes by one
        # common factor and leaves images that already fit untouched.
        h_ratio = h_float / target_h
        w_ratio = w_float / target_w
        scale = 1.0 / tf.maximum(tf.maximum(h_ratio, w_ratio), 1.0)
        new_h = tf.to_int32(h_float * scale)
        new_w = tf.to_int32(w_float * scale)
        # Truncation can reach 0 for extreme aspect ratios and float
        # error must not overshoot the target, so clamp to [1, target].
        new_h = tf.maximum(tf.minimum(new_h, target_h), 1)
        new_w = tf.maximum(tf.minimum(new_w, target_w), 1)
    else:
        # Shrink only the axes that overflow; smaller axes keep their
        # size so that the padding below fills exactly the remainder.
        new_h = tf.minimum(h_orig, target_h)
        new_w = tf.minimum(w_orig, target_w)

    image = tf.image.resize_images(image, size=[new_h, new_w])

    # Padding is derived from the integer size actually used for the
    # resize, so resized size + padding always equals the target size.
    pad_h = target_h - new_h
    pad_w = target_w - new_w
    pad_top = pad_h // 2
    pad_left = pad_w // 2
    paddings = tf.stack([
        tf.stack([pad_top, pad_h - pad_top]),
        tf.stack([pad_left, pad_w - pad_left]),
        tf.constant([0, 0]),
    ])
    fill_value = tf.reduce_mean(image)
    image = tf.pad(image, paddings, mode='CONSTANT',
                   constant_values=fill_value)

    # Correct bounding boxes: scale by the occupied fraction of each
    # axis, then shift by the leading (top/left) padding fraction.
    h_frac = tf.to_float(new_h) / target_h
    w_frac = tf.to_float(new_w) / target_w
    top_frac = tf.to_float(pad_top) / target_h
    left_frac = tf.to_float(pad_left) / target_w
    ratios = tf.stack([h_frac, w_frac, h_frac, w_frac])
    border = tf.stack([top_frac, left_frac, top_frac, left_frac])
    char_bbox_adjusted = tf.multiply(char_bbox, ratios) + border
    line_bbox_adjusted = tf.multiply(line_bbox, ratios) + border

    return image, char_bbox_adjusted, line_bbox_adjusted


def distort_image(image):
    """Apply a small random rotation to an image.

    The image is padded by 16 pixels on each spatial side with its mean
    value, rotated by an angle drawn uniformly from [-0.3, 0.3) as
    interpreted by ``tf.contrib.image.rotate``, and then centrally
    cropped to 2/3 of the padded size. With 16 pixels of padding per
    side, that crop fraction restores the input size only for axes of
    64 pixels.

    Args:
        image (:obj:`tf.Tensor`): Image tensor; the paddings assume three
            dimensions, with the last one left unpadded.

    Returns:
        :obj:`tf.Tensor`: Rotated and cropped image.
    """
    # TODO: only works for single chars, broken if bounding boxes used
    # Pad with the average so rotation does not pull in empty space.
    border = tf.constant([[16, 16], [16, 16], [0, 0]])
    fill_value = tf.reduce_mean(image)
    padded = tf.pad(tensor=image, paddings=border,
                    constant_values=fill_value)

    # Rotate by one random angle.
    angle = tf.random_uniform(shape=[1], minval=-0.3, maxval=0.3)
    rotated = tf.contrib.image.rotate(padded, angle)

    # Crop away the padded border.
    return tf.image.central_crop(rotated, 2./3.)