Examples
Single Character Task
Below is an example Task definition for a single character recognition task and the corresponding import in __init__.py for accessing the task through the registry. For more details on Task definition and default properties, please refer to the Tasks documentation.
ocr.py
#
# Copyright (C) 2018 Neal Digre.
#
# This software may be modified and distributed under the terms
# of the MIT license. See the LICENSE file for details.
"""Optical character recognition tasks.
TODO:
* Modularize common loss functions, select by id
* Modularize common regularization options, select by id
"""
import abc
import tensorflow as tf
from carpedm.tasks.generic import Task
from carpedm.util import registry
from carpedm.util.eval import confusion_matrix
class OCRTask(Task):
    """Abstract class for OCR Tasks."""

    def __init__(self, **kwargs):
        super(OCRTask, self).__init__(**kwargs)

    @property
    def target(self):
        return 'image/seq/char/id'

    @property
    def blocks(self):
        return False

    @property
    def character(self):
        return True

    @property
    def line(self):
        return False

    @property
    def label(self):
        return True

    @property
    def bbox(self):
        return False

    @property
    @abc.abstractmethod
    def sparse_labels(self):
        return False

    def regularization(self, hparams):
        raise NotImplementedError

    def results(self, loss, tower_features, tower_preds, tower_targets,
                is_training):
        raise NotImplementedError

    def loss_fn(self, features, model_output, targets, is_training):
        raise NotImplementedError
@registry.register_task
class OCRSingleKana(OCRTask):
    """Single character recognition task."""

    @property
    def image_scope(self):
        return 'char'

    @property
    def character_set(self):
        return 'kana'

    def results(self, loss, tower_features, tower_preds, tower_targets,
                is_training):
        tensors_to_log = {'loss': loss}

        tf.summary.image("sample_input", tower_features[0]['image/data'])

        # Concatenate the per-tower logits so predictions and metrics
        # cover the full batch.
        all_logits = tf.concat([p for p in tower_preds], axis=0)
        predictions = {
            'classes': tf.argmax(all_logits, axis=1),
            'probabilities': tf.nn.softmax(all_logits)
        }
        stacked_labels = tf.squeeze(tf.concat(tower_targets, axis=0))

        confusion_matrix(
            stacked_labels, predictions['classes'], self.num_classes)

        accuracy = tf.metrics.accuracy(stacked_labels, predictions['classes'])
        metrics = {'accuracy': accuracy}
        return tensors_to_log, predictions, metrics

    def loss_fn(self, features, model_output, targets, is_training):
        with tf.name_scope('batch_xentropy'):
            loss = tf.losses.sparse_softmax_cross_entropy(
                logits=model_output, labels=targets)
        return loss

    def regularization(self, hparams):
        # L2 weight decay over all trainable variables.
        model_params = tf.trainable_variables()
        weight_loss = tf.multiply(
            hparams.weight_decay,
            tf.add_n([tf.nn.l2_loss(v) for v in model_params]),
            name='weight_loss')
        return weight_loss

    @property
    def sparse_labels(self):
        return False
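Because OCRTask collects the properties shared by all OCR tasks, adding another single-character task is mostly a matter of registering a subclass with a different character set. The following is only an illustrative sketch; the class name OCRSingleKanji and the 'kanji' character-set identifier are assumptions for this example, not identifiers taken from carpedm:

@registry.register_task
class OCRSingleKanji(OCRSingleKana):
    """Hypothetical single-kanji recognition task (illustration only)."""

    # Reuses results, loss_fn, regularization, and sparse_labels from
    # OCRSingleKana; only the character set changes.
    @property
    def character_set(self):
        return 'kanji'  # assumed identifier; check the carpedm data docs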
tasks.__init__.py
#
# Copyright (C) 2018 Neal Digre.
#
# This software may be modified and distributed under the terms
# of the MIT license. See the LICENSE file for details.
from carpedm.tasks import generic
# Defined tasks. Imports here force registration.
from carpedm.tasks.ocr import OCRSingleKana
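The import of OCRSingleKana above is what actually populates the registry, so client code never needs to import the task class directly; it can look the task up by its snake-case name instead. A minimal sketch, assuming (as the main.py example at the end of this page does) that importing the top-level carpedm package is enough to trigger these registration imports:

from carpedm.util import registry
import carpedm  # importing the package runs the registration imports above

task_cls = registry.task('ocr_single_kana')  # the snake_case name of OCRSingleKana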
Baseline Model
baseline.py
#
# Copyright (C) 2018 Neal Digre.
#
# This software may be modified and distributed under the terms
# of the MIT license. See the LICENSE file for details.
"""Baseline models."""
import tensorflow as tf
from carpedm.models.generic import TFModel
from carpedm import nn
from carpedm.util import registry
@registry.register_model
class SingleCharBaseline(TFModel):
    """A simple baseline CNN model."""

    def __init__(self, num_classes, *args, **kwargs):
        """Initializer.

        Overrides TFModel.

        Args:
            num_classes: Number of possible character classes.
            *args: Unused arguments.
            **kwargs: Unused arguments.
        """
        self._num_classes = num_classes
        self._cnn = nn.conv.CNN()

    @property
    def name(self):
        return "Baseline_" + self._cnn.name

    def _forward_pass(self, features, data_format, axes_order,
                      is_training, reuse):
        x = self._cnn.forward_pass(
            features, data_format, axes_order, is_training, False, reuse)
        x = tf.layers.flatten(x)
        tf.logging.info('image after flatten: %s', x.get_shape())
        x = tf.layers.dense(
            inputs=x, units=200, activation=tf.nn.relu, name='dense1')
        nn.util.activation_summary(x)
        x = tf.layers.dense(
            inputs=x, units=200, activation=tf.nn.relu, name='dense2')
        nn.util.activation_summary(x)
        logits = tf.layers.dense(
            inputs=x, units=self._num_classes, name='logits')
        return logits
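The _forward_pass above produces a [batch, num_classes] logits tensor, which is exactly what OCRSingleKana.loss_fn feeds to tf.losses.sparse_softmax_cross_entropy. Once registered (and imported in models.__init__.py below), the model is retrieved by its snake-case name just like the task. A minimal sketch, assuming task has been instantiated as in the main.py example at the end of this page:

from carpedm.util import registry

model_cls = registry.model('single_char_baseline')  # snake_case name of SingleCharBaseline
model = model_cls(num_classes=task.num_classes)
print(model.name)  # "Baseline_" followed by the wrapped CNN's name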
models.__init__.py
#
# Copyright (C) 2018 Neal Digre.
#
# This software may be modified and distributed under the terms
# of the MIT license. See the LICENSE file for details.
from carpedm.models import generic
# Defined models. Imports here force registration.
from carpedm.models.baseline import SingleCharBaseline
Using Tasks and Models
Below is a minimal main.py example to get started training a model using the Task interface. For an in-depth description, please refer to the Training a Model guide.
#
# Copyright (C) 2018 Neal Digre.
#
# This software may be modified and distributed under the terms
# of the MIT license. See the LICENSE file for details.
"""Minimal main module.
If this file is changed, please also change the ``:lines:`` option in
the following files where this code is referenced with the
``literalinclude`` directive.
* ../guides/usage.rst
"""
import os
import re
import tensorflow as tf
import carpedm as dm
from carpedm.util import registry
tf.logging.set_verbosity(tf.logging.INFO)
# Task definition
args = {'data_dir': dm.data.sample,
        'task_dir': '/tmp/carpedm_tasks',
        'shape_store': None,
        'shape_in': (64, 64)}
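# The task is looked up by its snake_case registry name: 'ocr_single_kana'
# is the registered name of the OCRSingleKana class from ocr.py above.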
task = registry.task('ocr_single_kana')(**args)
# Training Hyperparameters
num_epochs = 30
training_hparams = {'train_batch_size': 32,
                    'eval_batch_size': 1,
                    'data_format': 'channels_last',
                    'optimizer': 'sgd',
                    'learning_rate': 1e-3,
                    'momentum': 0.96,
                    'weight_decay': 2e-4,
                    'gradient_clipping': None,
                    'lr_decay_steps': None,
                    'init_dir': None,  # for pre-trained models
                    'sync': False}
# Model hyperparameters and definition
model_hparams = {}
model = registry.model('single_char_baseline')(num_classes=task.num_classes, **model_hparams)
# Unique job_id
experiment_id = 'example'
shape = re.sub(r'([,])', '_', re.sub(r'([() ])', '', str(args['shape_in'])))
job_id = os.path.join(experiment_id, shape, model.name)
task.job_id = job_id # Used to check for first model initialization.
job_dir = os.path.join(task.task_log_dir, job_id)
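# With the settings above, shape == '64_64', so job_id is
# 'example/64_64/' + model.name; the Estimator below writes its checkpoints
# and summaries under job_dir (task.task_log_dir joined with job_id).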
# TensorFlow Configuration
sess_config = tf.ConfigProto(
    allow_soft_placement=True,
    log_device_placement=False,
    intra_op_parallelism_threads=0,
    gpu_options=tf.GPUOptions(force_gpu_compatible=True))
config = tf.estimator.RunConfig(session_config=sess_config,
                                model_dir=job_dir,
                                save_summary_steps=10)
hparams = tf.contrib.training.HParams(is_chief=config.is_chief,
                                      **training_hparams)
# Input and model functions
train_input_fn = task.input_fn(hparams.train_batch_size,
                               subset='train',
                               num_shards=1,
                               overwrite=False)
eval_input_fn = task.input_fn(hparams.eval_batch_size,
                              subset='dev',
                              num_shards=1,
                              overwrite=False)
model_fn = task.model_fn(model, num_gpus=0, variable_strategy='CPU',
                         num_workers=config.num_worker_replicas or 1)
# Number of training steps
train_examples = dm.data.num_examples_per_epoch(task.task_data_dir, 'train')
eval_examples = dm.data.num_examples_per_epoch(task.task_data_dir, 'dev')
if eval_examples % hparams.eval_batch_size != 0:
    raise ValueError(('validation set size (%d) must be multiple of '
                      'eval_batch_size (%d)') % (eval_examples,
                                                 hparams.eval_batch_size))
eval_steps = eval_examples // hparams.eval_batch_size
train_steps = num_epochs * ((train_examples // hparams.train_batch_size) or 1)
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=train_steps)
eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn, steps=eval_steps)
# Estimator definition and training
estimator = tf.estimator.Estimator(model_fn=model_fn, config=config, params=hparams)
tf.estimator.train_and_evaluate(estimator, train_spec=train_spec, eval_spec=eval_spec)
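Running the script alternates training and evaluation according to the two specs via tf.estimator.train_and_evaluate. Because RunConfig sets model_dir=job_dir and save_summary_steps=10, checkpoints and summaries (including the sample-input images and accuracy metric defined in the task's results method) end up under job_dir, so a run can be monitored with, for example, tensorboard --logdir <job_dir>.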