How to use the horovod.tensorflow.DistributedOptimizer function in horovod

To help you get started, we've selected a few Horovod examples based on popular ways hvd.DistributedOptimizer is used in public projects.


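Most of the snippets below follow the same basic recipe: initialize Horovod, pin each process to one GPU, scale the learning rate by the number of workers, wrap the local optimizer with hvd.DistributedOptimizer so gradients are averaged across workers, and broadcast the initial variables from rank 0. Here is a minimal sketch of that recipe using the TF1-style graph API these projects rely on; the loss is a placeholder you would replace with your own model:

import tensorflow as tf
import horovod.tensorflow as hvd

hvd.init()  # must run before any other Horovod call

# Pin this process to a single GPU (Horovod runs one process per GPU).
config = tf.ConfigProto()
config.gpu_options.visible_device_list = str(hvd.local_rank())

loss = build_loss()  # placeholder: your model's scalar loss tensor
global_step = tf.train.get_or_create_global_step()

# Scale the learning rate by the number of workers, then wrap the local
# optimizer so gradients are averaged across workers before being applied.
opt = tf.train.MomentumOptimizer(learning_rate=0.001 * hvd.size(), momentum=0.9)
opt = hvd.DistributedOptimizer(opt)
train_op = opt.minimize(loss, global_step=global_step)

# Broadcast initial variable states from rank 0 so all workers start identically.
hooks = [hvd.BroadcastGlobalVariablesHook(0)]

with tf.train.MonitoredTrainingSession(hooks=hooks, config=config) as sess:
    while not sess.should_stop():
        sess.run(train_op)
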
github rlgraph / rlgraph / rlgraph / components / optimizers / horovod_optimizer.py
    def __init__(self, local_optimizer=None, **kwargs):
        """
        Initializes a distributed horovod optimizer by wrapping a local optimizer.

        Args:
            local_optimizer (Optional[dict,LocalOptimizer]): The spec-dict for the wrapped LocalOptimizer object or
                a LocalOptimizer object itself.
        """
        super(HorovodOptimizer, self).__init__(**kwargs)

        # Create the horovod wrapper.
        wrapped_local_optimizer = Optimizer.from_spec(local_optimizer)
        self.local_optimizer = hvd.DistributedOptimizer(wrapped_local_optimizer)

        @rlgraph_api
        def step(self, variables, loss, time_percentage, *inputs):
            grads_and_vars = self._graph_fn_calculate_gradients(variables, loss, time_percentage, *inputs)
            return self._graph_fn_apply_gradients(grads_and_vars)
github asyml / texar / examples / bert / bert_classifier_main_v2.py
    # Builds learning rate decay scheduler
    static_lr = config_downstream.lr['static_lr']
    num_train_steps = int(num_train_data / config_data.train_batch_size
                          * config_data.max_train_epoch)
    num_warmup_steps = int(num_train_steps * config_data.warmup_proportion)
    lr = model_utils.get_lr(global_step, num_train_steps,  # lr is a Tensor
                            num_warmup_steps, static_lr)

    opt = tx.core.get_optimizer(
        global_step=global_step,
        learning_rate=lr,
        hparams=config_downstream.opt
    )

    if FLAGS.distributed:
        opt = hvd.DistributedOptimizer(opt)

    train_op = tf.contrib.layers.optimize_loss(
        loss=loss,
        global_step=global_step,
        learning_rate=None,
        optimizer=opt)

    # Train/eval/test routine

    def _is_head():
        if not FLAGS.distributed:
            return True
        return hvd.rank() == 0

    def _train_epoch(sess):
        """Trains on the training set, and evaluates on the dev set
github asyml / texar / examples / distributed_gpu / lm_ptb_distributed.py
    # Losses & train ops
    mle_loss = tx.losses.sequence_sparse_softmax_cross_entropy(
        labels=targets,
        logits=outputs.logits,
        sequence_length=seq_lengths)

    # Use global_step to pass epoch, for lr decay
    global_step = tf.placeholder(tf.int32)

    opt = tx.core.get_optimizer(
        global_step=global_step,
        hparams=config.opt
    )

    # 2. wrap the optimizer
    opt = hvd.DistributedOptimizer(opt)

    train_op = tx.core.get_train_op(
        loss=mle_loss,
        optimizer=opt,
        global_step=global_step,
        learning_rate=None,
        increment_global_step=False,
        hparams=config.opt
    )

    def _run_epoch(sess, data_iter, epoch, is_train=False, verbose=False):
        start_time = time.time()
        loss = 0.
        iters = 0

        fetches = {
github NVIDIA / DALI / docs / examples / tensorflow / demo / nvutils / runner.py
        tf.summary.scalar('top5_accuracy', top5_accuracy[1])
        if mode == tf.estimator.ModeKeys.EVAL:
            metrics = {'top1_accuracy': top1_accuracy,
                       'top5_accuracy': top5_accuracy}
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, eval_metric_ops=metrics)
        assert(mode == tf.estimator.ModeKeys.TRAIN)
        #batch_size = inputs.shape[0]
        batch_size = tf.shape(inputs)[0]
        learning_rate = tf.train.polynomial_decay(
            learning_rate_init, tf.train.get_global_step(),
            decay_steps=decay_steps, end_learning_rate=0.,
            power=learning_rate_power, cycle=False, name='learning_rate')
        opt = tf.train.MomentumOptimizer(
            learning_rate, momentum, use_nesterov=True)
        opt = hvd.DistributedOptimizer(opt)
        opt = nvutils.LarcOptimizer(opt, learning_rate, larc_eta, clip=larc_mode)
        opt = nvutils.LossScalingOptimizer(opt, scale=loss_scale)
        gate_gradients = (tf.train.Optimizer.GATE_OP if deterministic else
                          tf.train.Optimizer.GATE_NONE)
        train_op = opt.minimize(
            loss, global_step=tf.train.get_global_step(),
            gate_gradients=gate_gradients, name='step_update')
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) or []
        train_op = tf.group(train_op, update_ops)
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
github optuna / optuna / examples / horovod_simple.py
def create_optimizer(trial):
    # We optimize the choice of optimizers as well as their parameters.
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-10, 1e-3)
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'MomentumSGD'])
    if optimizer_name == 'Adam':
        adam_lr = trial.suggest_loguniform('adam_lr', 1e-5, 1e-1)
        optimizer = tf.contrib.opt.AdamWOptimizer(learning_rate=adam_lr, weight_decay=weight_decay)
    else:
        momentum_sgd_lr = trial.suggest_loguniform('momentum_sgd_lr', 1e-5, 1e-1)
        momentum = trial.suggest_loguniform('momentum', 1e-5, 1e-1)
        optimizer = tf.contrib.opt.MomentumWOptimizer(
            learning_rate=momentum_sgd_lr, momentum=momentum, weight_decay=weight_decay)

    return hvd.DistributedOptimizer(optimizer)
github horovod / horovod / examples / tensorflow_mnist_estimator.py
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate Loss (for both TRAIN and EVAL modes)
    onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10)
    loss = tf.losses.softmax_cross_entropy(
        onehot_labels=onehot_labels, logits=logits)

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        # Horovod: scale learning rate by the number of workers.
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=0.001 * hvd.size(), momentum=0.9)

        # Horovod: add Horovod Distributed Optimizer.
        optimizer = hvd.DistributedOptimizer(optimizer)

        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                          train_op=train_op)

    # Add evaluation metrics (for EVAL mode)
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(
            labels=labels, predictions=predictions["classes"])}
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
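
The model_fn above only builds the graph; here is a sketch of how such an Estimator is typically driven, assuming the same imports as the snippet (tensorflow as tf, horovod.tensorflow as hvd) and using illustrative names (cnn_model_fn, train_input_fn) for the pieces not shown:

# Pin this process to one GPU and write checkpoints only from rank 0.
config = tf.ConfigProto()
config.gpu_options.visible_device_list = str(hvd.local_rank())
model_dir = './checkpoints' if hvd.rank() == 0 else None  # illustrative path

estimator = tf.estimator.Estimator(
    model_fn=cnn_model_fn,  # the model_fn defined above (illustrative name)
    model_dir=model_dir,
    config=tf.estimator.RunConfig(session_config=config))

# Broadcast initial variable states from rank 0 to all other workers.
bcast_hook = hvd.BroadcastGlobalVariablesHook(0)
estimator.train(input_fn=train_input_fn, steps=1000, hooks=[bcast_hook])
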
github renmengye / revnet-public / run_imagenet_train_horovod.py
    with tf.variable_scope("Model", reuse=None):
      with log.verbose_level(2):
        m = get_model("resnet", config, **kwargs)

  global_step = tf.get_variable(
      "global_step", [],
      initializer=tf.constant_initializer(0),
      trainable=False,
      dtype=tf.int64)
  lr = tf.train.piecewise_constant(
      global_step, config.learn_rate_decay_steps,
      [config.learn_rate] + list(config.learn_rate_list))
  m._lr = lr
  m._global_step = global_step
  opt = tf.train.MomentumOptimizer(lr, 0.9)
  opt = hvd.DistributedOptimizer(opt)
  hooks = [hvd.BroadcastGlobalVariablesHook(0)]
  m._train_op = opt.minimize(m.cost, global_step=global_step, name="train_step")
  tf.summary.scalar("train ce", m.cross_ent)
  return m, hooks
github apcode / tensorflow_fasttext / classifier.py
        logits = tf.contrib.layers.fully_connected(
            inputs=input_layer, num_outputs=num_classes,
            activation_fn=None)
        predictions = tf.argmax(logits, axis=-1)
        probs = tf.nn.softmax(logits)
        loss, train_op = None, None
        metrics = {}
        if mode != tf.estimator.ModeKeys.PREDICT:
            label_lookup_table = tf.contrib.lookup.index_table_from_file(
                FLAGS.label_file, vocab_size=FLAGS.num_labels)
            labels = label_lookup_table.lookup(labels)
            loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=labels, logits=logits))
            opt = tf.train.AdamOptimizer(params["learning_rate"])
            if FLAGS.horovod:
                opt = hvd.DistributedOptimizer(opt)
            train_op = opt.minimize(loss, global_step=tf.train.get_global_step())
            metrics = {
                "accuracy": tf.metrics.accuracy(labels, predictions)
            }
        exports = {}
        if FLAGS.export_dir:
            exports = Exports(probs, text_embedding)
        return tf.estimator.EstimatorSpec(
            mode, predictions=predictions, loss=loss, train_op=train_op,
            eval_metric_ops=metrics, export_outputs=exports)
    session_config = tf.ConfigProto(
github tensorlayer / openpose-plus / train.py
    last_paf = net.last_paf  # net output
    confs_ = net.confs  # GT
    pafs_ = net.pafs  # GT
    mask = net.m1  # mask1, GT
    # net.m2 = m2                 # mask2, GT
    stage_losses = net.stage_losses
    l2_loss = net.l2_loss

    global_step = tf.Variable(1, trainable=False)
    # scaled_lr = lr_init * hvd.size()  # Horovod: scale the learning rate linearly
    scaled_lr = lr_init  # Linear scaling rule is not working in openpose training.
    with tf.variable_scope('learning_rate'):
        lr_v = tf.Variable(scaled_lr, trainable=False)

    opt = tf.train.MomentumOptimizer(lr_v, 0.9)
    opt = hvd.DistributedOptimizer(opt)  # Horovod
    train_op = opt.minimize(total_loss, global_step=global_step)
    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)

    config.gpu_options.allow_growth = True  # Horovod
    config.gpu_options.visible_device_list = str(hvd.local_rank())  # Horovod

    # Add variable initializer.
    init = tf.global_variables_initializer()

    # Horovod: broadcast initial variable states from rank 0 to all other processes.
    # This is necessary to ensure consistent initialization of all workers when
    # training is started with random weights or restored from a checkpoint.
    bcast = hvd.broadcast_global_variables(0)  # Horovod

    # Horovod: adjust number of steps based on number of GPUs.
    global n_step, lr_decay_every_step
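
The snippet stops before the session is created; here is a sketch of how the init and bcast ops defined above are typically run when using a plain tf.Session instead of hooks (the inner loop body is a placeholder for the real batch feed):

with tf.Session(config=config) as sess:
    sess.run(init)   # initialize variables on every worker
    sess.run(bcast)  # Horovod: then overwrite them with rank 0's values
    for step in range(n_step):
        sess.run(train_op)  # placeholder: pass a feed_dict with real batches here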