# How to use the horovod.mxnet.rank function in horovod

## To help you get started, we’ve selected a few horovod examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

horovod / horovod / test / test_mxnet.py View on Github
``````# Threshold for floating point equality depends on number of
# ranks, since we're comparing against precise multiplication.
if size <= 3 or dtype in ['int32', 'int64']:
threshold = 1
elif size < 10:
threshold = 1e-4
elif size < 15:
threshold = 5e-4
else:
break

if max_difference > threshold:
print("average", count, dtype, dim, max_difference, threshold)
print("tensor", hvd.rank(), tensor)
print("averaged", hvd.rank(), averaged)
assert max_difference <= threshold, 'hvd.allreduce produces \
incorrect results for average'``````
horovod / horovod / test / test_mxnet.py View on Github
``````broadcast_tensor = hvd.broadcast(tensor, root_rank=root_rank,
name=str(count))
if rank != root_rank:
if same(tensor.asnumpy(), root_tensor.asnumpy()):
mx.nd.max(tensor == root_tensor))
print("tensor", hvd.rank(), tensor)
print("root_tensor", hvd.rank(), root_tensor)
print("comparison", hvd.rank(), tensor == root_tensor)
assert not same(tensor.asnumpy(), root_tensor.asnumpy()), \
print("root_tensor", hvd.rank(), root_tensor)
print("comparison", hvd.rank(),
dmlc / gluon-nlp / scripts / bert / finetune_squad.py View on Github
``````fh = logging.FileHandler(os.path.join(args.output_dir, 'finetune_squad.log'),
mode='w')
fh.setLevel(logging.INFO)
fh.setFormatter(formatter)
console = logging.StreamHandler()
console.setLevel(logging.INFO)
console.setFormatter(formatter)

log.info(args)

if args.comm_backend == 'horovod':
import horovod.mxnet as hvd
hvd.init()
rank = hvd.rank()
size = hvd.size()
local_rank = hvd.local_rank()
else:
rank = 0
size = 1
local_rank = 0

if args.dtype == 'float16':
from mxnet.contrib import amp
amp.init()

model_name = args.bert_model
dataset_name = args.bert_dataset
only_predict = args.only_predict
model_parameters = args.model_parameters
pretrained_bert_parameters = args.pretrained_bert_parameters``````
dmlc / gluon-nlp / scripts / pretraining / run_electra.py View on Github
``````def init_comm(backend, gpus):
"""Init communication backend"""
# backend specific implementation
if backend == 'horovod':
try:
import horovod.mxnet as hvd  # pylint: disable=import-outside-toplevel
except ImportError:
logging.info('horovod must be installed.')
sys.exit(1)
hvd.init()
store = None
num_workers = hvd.size()
rank = hvd.rank()
local_rank = hvd.local_rank()
is_master_node = rank == local_rank
ctx_l = [mx.gpu(local_rank)]
logging.info('GPU communication supported by horovod')
else:
store = mx.kv.create(backend)
num_workers = store.num_workers
rank = store.rank
local_rank = 0
is_master_node = rank == local_rank
if gpus == '-1' or gpus == '':
ctx_l = [mx.cpu()]
logging.info('Runing on CPU')
else:
ctx_l = [mx.gpu(int(x)) for x in gpus.split(',')]
logging.info('GPU communication supported by KVStore')``````
dmlc / gluon-nlp / scripts / bert / run_pretraining.py View on Github
``````def init_comm(backend):
"""Init communication backend"""
# backend specific implementation
if backend == 'horovod':
try:
import horovod.mxnet as hvd  # pylint: disable=import-outside-toplevel
except ImportError:
logging.info('horovod must be installed.')
sys.exit(1)
hvd.init()
store = None
num_workers = hvd.size()
rank = hvd.rank()
local_rank = hvd.local_rank()
is_master_node = rank == local_rank
ctxs = [mx.gpu(local_rank)]
else:
# kvstore
store = mx.kv.create(backend)
num_workers = store.num_workers
rank = store.rank
local_rank = 0
is_master_node = rank == local_rank
ctxs = [mx.cpu()] if args.gpus is None or args.gpus == '' else \
[mx.gpu(int(x)) for x in args.gpus.split(',')]
return store, num_workers, rank, local_rank, is_master_node, ctxs``````
hpi-xnor / BMXNet-v2 / example / distributed_training-horovod / gluon_mnist.py View on Github
``````dirname=data_dir)
with zipfile.ZipFile(zip_file_path) as zf:
zf.extractall(data_dir)

input_shape = (1, 28, 28)
batch_size = args.batch_size

train_iter = mx.io.MNISTIter(
image="%s/train-images-idx3-ubyte" % data_dir,
label="%s/train-labels-idx1-ubyte" % data_dir,
input_shape=input_shape,
batch_size=batch_size,
shuffle=True,
flat=False,
num_parts=hvd.size(),
part_index=hvd.rank()
)

val_iter = mx.io.MNISTIter(
image="%s/t10k-images-idx3-ubyte" % data_dir,
label="%s/t10k-labels-idx1-ubyte" % data_dir,
input_shape=input_shape,
batch_size=batch_size,
flat=False,
)

return train_iter, val_iter``````
hpi-xnor / BMXNet-v2 / example / distributed_training-horovod / module_mnist.py View on Github
``````input_shape=input_shape,
batch_size=batch_size,
shuffle=True,
flat=False,
num_parts=hvd.size(),
part_index=hvd.rank()
)

val_iter = mx.io.MNISTIter(
image="%s/t10k-images-idx3-ubyte" % data_dir,
label="%s/t10k-labels-idx1-ubyte" % data_dir,
input_shape=input_shape,
batch_size=batch_size,
flat=False,
num_parts=hvd.size(),
part_index=hvd.rank()
)

return train_iter, val_iter``````
awslabs / sagemaker-debugger / examples / mxnet / scripts / mnist_mxnet_hvd.py View on Github
``````zip_file_path = download("http://data.mxnet.io/mxnet/data/mnist.zip", dirname=data_dir)
with zipfile.ZipFile(zip_file_path) as zf:
zf.extractall(data_dir)

input_shape = (1, 28, 28)
batch_size = args.batch_size

train_iter = mx.io.MNISTIter(
image="%s/train-images-idx3-ubyte" % data_dir,
label="%s/train-labels-idx1-ubyte" % data_dir,
input_shape=input_shape,
batch_size=batch_size,
shuffle=True,
flat=False,
num_parts=hvd.size(),
part_index=hvd.rank(),
)

val_iter = mx.io.MNISTIter(
image="%s/t10k-images-idx3-ubyte" % data_dir,
label="%s/t10k-labels-idx1-ubyte" % data_dir,
input_shape=input_shape,
batch_size=batch_size,
flat=False,
)

return train_iter, val_iter``````
hpi-xnor / BMXNet-v2 / example / distributed_training-horovod / module_mnist.py View on Github
``````dirname=data_dir)
with zipfile.ZipFile(zip_file_path) as zf:
zf.extractall(data_dir)

input_shape = (1, 28, 28)
batch_size = args.batch_size

train_iter = mx.io.MNISTIter(
image="%s/train-images-idx3-ubyte" % data_dir,
label="%s/train-labels-idx1-ubyte" % data_dir,
input_shape=input_shape,
batch_size=batch_size,
shuffle=True,
flat=False,
num_parts=hvd.size(),
part_index=hvd.rank()
)

val_iter = mx.io.MNISTIter(
image="%s/t10k-images-idx3-ubyte" % data_dir,
label="%s/t10k-labels-idx1-ubyte" % data_dir,
input_shape=input_shape,
batch_size=batch_size,
flat=False,
num_parts=hvd.size(),
part_index=hvd.rank()
)

return train_iter, val_iter``````

## horovod

Distributed training framework for TensorFlow, Keras, PyTorch, and Apache MXNet.

Apache-2.0