loss="sparse_categorical_crossentropy", optimizer=opt, metrics=["accuracy"]
)
history = model.fit(
x_train, y_train, batch_size=16, epochs=5, validation_split=0.2, callbacks=[hook]
)
test_scores = model.evaluate(x_test, y_test, verbose=2, callbacks=[hook])
else:
model.compile(
loss="sparse_categorical_crossentropy", optimizer=opt, metrics=["accuracy"]
)
history = model.fit(x_train, y_train, batch_size=16, epochs=5, validation_split=0.2)
test_scores = model.evaluate(x_test, y_test, verbose=2)
# Check that the hook was created and tensors were saved
trial = smd.create_trial(path=sim.out_dir)
assert smd.get_hook() is not None, "Hook was not created."
assert len(trial.steps()) > 0, "Nothing saved at any step."
assert len(trial.tensor_names()) > 0, "Tensors were not saved."
assert len(trial.tensor_names(collection="gradients")) > 0
if not tf_optimizer:
    # Optimizer variables are currently only saved for Keras optimizers.
    assert len(trial.tensor_names(collection="optimizer_variables")) > 0
with SagemakerSimulator(json_file_contents=json_file_contents) as sim:
    train_op, X, Y = get_train_op_and_placeholders()
    init = tf.compat.v1.global_variables_initializer()
    mnist = get_data()

    # Train for 100 steps on MNIST mini-batches inside a monitored session.
    sess = tf.train.MonitoredSession()
    with sess:
        sess.run(init)
        for step in range(1, 101):
            batch_x, batch_y = mnist.train.next_batch(32)
            sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})

    # Check that the hook was created and tensors were saved
    trial = smd.create_trial(path=sim.out_dir)
    assert smd.get_hook() is not None, "Hook was not created."
    assert len(trial.steps()) > 0, "Nothing saved at any step."
    assert len(trial.tensor_names()) > 0, "Tensors were not saved."
    assert len(trial.tensor_names(collection="gradients")) > 0
if mirrored:
    test_basic("/opt/ml/output/tensors", zcc=True)
else:
    # Setup
    mnist_classifier = get_estimator(nested_optimizer=nested, mirrored=mirrored)
    train_input_fn, eval_input_fn = get_input_fns()

    # Train and evaluate
    train_steps, eval_steps = 10, 10
    mnist_classifier.train(input_fn=train_input_fn, steps=train_steps)
    mnist_classifier.evaluate(input_fn=eval_input_fn, steps=eval_steps)

    # Check that the hook was created and tensors were saved
    trial = smd.create_trial(path=sim.out_dir)
    print(trial)
    assert smd.get_hook() is not None, "Hook was not created."
    assert len(trial.steps()) > 0, "Nothing saved at any step."
    assert len(trial.tensor_names()) > 0, "Tensors were not saved."
    assert trial.steps() == [0, 2, 4, 6, 8, 10, 12, 14, 16, 18], "Wrong step count for trial."
    print(trial.tensor_names(collection="gradients"))
    assert len(trial.tensor_names(collection="gradients")) > 0
            )
        else:
            mnist_classifier.evaluate(input_fn=input_fn_provider.eval_input_fn, steps=num_steps)
    elif s == "predict":
        print("Starting predict")
        if not zcc:
            ts_hook.set_mode(smd.modes.PREDICT)
            # Run prediction with the smdebug hook attached
            p = mnist_classifier.predict(
                input_fn=input_fn_provider.eval_input_fn, hooks=[ts_hook]
            )
        else:
            p = mnist_classifier.predict(input_fn=input_fn_provider.eval_input_fn)
        # predict() returns a lazy generator; advance it to actually run the steps.
        for i in range(num_steps):
            next(p)
get_hook()._cleanup()
return distribution
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate loss (for both TRAIN and EVAL modes)
    onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10)
    loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels, logits=logits)

    # Configure the training op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        # Horovod: scale the learning rate by the number of workers.
        optimizer = tf.train.MomentumOptimizer(learning_rate=0.001 * hvd.size(), momentum=0.9)
        # Horovod: wrap with the distributed optimizer.
        optimizer = hvd.DistributedOptimizer(optimizer)
        # smdebug: wrap the optimizer so gradients are captured.
        optimizer = smd.get_hook().wrap_optimizer(optimizer)
        train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Add evaluation metrics (for EVAL mode)
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])
    }
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
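
For context, a hedged sketch of how a model_fn like this is typically driven with Horovod plus an smdebug hook; cnn_model_fn and train_input_fn stand in for the model function above and an input function not shown, and the paths and step counts are assumptions:

import horovod.tensorflow as hvd
import smdebug.tensorflow as smd
import tensorflow as tf

hvd.init()
# Assumed output location and save interval; adjust for the actual job.
hook = smd.SessionHook(
    out_dir="/tmp/smdebug_outputs",
    save_config=smd.SaveConfig(save_interval=10),
)
hook.set_mode(smd.modes.TRAIN)
estimator = tf.estimator.Estimator(model_fn=cnn_model_fn, model_dir="/tmp/mnist_model")
estimator.train(
    input_fn=train_input_fn,
    steps=100,
    # Broadcast initial variables from rank 0 so all workers start identically.
    hooks=[hook, hvd.BroadcastGlobalVariablesHook(0)],
)
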
            shortcut = x
        else:
            shortcut = builder.max_pooling2d(x, 1, stride)
    else:
        shortcut = builder.conv2d_linear(x, depth, 1, stride, "SAME")

    if basic:
        x = builder.pad2d(x, 1)
        x = builder.conv2d(x, depth_bottleneck, 3, stride, "VALID")
        x = builder.conv2d_linear(x, depth, 3, 1, "SAME")
    else:
        x = builder.conv2d(x, depth_bottleneck, 1, 1, "SAME")
        x = builder.conv2d(x, depth_bottleneck, 3, stride, "SAME")
        # x = builder.conv2d_linear(x, depth, 1, 1, 'SAME')
        x = builder.conv2d_linear_last_bn(x, depth, 1, 1, "SAME")

    # Residual connection followed by ReLU; record the activation with smdebug.
    x = tf.nn.relu(x + shortcut)
    smd.get_hook().add_to_collection("relu_activations", x)
    return x
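
The "relu_activations" collection used above is a custom one; below is a minimal sketch of how such a collection is typically declared on the hook and read back after a run (the hook arguments and paths are illustrative assumptions):

import smdebug.tensorflow as smd
from smdebug.trials import create_trial

# Declare the custom collection so tensors added to it are actually saved.
hook = smd.SessionHook(
    out_dir="/tmp/smdebug_outputs",
    include_collections=["relu_activations", "gradients"],
    save_config=smd.SaveConfig(save_interval=10),
)

# After training, list the recorded activations from a trial.
trial = create_trial("/tmp/smdebug_outputs")
print(trial.tensor_names(collection="relu_activations"))
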
            cdr_alpha,
            lc_periods,
            lc_alpha,
            lc_beta,
        ),
    )
    learning_rate = tf.identity(learning_rate, "learning_rate")
    tf.summary.scalar("learning_rate", learning_rate)

    opt = tf.train.MomentumOptimizer(learning_rate, momentum, use_nesterov=True)
    opt = hvd.DistributedOptimizer(opt)
    if use_larc:
        opt = LarcOptimizer(opt, learning_rate, leta, clip=True)
    opt = MixedPrecisionOptimizer(opt, scale=loss_scale)
    # smdebug: wrap the outermost optimizer so the final gradients are captured.
    opt = smd.get_hook().wrap_optimizer(opt)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) or []
    with tf.control_dependencies(update_ops):
        gate_gradients = tf.train.Optimizer.GATE_NONE
        train_op = opt.minimize(
            total_loss, global_step=tf.train.get_global_step(), gate_gradients=gate_gradients
        )
    train_op = tf.group(preload_op, gpucopy_op, train_op)
    return tf.estimator.EstimatorSpec(mode, loss=total_loss, train_op=train_op)