def test_mode_writing():
    run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f")
    worker = socket.gethostname()
    for s in range(0, 10):
        fw = FileWriter(trial_dir="/tmp/ts_outputs/" + run_id, step=s, worker=worker)
        if s % 2 == 0:
            fw.write_tensor(
                tdata=np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32),
                tname="arr",
                mode=ModeKeys.TRAIN,
                mode_step=s // 2,
            )
        else:
            fw.write_tensor(
                tdata=np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32),
                tname="arr",
                mode=ModeKeys.EVAL,
                mode_step=s // 2,
            )
        fw.close()
    write_dummy_collection_file("/tmp/ts_outputs/" + run_id)
    files = glob.glob("/tmp/ts_outputs/" + run_id + "/**/*.tfevents", recursive=True)
    global_steps = []
    train_steps = []
    eval_steps = []
def test_mode_data():
    run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f")
    trial_dir = "/tmp/ts_outputs/" + run_id
    c = CollectionManager()
    c.add("default")
    c.get("default").tensor_names = ["arr"]
    c.export(trial_dir, DEFAULT_COLLECTIONS_FILE_NAME)
    tr = create_trial(trial_dir)
    worker = socket.gethostname()
    for s in range(0, 10):
        fw = FileWriter(trial_dir=trial_dir, step=s, worker=worker)
        if s % 2 == 0:
            # Even global steps are written under TRAIN mode.
            fw.write_tensor(
                tdata=np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32),
                tname="arr",
                mode=modes.TRAIN,
                mode_step=s // 2,
            )
        else:
            # Odd global steps are written under EVAL mode.
            fw.write_tensor(
                tdata=np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32),
                tname="arr",
                mode=modes.EVAL,
                mode_step=s // 2,
            )
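
# The two tests above only show the write path. Below is a minimal read-back sketch,
# assuming smdebug's Trial API (create_trial, trial.steps, trial.tensor(...).value),
# that `modes` is importable as shown, and that `trial_dir` is a directory written as
# above; the function name and printed comments are illustrative, not from the source.
from smdebug import modes
from smdebug.trials import create_trial

def check_mode_steps(trial_dir):
    trial = create_trial(trial_dir)
    print(trial.steps())                  # global steps, e.g. [0, 1, ..., 9]
    print(trial.steps(mode=modes.TRAIN))  # TRAIN mode steps, e.g. [0, 1, 2, 3, 4]
    print(trial.steps(mode=modes.EVAL))   # EVAL mode steps, e.g. [0, 1, 2, 3, 4]
    # Fetch the tensor written at TRAIN mode step 2 (global step 4).
    value = trial.tensor("arr").value(2, mode=modes.TRAIN)
    print(value)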
    # Set up the Tornasole (smdebug) hook.
    # Save tensors as reductions if requested via FLAGS.reductions.
    rdnc = (
        smd.ReductionConfig(reductions=["mean"], abs_reductions=["max"], norms=["l1"])
        if FLAGS.reductions
        else None
    )
    ts_hook = smd.SessionHook(
        out_dir=FLAGS.smdebug_path,
        save_all=FLAGS.save_all,
        include_collections=["weights", "gradients", "losses", "biases"],
        save_config=smd.SaveConfig(save_interval=FLAGS.save_frequency),
        reduction_config=rdnc,
    )
    ts_hook.set_mode(smd.modes.TRAIN)

    # Horovod: adjust the number of steps based on the number of GPUs.
    mnist_classifier.train(
        input_fn=train_input_fn,
        steps=FLAGS.steps // hvd.size(),
        hooks=[logging_hook, bcast_hook, ts_hook],
    )

    # Evaluate the model and print results.
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": eval_data}, y=eval_labels, num_epochs=1, shuffle=False
    )
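
    # A sketch of the evaluation call that would typically follow, assuming the same
    # estimator and hook objects as above (the original snippet stops before this point):
    # switching the hook to EVAL mode records the evaluation tensors under EVAL steps.
    ts_hook.set_mode(smd.modes.EVAL)
    eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn, hooks=[ts_hook])
    print(eval_results)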
    opt = tf.train.RMSPropOptimizer(lr)
    # Wrap the optimizer so smdebug can capture gradients and optimizer variables.
    opt = hook.wrap_optimizer(opt)
    model.compile(
        optimizer=opt,
        loss="sparse_categorical_crossentropy",
        run_eagerly=False,
        metrics=["accuracy"],
    )
    hooks = [hook]
    hook.save_scalar("tf_keras_num_steps", steps, sm_metric=True)
    hook.save_scalar("tf_keras_before_train", 1, sm_metric=False)
    hook.set_mode(ModeKeys.TRAIN)
    model.fit(x_train, y_train, epochs=1, steps_per_epoch=steps, callbacks=hooks, verbose=0)
    hook.set_mode(ModeKeys.EVAL)
    model.evaluate(x_test, y_test, steps=10, callbacks=hooks, verbose=0)
    hook.save_scalar("tf_keras_after_train", 1, sm_metric=False)
        model.compile(
            loss="sparse_categorical_crossentropy", optimizer=opt, metrics=["accuracy"]
        )
        history = model.fit(
            x_train, y_train, batch_size=16, epochs=5, validation_split=0.2, callbacks=[hook]
        )
        test_scores = model.evaluate(x_test, y_test, verbose=2, callbacks=[hook])
    else:
        model.compile(
            loss="sparse_categorical_crossentropy", optimizer=opt, metrics=["accuracy"]
        )
        history = model.fit(x_train, y_train, batch_size=16, epochs=5, validation_split=0.2)
        test_scores = model.evaluate(x_test, y_test, verbose=2)

    # Check that the hook was created and that tensors were saved.
    trial = smd.create_trial(path=sim.out_dir)
    assert smd.get_hook() is not None, "Hook was not created."
    assert len(trial.steps()) > 0, "Nothing saved at any step."
    assert len(trial.tensor_names()) > 0, "Tensors were not saved."
    assert len(trial.tensor_names(collection="gradients")) > 0
    if not tf_optimizer:
        # Optimizer variables are currently saved only for Keras optimizers.
        assert len(trial.tensor_names(collection="optimizer_variables")) > 0
loss="sparse_categorical_crossentropy", optimizer=opt, metrics=["accuracy"]
)
history = model.fit(
x_train, y_train, batch_size=16, epochs=5, validation_split=0.2, callbacks=[hook]
)
test_scores = model.evaluate(x_test, y_test, verbose=2, callbacks=[hook])
else:
model.compile(
loss="sparse_categorical_crossentropy", optimizer=opt, metrics=["accuracy"]
)
history = model.fit(x_train, y_train, batch_size=16, epochs=5, validation_split=0.2)
test_scores = model.evaluate(x_test, y_test, verbose=2)
# Check that hook created and tensors saved
trial = smd.create_trial(path=sim.out_dir)
assert smd.get_hook() is not None, "Hook was not created."
assert len(trial.steps()) > 0, "Nothing saved at any step."
assert len(trial.tensor_names()) > 0, "Tensors were not saved."
assert len(trial.tensor_names(collection="gradients")) > 0
if not tf_optimizer:
# as this is only supported for keras optimizers currently
assert len(trial.tensor_names(collection="optimizer_variables")) > 0
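
    # A short sketch of drilling into one of the saved gradients, assuming the `trial`
    # object created above; the tensor chosen and the step index are illustrative.
    grad_names = trial.tensor_names(collection="gradients")
    first_step = trial.steps()[0]
    grad_value = trial.tensor(grad_names[0]).value(first_step)  # numpy array
    print(grad_names[0], grad_value.shape)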
def test_fetch_tensor_with_present_event_files():
    """
    event files present: [0, 18, 27, 36, ...., 190]
    index files present: [0, 9, 18, 27, 36, ...., 190, 199]
    end_of_job file    : present
    """
    path = "s3://smdebug-testing/resources/event-files-missing"
    trial = create_trial(path)
    # Fetch a value from an event file that is present.
    trial.tensor("gradients/pow_grad/sub:0").value(0)
        if script_mode:
            hook.record_tensor_value(tensor_name="loss", tensor_value=loss)
        loss.backward()
        optimizer.step()
        if i == 499:  # stop after 500 mini-batches
            break
    print("Finished Training")

    hook = smd.get_hook()
    print(f"hook = {hook}")

    from smdebug.trials import create_trial

    trial = create_trial(path=sim.out_dir)
    print(f"trial.steps() = {trial.steps()}")
    print(f"trial.tensor_names() = {trial.tensor_names()}")
    print(f"collection_manager = {hook.collection_manager}")

    losses_tensors = hook.collection_manager.get("losses").tensor_names
    print(f"'losses' collection tensor_names = {losses_tensors}")
    assert len(losses_tensors) > 0
    assert all(
        [
            name in trial.tensor_names()
            for name in hook.collection_manager.get("losses").tensor_names
        ]
    )
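
    # A brief sketch of pulling the recorded loss values back out, assuming the `trial`
    # and `losses_tensors` objects from the block above; purely illustrative.
    loss_name = losses_tensors[0]
    for step in trial.steps():
        print(step, trial.tensor(loss_name).value(step))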
def test_save_all(hook=None, out_dir=None):
    hook_created = False
    if hook is None:
        hook_created = True
        save_config = SaveConfig(save_steps=[0, 1, 2, 3])
        run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f")
        out_dir = "/tmp/" + run_id
        print("Registering the hook with out_dir {}".format(out_dir))
        hook = t_hook(out_dir=out_dir, save_config=save_config, save_all=True)

    run_mnist_gluon_model(hook=hook, num_steps_train=7, num_steps_eval=5)

    # Assert on the saved steps and tensor names.
    print("Created the trial with out_dir {}".format(out_dir))
    tr = create_trial(out_dir)
    tensor_list = tr.tensor_names()
    assert tr
    assert len(tr.steps()) == 4
    # Some tensor names (e.g. inputs and outputs) cannot be retrieved from the training
    # session itself, so we only assert on the tensor count; the expected count of 46
    # comes from the index files. If this assertion passes, the hook saved all tensors.
    assert len(tensor_list) == 46

    if hook_created:
        shutil.rmtree(out_dir)
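
# A small follow-up sketch for inspecting what a save_all run captured, assuming the `tr`
# trial object from test_save_all (before its cleanup); the "weights" collection name is
# illustrative and mirrors the collection usage earlier in this page.
for name in tr.tensor_names(collection="weights"):
    # Step 0 is one of the configured save_steps=[0, 1, 2, 3].
    print(name, tr.tensor(name).value(0).shape)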