How to use the smdebug.mxnet.hook.Hook class in smdebug

To help you get started, we’ve selected a few smdebug examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github awslabs / sagemaker-debugger / tests / mxnet / test_modes.py View on Github external
def test_modes(hook=None, path=None):
    """Run the MNIST Gluon model with mode switching and check the saved trial.

    When no hook is passed, a hook is built that writes to a fresh
    timestamped directory under /tmp, using save_interval=2 for TRAIN and
    save_interval=3 for EVAL and including the "gradients" and "weights"
    collections. When a hook is passed, ``path`` must point at that hook's
    output directory because the trial is loaded from ``path``.
    """
    if hook is None:
        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S%f")
        path = "/tmp/" + ("trial_" + timestamp)
        per_mode_config = {
            modes.TRAIN: SaveConfigMode(save_interval=2),
            modes.EVAL: SaveConfigMode(save_interval=3),
        }
        hook = t_hook(
            out_dir=path,
            save_config=SaveConfig(per_mode_config),
            include_collections=["gradients", "weights"],
        )

    run_mnist_gluon_model(
        hook=hook,
        set_modes=True,
        register_to_loss_block=True,
        num_steps_train=6,
        num_steps_eval=6,
    )

    trial = create_trial(path)
    mode_count = len(trial.modes())
    assert mode_count == 2
    step_count = len(trial.steps())
    assert step_count == 5, trial.steps()
github awslabs / sagemaker-debugger / tests / mxnet / test_hook_reduce_config.py View on Github external
def test_save_config(hook=None, out_dir=None):
    hook_created = False
    if hook is None:
        hook_created = True
        global_reduce_config = ReductionConfig(reductions=["max", "mean"])
        global_save_config = SaveConfig(save_steps=[0, 1, 2, 3])

        run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f")
        out_dir = "/tmp/newlogsRunTest/" + run_id
        print("Registering the hook with out_dir {0}".format(out_dir))
        hook = t_hook(
            out_dir=out_dir,
            save_config=global_save_config,
            include_collections=[
                "weights",
                "biases",
                "gradients",
                "default",
                "ReluActivation",
                "flatten",
            ],
            reduction_config=global_reduce_config,
        )
        hook.get_collection("ReluActivation").include(["relu*"])
        hook.get_collection("ReluActivation").save_config = SaveConfig(save_steps=[4, 5, 6])
        hook.get_collection("ReluActivation").reduction_config = ReductionConfig(
            reductions=["min"], abs_reductions=["max"]
github awslabs / sagemaker-debugger / tests / mxnet / test_hook_save_config.py View on Github external
def test_save_config(hook=None):
    """Run the MNIST Gluon model with a per-collection save configuration.

    When no hook is passed, a hook is created that writes to a fresh
    timestamped directory under /tmp with a hook-wide SaveConfig of steps
    0-3, and the "ReluActivation" collection is given its own SaveConfig of
    steps 4-6 plus the include patterns "relu*", "input_*" and "output*".
    That directory is removed once the run finishes.

    When a hook is passed, it is used as-is and nothing is deleted; the
    caller owns the hook's output directory.
    """
    # Remember ownership up front: `hook` is always set by the time the model
    # has run, so testing `hook is None` afterwards could never trigger the
    # cleanup below.
    hook_created = hook is None
    out_dir = None
    if hook_created:
        save_config_collection = SaveConfig(save_steps=[4, 5, 6])
        run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f")
        out_dir = "/tmp/" + run_id
        save_config = SaveConfig(save_steps=[0, 1, 2, 3])
        hook = t_hook(
            out_dir=out_dir,
            save_config=save_config,
            include_collections=["ReluActivation", "weights", "biases", "gradients", "default"],
        )
        custom_collect = hook.get_collection("ReluActivation")
        custom_collect.save_config = save_config_collection
        custom_collect.include(["relu*", "input_*", "output*"])

    run_mnist_gluon_model(hook=hook, num_steps_train=10, num_steps_eval=10)
    if hook_created:
        shutil.rmtree(out_dir)
github awslabs / sagemaker-debugger / tests / mxnet / test_hook_custom_collection.py View on Github external
def test_hook_custom_collection():
    """Run the MNIST Gluon model with only a custom "ReluActivation" collection.

    The hook writes to a fresh timestamped directory under /tmp with a
    SaveConfig of steps 0-3; the collection is configured with the include
    patterns "relu*" and "input_*". The directory is removed after the run.
    """
    step_config = SaveConfig(save_steps=[0, 1, 2, 3])
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S%f")
    out_dir = "/tmp/" + ("trial_" + timestamp)
    hook = t_hook(
        out_dir=out_dir,
        save_config=step_config,
        include_collections=["ReluActivation"],
    )
    relu_collection = hook.get_collection("ReluActivation")
    relu_collection.include(["relu*", "input_*"])
    run_mnist_gluon_model(hook=hook, num_steps_train=10, num_steps_eval=10)
    shutil.rmtree(out_dir)
github awslabs / sagemaker-debugger / tests / mxnet / test_hook_save_config.py View on Github external
def test_save_config_hookjson_config():
    """Run ``test_save_config`` with a hook built from a JSON config file.

    The JSON file location is handed to the hook factory through the
    environment variable named by ``CONFIG_FILE_PATH_ENV_STR``. The variable
    is restored to its previous state and the output directory is removed
    even when the run fails, so this test does not leak state into tests
    that run after it.
    """
    from smdebug.core.json_config import CONFIG_FILE_PATH_ENV_STR

    # NOTE(review): presumably this matches the out_dir declared inside the
    # JSON config file -- confirm against the config.
    out_dir = "/tmp/test_hook_from_json_config_full"
    shutil.rmtree(out_dir, True)

    previous_config_path = os.environ.get(CONFIG_FILE_PATH_ENV_STR)
    os.environ[
        CONFIG_FILE_PATH_ENV_STR
    ] = "tests/mxnet/test_json_configs/test_save_config_hookjson_config.json"
    try:
        hook = t_hook.create_from_json_file()
        test_save_config(hook=hook)
    finally:
        # Put the environment back exactly as it was found.
        if previous_config_path is None:
            os.environ.pop(CONFIG_FILE_PATH_ENV_STR, None)
        else:
            os.environ[CONFIG_FILE_PATH_ENV_STR] = previous_config_path
        shutil.rmtree(out_dir, True)
github awslabs / sagemaker-debugger / tests / mxnet / test_hook_save_all.py View on Github external
def test_save_all(hook=None, out_dir=None):
    """Run the MNIST Gluon model with ``save_all=True`` and check what was saved.

    When no hook is passed, a hook is created that writes to a fresh
    timestamped directory under /tmp with a SaveConfig of steps 0-3, and
    that directory is removed after the assertions pass. When a hook is
    passed, ``out_dir`` must be the directory that hook writes to, and it is
    left in place.
    """
    hook_created = hook is None
    if hook_created:
        step_config = SaveConfig(save_steps=[0, 1, 2, 3])
        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S%f")
        out_dir = "/tmp/" + ("trial_" + timestamp)
        print("Registering the hook with out_dir {}".format(out_dir))
        hook = t_hook(out_dir=out_dir, save_config=step_config, save_all=True)

    run_mnist_gluon_model(hook=hook, num_steps_train=7, num_steps_eval=5)

    # Load the trial back and check the saved steps and tensor names.
    print("Created the trial with out_dir {}".format(out_dir))
    trial = create_trial(out_dir)
    saved_tensor_names = trial.tensor_names()
    assert trial
    saved_steps = trial.steps()
    assert len(saved_steps) == 4
    # Some tensor names (e.g. inputs and outputs) cannot be retrieved from the
    # training session, so only the number of tensors is checked here.
    # The expected count of 46 was taken from the index file; reaching it
    # means the script was able to save all tensors.
    tensor_count = len(saved_tensor_names)
    assert tensor_count == 46
    if hook_created:
        shutil.rmtree(out_dir)
github awslabs / sagemaker-debugger / tests / mxnet / test_hook_loss_collection.py View on Github external
def test_loss_collection_default():
    """Check that a loss tensor is saved when the hook is registered to the loss block.

    A hook with a SaveConfig of steps 0-3 writes to a timestamped directory
    under the relative path "newlogsRunTest/". After the run, the trial must
    report 4 steps and must contain at least one tensor whose name matches
    ".*loss", with a non-empty value at step 1.
    """
    save_config = SaveConfig(save_steps=[0, 1, 2, 3])
    run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f")
    out_dir = "newlogsRunTest/" + run_id
    hook = t_hook(out_dir=out_dir, save_config=save_config)
    assert not has_training_ended(out_dir)
    run_mnist_gluon_model(
        hook=hook, num_steps_train=10, num_steps_eval=10, register_to_loss_block=True
    )

    print("Created the trial with out_dir {0}".format(out_dir))
    tr = create_trial(out_dir)
    assert tr
    assert len(tr.steps()) == 4

    all_tensor_names = tr.tensor_names()
    print(all_tensor_names)
    loss_names = tr.tensor_names(regex=".*loss")
    # Fail with a readable message rather than an IndexError when no loss
    # tensor was recorded.
    assert loss_names, "no tensor matching '.*loss' was saved; got {}".format(all_tensor_names)
    loss_tensor = tr.tensor(loss_names[0])
    loss_val = loss_tensor.value(step_num=1)
    assert len(loss_val) > 0
github awslabs / sagemaker-debugger / tests / mxnet / test_hook.py View on Github external
def test_hook():
    """Run the MNIST Gluon model with a default hook registered to the loss block.

    The hook uses a SaveConfig of steps 0-3 and writes to a fresh timestamped
    directory under /tmp/newlogsRunTest/. The directory is removed after the
    run, including when the run or the pre-run assertion fails.
    """
    save_config = SaveConfig(save_steps=[0, 1, 2, 3])
    run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f")
    out_dir = "/tmp/newlogsRunTest/" + run_id
    hook = t_hook(out_dir=out_dir, save_config=save_config)
    try:
        assert not has_training_ended(out_dir)
        run_mnist_gluon_model(
            hook=hook, num_steps_train=10, num_steps_eval=10, register_to_loss_block=True
        )
    except Exception:
        # Best-effort cleanup that must not mask the original failure.
        shutil.rmtree(out_dir, ignore_errors=True)
        raise
    # On success keep the strict removal: it raises if no output was written.
    shutil.rmtree(out_dir)
github awslabs / sagemaker-debugger / tests / mxnet / test_hook_reduce_config.py View on Github external
def test_save_config_hook_from_json():
    """Run ``test_save_config`` with a hook built from a JSON config file.

    The JSON file location is handed to the hook factory through the
    environment variable named by ``CONFIG_FILE_PATH_ENV_STR``. The variable
    is restored to its previous state and the output directory is removed
    even when the run fails, so this test does not leak state into tests
    that run after it.
    """
    from smdebug.core.json_config import CONFIG_FILE_PATH_ENV_STR
    import os

    # NOTE(review): presumably this matches the out_dir declared inside the
    # JSON config file -- confirm against the config.
    out_dir = "/tmp/newlogsRunTest2/test_hook_reduce_config_hook_from_json"
    shutil.rmtree(out_dir, True)

    previous_config_path = os.environ.get(CONFIG_FILE_PATH_ENV_STR)
    os.environ[
        CONFIG_FILE_PATH_ENV_STR
    ] = "tests/mxnet/test_json_configs/test_hook_reduce_config_hook.json"
    try:
        hook = t_hook.create_from_json_file()
        test_save_config(hook, out_dir)
    finally:
        # Put the environment back exactly as it was found.
        if previous_config_path is None:
            os.environ.pop(CONFIG_FILE_PATH_ENV_STR, None)
        else:
            os.environ[CONFIG_FILE_PATH_ENV_STR] = previous_config_path
        # delete output
        shutil.rmtree(out_dir, True)