def _test_dist_operations(sagemaker_session, image_uri, instance_type, dist_backend, train_instance_count=3):
    with timeout(minutes=DEFAULT_TIMEOUT):
        pytorch = PyTorch(entry_point=dist_operations_path,
                          role='SageMakerRole',
                          train_instance_count=train_instance_count,
                          train_instance_type=instance_type,
                          sagemaker_session=sagemaker_session,
                          image_name=image_uri,
                          hyperparameters={'backend': dist_backend})

        pytorch.sagemaker_session.default_bucket()
        fake_input = pytorch.sagemaker_session.upload_data(path=dist_operations_path,
                                                           key_prefix='pytorch/distributed_operations')

        job_name = utils.unique_name_from_base('test-pytorch-dist-ops')
        pytorch.fit({'required_argument': fake_input}, job_name=job_name)
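
The helper above is parameterized by backend so one body can serve several tests. A minimal sketch of how tests might drive it, assuming the usual conftest fixtures; these wrapper names are hypothetical, not the suite's actual tests:

def test_dist_operations_gloo(sagemaker_session, ecr_image, instance_type):
    # CPU-friendly collective-ops run over the gloo backend
    _test_dist_operations(sagemaker_session, ecr_image, instance_type, 'gloo')

def test_dist_operations_nccl(sagemaker_session, ecr_image, instance_type):
    # nccl needs GPUs; a single multi-GPU instance is enough
    _test_dist_operations(sagemaker_session, ecr_image, instance_type, 'nccl', train_instance_count=1)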
def test_training_smdebug(sagemaker_session, ecr_image, instance_type):
    hyperparameters = {'random_seed': True, 'num_steps': 50,
                       'smdebug_path': '/opt/ml/output/tensors', 'epochs': 1,
                       'data_dir': training_dir}
    with timeout(minutes=DEFAULT_TIMEOUT):
        pytorch = PyTorch(entry_point=smdebug_mnist_script,
                          role='SageMakerRole',
                          train_instance_count=1,
                          train_instance_type=instance_type,
                          sagemaker_session=sagemaker_session,
                          image_name=ecr_image,
                          hyperparameters=hyperparameters)

        training_input = pytorch.sagemaker_session.upload_data(path=training_dir,
                                                               key_prefix='pytorch/mnist')
        job_name = utils.unique_name_from_base('test-pytorch-smdebug')
        pytorch.fit({'training': training_input}, job_name=job_name)
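
The test passes an smdebug_path hyperparameter so the entry point can register a debugger hook and emit tensors under /opt/ml/output/tensors. A sketch of what the script side might look like, assuming the smdebug PyTorch API; the real smdebug_mnist_script may differ:

import argparse

import torch.nn as nn
from smdebug.pytorch import Hook

parser = argparse.ArgumentParser()
parser.add_argument('--smdebug_path', type=str)
args, _ = parser.parse_known_args()

model = nn.Linear(784, 10)  # stand-in for the real MNIST network

# Write debug tensors to the path the test supplied as a hyperparameter
hook = Hook(out_dir=args.smdebug_path)
hook.register_module(model)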
train_records = FileSystemRecordSet(
    file_system_id=file_system_fsx_id,
    file_system_type="FSxLustre",
    directory_path=FSX_DIR_PATH,
    num_records=NUM_RECORDS,
    feature_dim=FEATURE_DIM,
)
test_records = FileSystemRecordSet(
    file_system_id=file_system_fsx_id,
    file_system_type="FSxLustre",
    directory_path=FSX_DIR_PATH,
    num_records=NUM_RECORDS,
    feature_dim=FEATURE_DIM,
    channel="test",
)

job_name = unique_name_from_base("tune-kmeans-fsx")
tuner.fit([train_records, test_records], job_name=job_name)
tuner.wait()

best_training_job = tuner.best_training_job()
assert best_training_job
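
This excerpt presumes a tuner and file_system_fsx_id built earlier in the test. A minimal sketch of that setup, assuming a built-in KMeans estimator; the objective metric, hyperparameter range, and VPC variables here are illustrative assumptions, not the suite's exact values:

from sagemaker import KMeans
from sagemaker.tuner import HyperparameterTuner, IntegerParameter

kmeans = KMeans(
    role=ROLE,
    train_instance_count=1,
    train_instance_type="ml.c4.xlarge",
    k=10,
    sagemaker_session=sagemaker_session,
    # FSx for Lustre inputs require the training job to run in a VPC
    subnets=subnets,
    security_group_ids=security_group_ids,
)
tuner = HyperparameterTuner(
    kmeans,
    objective_metric_name="test:msd",
    hyperparameter_ranges={"extra_center_factor": IntegerParameter(1, 10)},
    objective_type="Minimize",
    max_jobs=2,
    max_parallel_jobs=2,
)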
estimator = TensorFlow(
    entry_point=SCRIPT,
    role=ROLE,
    train_instance_count=2,
    train_instance_type=instance_type,
    sagemaker_session=sagemaker_session,
    py_version=tests.integ.PYTHON_VERSION,
    script_mode=True,
    framework_version=tf_full_version,
    distributions=PARAMETER_SERVER_DISTRIBUTION,
)
inputs = estimator.sagemaker_session.upload_data(
    path=os.path.join(MNIST_RESOURCE_PATH, "data"), key_prefix="scriptmode/distributed_mnist"
)

with tests.integ.timeout.timeout(minutes=tests.integ.TRAINING_DEFAULT_TIMEOUT_MINUTES):
    estimator.fit(inputs=inputs, job_name=unique_name_from_base("test-tf-sm-distributed"))
assert_s3_files_exist(
    sagemaker_session,
    estimator.model_dir,
    ["graph.pbtxt", "model.ckpt-0.index", "model.ckpt-0.meta"],
)
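
The distributions constant used above turns on SageMaker's native parameter-server support for script mode. In the SDK's v1 tests it is defined along these lines:

# Launch a parameter server on each training instance alongside the workers
PARAMETER_SERVER_DISTRIBUTION = {"parameter_server": {"enabled": True}}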
def test_dgl_training(sagemaker_session, ecr_image, instance_type):
    dgl = MXNet(entry_point=DGL_SCRIPT_PATH,
                role='SageMakerRole',
                train_instance_count=1,
                train_instance_type=instance_type,
                sagemaker_session=sagemaker_session,
                image_name=ecr_image)

    with timeout(minutes=15):
        job_name = utils.unique_name_from_base('test-dgl-image')
        # No input channels: the DGL script provides its own data
        dgl.fit(job_name=job_name)
tuner = HyperparameterTuner(
    estimator,
    objective_metric_name,
    hyperparameter_ranges,
    metric_definitions,
    max_jobs=2,
    max_parallel_jobs=2,
)

with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
    inputs = estimator.sagemaker_session.upload_data(
        path=os.path.join(resource_path, "data"), key_prefix="scriptmode/mnist"
    )

    tuning_job_name = unique_name_from_base("tune-tf-script-mode", max_length=32)
    tuner.fit(inputs, job_name=tuning_job_name)

    print("Started hyperparameter tuning job with name: " + tuning_job_name)

    time.sleep(15)
    tuner.wait()
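
The tuner above presumes an estimator, objective metric, metric definitions, and ranges defined earlier in the test. A minimal sketch of those pieces for a script-mode job; the metric name, regex, and range are illustrative assumptions:

from sagemaker.tuner import IntegerParameter

objective_metric_name = "accuracy"
# SageMaker scrapes the training logs with this regex to compute the objective
metric_definitions = [{"Name": "accuracy", "Regex": "accuracy = ([0-9\\.]+)"}]
hyperparameter_ranges = {"epochs": IntegerParameter(1, 2)}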
def test_call_fit(base_fit, sagemaker_session):
    pca = PCA(base_job_name="pca", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    data = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )

    pca.fit(data, MINI_BATCH_SIZE)

    base_fit.assert_called_once()
    assert len(base_fit.call_args[0]) == 2
    assert base_fit.call_args[0][0] == data
    assert base_fit.call_args[0][1] == MINI_BATCH_SIZE
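
test_call_fit relies on a base_fit fixture that intercepts the base estimator's fit so its call arguments can be inspected. A plausible sketch of that fixture, assuming unittest.mock; the exact fixture in the suite may differ:

from unittest.mock import patch

import pytest

@pytest.fixture
def base_fit():
    # Patch the shared base-class fit so PCA.fit's delegation can be asserted
    with patch("sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit") as fit:
        yield fit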
def test_model_image(sagemaker_session):
    randomcutforest = RandomCutForest(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    data = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    randomcutforest.fit(data, MINI_BATCH_SIZE)

    model = randomcutforest.create_model()
    assert model.image == registry(REGION, "randomcutforest") + "/randomcutforest:1"
def test_predictor_type(sagemaker_session):
    randomcutforest = RandomCutForest(sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)
    data = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    randomcutforest.fit(data, MINI_BATCH_SIZE)
    model = randomcutforest.create_model()
    predictor = model.deploy(1, TRAIN_INSTANCE_TYPE)

    assert isinstance(predictor, RandomCutForestPredictor)
def test_call_fit_none_mini_batch_size(sagemaker_session):
    ntm = NTM(base_job_name="ntm", sagemaker_session=sagemaker_session, **ALL_REQ_ARGS)

    data = RecordSet(
        "s3://{}/{}".format(BUCKET_NAME, PREFIX),
        num_records=1,
        feature_dim=FEATURE_DIM,
        channel="train",
    )
    # mini_batch_size omitted on purpose: fit must accept None here
    ntm.fit(data)
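
The unit tests above all lean on shared module-level constants. Illustrative values in the spirit of the SDK's test suites, not the exact originals:

ROLE = "myrole"
REGION = "us-west-2"
BUCKET_NAME = "Some-Bucket"
PREFIX = "some/prefix"
FEATURE_DIM = 10
MINI_BATCH_SIZE = 128
TRAIN_INSTANCE_TYPE = "ml.c4.xlarge"
ALL_REQ_ARGS = {
    "role": ROLE,
    "train_instance_count": 1,
    "train_instance_type": TRAIN_INSTANCE_TYPE,
    # plus each algorithm's own required hyperparameters,
    # e.g. num_components for PCA or num_topics for NTM
}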