value='gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000'),
deploy_webapp: dsl.PipelineParam = dsl.PipelineParam(name='deploy-webapp', value='true'),
data_dir: dsl.PipelineParam = dsl.PipelineParam(
name='data-dir', value='gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/')):
train = dsl.ContainerOp(
name='train',
image='gcr.io/google-samples/ml-pipeline-t2ttrain',
arguments=["--data-dir", data_dir,
"--checkpoint-dir", checkpoint_dir,
"--model-dir", '%s/%s/model_output' % (working_dir, '{{workflow.name}}'),
"--train-steps", train_steps, "--deploy-webapp", deploy_webapp],
file_outputs={'output': '/tmp/output'}
).apply(gcp.use_gcp_secret('user-gcp-sa'))
serve = dsl.ContainerOp(
name='serve',
image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve',
arguments=["--model_name", 'ghsumm-%s' % ('{{workflow.name}}',),
"--model_path", '%s/%s/model_output/export' % (working_dir, '{{workflow.name}}')
]
)
serve.after(train)
train.set_gpu_limit(4)
with dsl.Condition(train.output == 'true'):
webapp = dsl.ContainerOp(
name='webapp',
image='gcr.io/google-samples/ml-pipeline-webapp-launcher',
arguments=["--model_name", 'ghsumm-%s' % ('{{workflow.name}}',),
def bikes_weather( #pylint: disable=unused-argument
working_dir: GCSPath = 'gs://YOUR_GCS_DIR_HERE',
data_dir: GCSPath = 'gs://aju-dev-demos-codelabs/bikes_weather/',
epochs: 'Integer' = 1,
steps_per_epoch: 'Integer' = -1,  # if -1, don't override normal calcs based on dataset size
load_checkpoint: String = ''
):
train = train_op(
data_dir=data_dir,
workdir='%s/%s' % (working_dir, dsl.RUN_ID_PLACEHOLDER),
epochs=epochs, steps_per_epoch=steps_per_epoch,
load_checkpoint=load_checkpoint
).apply(gcp.use_gcp_secret('user-gcp-sa'))
serve = serve_op(
model_path=train.outputs['train_output_path'],
model_name='bikesw'
).apply(gcp.use_gcp_secret('user-gcp-sa'))
train.set_gpu_limit(1)
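
For context, a pipeline function like bikes_weather is then compiled and submitted with the KFP v1 SDK, roughly as sketched below; the endpoint URL, package name, and run name are placeholders.

import kfp
from kfp import compiler

# Compile the pipeline function into an Argo workflow package.
compiler.Compiler().compile(bikes_weather, 'bikes_weather.pipeline.tar.gz')

# Submit a run against a Kubeflow Pipelines endpoint (placeholder host).
client = kfp.Client(host='https://YOUR_PIPELINES_ENDPOINT')
client.create_run_from_pipeline_package(
    'bikes_weather.pipeline.tar.gz',
    arguments={'working_dir': 'gs://YOUR_GCS_DIR_HERE'},
    run_name='bikes-weather-run',
)
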
predict_mode='local',
):
output_template = str(output) + '/{{workflow.uid}}/{{pod.name}}/data'
# set the flag to use GPU trainer
use_gpu = False
preprocess = dataflow_tf_transform_op(
training_data_file_pattern=train,
evaluation_data_file_pattern=evaluation,
schema=schema,
gcp_project=project,
run_mode=preprocess_mode,
preprocessing_module='',
transformed_data_dir=output_template
).apply(gcp.use_gcp_secret('user-gcp-sa'))
training = kubeflow_tf_training_op(
transformed_data_dir=preprocess.output,
schema=schema,
learning_rate=learning_rate,
hidden_layer_size=hidden_layer_size,
steps=steps,
target=target,
preprocessing_module='',
training_output_dir=output_template
).apply(gcp.use_gcp_secret('user-gcp-sa'))
if use_gpu:
training.image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer-gpu:0517114dc2b365a4a6d95424af6157ead774eff3'
training.set_gpu_limit(1)
deploy = kubeflow_deploy_op(
model_dir=str(training.output) + '/export/export',
server_name=tf_server_name
)
else:
deploy = kubeflow_deploy_op(
cluster_name=project,
model_dir=str(training.output) + '/export/export',
pvc_name=vop.outputs["name"],
server_name=tf_server_name
)
steps = [validation, preprocess, training, analysis, prediction, cm, roc, deploy]
for step in steps:
if platform == 'GCP':
step.apply(gcp.use_gcp_secret('user-gcp-sa'))
else:
step.apply(onprem.mount_pvc(vop.outputs["name"], 'local-storage', output))
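
The on-prem branch above mounts a PVC whose name comes from vop.outputs["name"], but the definition of vop is not shown; it is presumably a dsl.VolumeOp created earlier in the pipeline, along the lines of this sketch (the op name, resource name, and size are illustrative).

import kfp.dsl as dsl

# Illustrative only: a VolumeOp that provisions the PVC later referenced as
# vop.outputs["name"] by onprem.mount_pvc(...) in the non-GCP branch.
vop = dsl.VolumeOp(
    name='create-volume',
    resource_name='pipeline-data',  # illustrative resource name
    size='1Gi',
    modes=dsl.VOLUME_MODE_RWM,
)
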
def get_default_pipeline_operator_funcs() -> List[OpFunc]:
"""Returns a default list of pipeline operator functions.
Returns:
A list of functions with type OpFunc.
"""
# Enables authentication for GCP services in a typical GKE Kubeflow
# installation.
gcp_secret_op = gcp.use_gcp_secret(_KUBEFLOW_GCP_SECRET_NAME)
# Mounts the configmap with the connection settings for the MySQL DB used to log metadata.
mount_config_map_op = _mount_config_map_op('metadata-configmap')
# Mounts the secret containing the MySQL DB password.
mysql_password_op = _mount_secret_op('mysql-credential')
return [gcp_secret_op, mount_config_map_op, mysql_password_op]
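
These OpFuncs are consumed by TFX's Kubeflow runner, which applies each function to every container op it emits. A hedged sketch of that wiring, assuming the TFX kubeflow_dag_runner API of that era and a TFX pipeline object named tfx_pipeline defined elsewhere:

from tfx.orchestration.kubeflow import kubeflow_dag_runner

# Apply the default op funcs (GCP secret, metadata configmap, MySQL password)
# to every step of the compiled pipeline.
runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
    pipeline_operator_funcs=kubeflow_dag_runner.get_default_pipeline_operator_funcs(),
)
kubeflow_dag_runner.KubeflowDagRunner(config=runner_config).run(tfx_pipeline)
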
server_name=tf_server_name
)
else:
deploy = kubeflow_deploy_op(
cluster_name=project,
model_dir=str(training.output) + '/export/export',
pvc_name='users-pvc',
# pvc_name=vop.outputs["name"],
server_name=tf_server_name,
service_type='NodePort',
)
steps = [validation, preprocess, training, analysis, prediction, cm, roc, deploy]
for step in steps:
if platform == 'GCP':
step.apply(gcp.use_gcp_secret('user-gcp-sa'))
else:
step.apply(onprem.mount_pvc('users-pvc', 'local-storage', output))
# step.apply(onprem.mount_pvc(vop.outputs["name"], 'local-storage', output))
"--community-area-list-path", read_metadata.outputs["community_area_list_path"],
"--znorm-stats-path", read_metadata.outputs["znorm_stats_path"],
"--temp-dir", temp_dir,
"--runner", dataflow_runner
],
file_outputs={
"train_tfrecord_path": "/train_tfrecord_path.txt",
"eval_tfrecord_path": "/eval_tfrecord_path.txt",
"eval_raw_tfrecord_path": "/eval_raw_tfrecord_path.txt",
"znorm_stats": "/znorm_stats.txt",
"n_areas": "/n_areas.txt",
"n_windows_train": "/n_windows_train.txt",
"n_windows_eval": "/n_windows_eval.txt",
"tft_artifacts_dir": "/tft_artifacts_dir.txt"
}
).apply(gcp.use_gcp_secret('user-gcp-sa'))
data_validation = dsl.ContainerOp(
name='data_validation',
image='gcr.io/ciandt-cognitive-sandbox/chicago-taxi-forecast/data-validation:latest',
command=["python3", "/app/data_validation.py"],
arguments=[
"--input-data-path", bq2tfrecord.outputs["eval_raw_tfrecord_path"],
"--output-dir", artifacts_dir
],
file_outputs={
"schema": "/schema.txt"
},
output_artifact_paths={
'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'
}
).apply(gcp.use_gcp_secret('user-gcp-sa')).after(bq2tfrecord)
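
The data_validation step declares output_artifact_paths for 'mlpipeline-ui-metadata', which is KFP v1's convention for surfacing artifacts in the Pipelines UI: the container writes a small JSON file at that path describing what to render. A sketch of what the container-side code might write (the artifact path in GCS is a placeholder):

import json

# Written inside the container at the path declared in output_artifact_paths,
# so the KFP UI can render the generated report as a web-app artifact.
metadata = {
    'outputs': [{
        'type': 'web-app',
        'source': 'gs://YOUR_BUCKET/artifacts/validation_report.html',  # placeholder
    }]
}
with open('/mlpipeline-ui-metadata.json', 'w') as f:
    json.dump(metadata, f)
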
tfteval = dsl.ContainerOp(
name = 'tft-eval',
image = 'gcr.io/google-samples/ml-pipeline-dataflow-tftbq-taxi',
arguments = [ "--input_handle", input_handle_eval, "--outfile_prefix", outfile_prefix_eval,
"--working_dir", '%s/%s/tft-eval' % (working_dir, '{{workflow.name}}'),
"--project", project,
"--mode", preprocess_mode,
"--setup_file", tft_setup_file,
"--max-rows", '5000',
"--ts1", ts1,
"--ts2", ts2,
"--stage", "eval",
"--preprocessing-module", preprocessing_module1]
# file_outputs = {'transformed': '/output.txt'}
).apply(gcp.use_gcp_secret('user-gcp-sa'))
tfttrain = dsl.ContainerOp(
name = 'tft-train',
image = 'gcr.io/google-samples/ml-pipeline-dataflow-tftbq-taxi',
arguments = [ "--input_handle", input_handle_train, "--outfile_prefix", outfile_prefix_train,
"--working_dir", '%s/%s/tft-train' % (working_dir, '{{workflow.name}}'),
"--project", project,
"--mode", preprocess_mode,
"--setup_file", tft_setup_file,
"--max_rows", max_rows,
"--ts1", ts1,
"--ts2", ts2,
"--stage", "train",
"--preprocessing_module", preprocessing_module1]
).apply(gcp.use_gcp_secret('user-gcp-sa'))
tfteval2 = dsl.ContainerOp(
name = 'tft-eval2',
image = 'gcr.io/google-samples/ml-pipeline-dataflow-tftbq-taxi',
arguments = [ "--input_handle", input_handle_eval, "--outfile_prefix", outfile_prefix_eval,
"--working_dir", '%s/%s/tft-eval2' % (working_dir, '{{workflow.name}}'),
"--project", project,
"--mode", preprocess_mode,
"--setup_file", tft_setup_file,
"--max_rows", '5000',
"--ts1", ts1,
"--ts2", ts2,
"--stage", "eval",
"--preprocessing_module", preprocessing_module2]
).apply(gcp.use_gcp_secret('user-gcp-sa'))
tfttrain2 = dsl.ContainerOp(
name = 'tft-train2',
def pipeline(gcs_bucket_name=''):
bq2gcs_op = comp.load_component_from_file(BQ2GCS_YAML)
bq2gcs = bq2gcs_op(
input_bucket=gcs_bucket_name,
).apply(gcp.use_gcp_secret('user-gcp-sa'))
trainjob_op = comp.load_component_from_file(TRAINJOB_YAML)
trainjob = trainjob_op(
input_bucket=gcs_bucket_name,
).apply(gcp.use_gcp_secret('user-gcp-sa'))
deploymodel_op = comp.load_component_from_file(DEPLOYMODEL_YAML)
deploymodel = deploymodel_op(
input_bucket=gcs_bucket_name,
).apply(gcp.use_gcp_secret('user-gcp-sa'))
trainjob.after(bq2gcs)
deploymodel.after(trainjob)
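
BQ2GCS_YAML, TRAINJOB_YAML, and DEPLOYMODEL_YAML are KFP component specifications loaded from files. As an illustration of the expected format, a minimal component of the same shape can also be loaded from an inline string; the component name, image, and command below are placeholders, not the sample's actual components.

from kfp import components as comp

# Illustrative only: a tiny component spec of the kind the YAML files above contain.
echo_op = comp.load_component_from_text("""
name: Echo bucket
inputs:
- {name: input_bucket, type: String}
implementation:
  container:
    image: google/cloud-sdk:slim
    command: [sh, -c, 'echo "input bucket: $0"', {inputValue: input_bucket}]
""")
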