# Computes statistics over data for visualization and example validation.
statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
# Generates schema based on statistics files.
infer_schema = SchemaGen(statistics=statistics_gen.outputs['statistics'])
# Performs anomaly detection based on statistics and data schema.
validate_stats = ExampleValidator(
    statistics=statistics_gen.outputs['statistics'],
    schema=infer_schema.outputs['schema'])

return pipeline.Pipeline(
    pipeline_name=pipeline_name,
    pipeline_root=pipeline_root,
    components=[example_gen, statistics_gen, infer_schema, validate_stats],
    enable_cache=True,
    metadata_connection_config=metadata.sqlite_metadata_connection_config(
        metadata_path),
    additional_pipeline_args={},
)
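
On its own, the fragment above is not runnable: example_gen and the function parameters are defined earlier in its source file. The sketch below shows how such a data-validation pipeline is typically assembled end to end; the function name _create_pipeline, the CsvExampleGen-based ingestion, and the argument names are assumptions for illustration, not taken from the snippet.

# Self-contained sketch; names marked in the comments are assumptions.
from tfx.components import CsvExampleGen, ExampleValidator, SchemaGen, StatisticsGen
from tfx.orchestration import metadata, pipeline


def _create_pipeline(pipeline_name, pipeline_root, data_root, metadata_path):
  """Builds a TFX pipeline that only ingests and validates data (sketch)."""
  # Brings CSV data into the pipeline; recent TFX releases accept input_base,
  # older ones used external_input() instead.
  example_gen = CsvExampleGen(input_base=data_root)
  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
  # Generates schema based on statistics files.
  infer_schema = SchemaGen(statistics=statistics_gen.outputs['statistics'])
  # Performs anomaly detection based on statistics and data schema.
  validate_stats = ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=infer_schema.outputs['schema'])

  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=[example_gen, statistics_gen, infer_schema, validate_stats],
      enable_cache=True,
      metadata_connection_config=metadata.sqlite_metadata_connection_config(
          metadata_path),
      additional_pipeline_args={},
  )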
# Batch inference component: the constructor head below is reconstructed
# (the original snippet starts mid-call), and the examples channel is assumed
# to come from the inference-side ExampleGen listed in the components below.
bulk_inferrer = BulkInferrer(
    examples=inference_example_gen.outputs['examples'],
    model_export=trainer.outputs['output'],
    model_blessing=model_validator.outputs['blessing'],
    # Empty data_spec.example_splits will result in using all splits.
    data_spec=bulk_inferrer_pb2.DataSpec(),
    model_spec=bulk_inferrer_pb2.ModelSpec())

return pipeline.Pipeline(
    pipeline_name=pipeline_name,
    pipeline_root=pipeline_root,
    components=[
        training_example_gen, inference_example_gen, statistics_gen,
        infer_schema, validate_stats, transform, trainer, model_analyzer,
        model_validator, bulk_inferrer
    ],
    enable_cache=True,
    metadata_connection_config=metadata.sqlite_metadata_connection_config(
        metadata_path),
    # TODO(b/141578059): The multi-processing API might change.
    beam_pipeline_args=['--direct_num_workers=%d' % direct_num_workers])
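
Because this variant sets beam_pipeline_args, the returned pipeline is meant to be executed by the Beam-based orchestrator. A minimal launch sketch under that assumption follows; the builder-function name and its argument values are placeholders, not taken from the snippet.

from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner

# _create_pipeline stands for the builder function the fragment above returns
# from; every argument value here is a placeholder.
BeamDagRunner().run(
    _create_pipeline(
        pipeline_name='bulk_inference_pipeline',
        pipeline_root='/tmp/tfx/pipelines/bulk_inference_pipeline',
        metadata_path='/tmp/tfx/metadata/metadata.db',
        direct_num_workers=0))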
# Checks whether the model passed validation and pushes it to a file
# destination if it did.
pusher = Pusher(
    model=trainer.outputs['model'],
    model_blessing=model_validator.outputs['blessing'],
    push_destination=pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory=serving_model_dir)))

return pipeline.Pipeline(
    pipeline_name=pipeline_name,
    pipeline_root=pipeline_root,
    components=[
        example_gen, hello, statistics_gen, infer_schema, validate_stats,
        transform, trainer, model_analyzer, model_validator, pusher
    ],
    enable_cache=True,
    metadata_connection_config=metadata.sqlite_metadata_connection_config(
        metadata_path))
pusher = Pusher(
    model=trainer.outputs['model'],
    model_blessing=model_validator.outputs['blessing'],
    push_destination=pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory=serving_model_dir)))

return pipeline.Pipeline(
    pipeline_name=pipeline_name,
    pipeline_root=pipeline_root,
    components=[
        example_gen, statistics_gen, infer_schema, validate_stats, transform,
        trainer, evaluator, model_validator, pusher
    ],
    enable_cache=True,
    metadata_connection_config=metadata.sqlite_metadata_connection_config(
        metadata_path),
)
pusher = Pusher(
    model=trainer.outputs['model'],
    model_blessing=model_validator.outputs['blessing'],
    push_destination=pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory=serving_model_dir)))

return pipeline.Pipeline(
    pipeline_name=pipeline_name,
    pipeline_root=pipeline_root,
    components=[
        example_gen, statistics_gen, infer_schema, validate_stats, transform,
        trainer, model_analyzer, model_validator, pusher
    ],
    enable_cache=True,
    metadata_connection_config=metadata.sqlite_metadata_connection_config(
        metadata_path),
    # TODO(b/141578059): The multi-processing API might change.
    beam_pipeline_args=['--direct_num_workers=%d' % direct_num_workers])
pusher = Pusher(
    model=trainer.outputs['model'],
    model_blessing=model_validator.outputs['blessing'],
    push_destination=pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory=serving_model_dir)))

return pipeline.Pipeline(
    pipeline_name=pipeline_name,
    pipeline_root=pipeline_root,
    components=[
        example_gen, statistics_gen, infer_schema, validate_stats, transform,
        trainer, model_analyzer, model_validator, pusher
    ],
    enable_cache=True,
    metadata_connection_config=metadata.sqlite_metadata_connection_config(
        metadata_path),
)
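
Each Pusher above writes the blessed model into a new numbered subdirectory under serving_model_dir. Below is a hedged sketch of loading the most recent export for local inspection; the serving path is illustrative, and the version-directory naming follows Pusher's usual integer-timestamp convention.

import os

import tensorflow as tf

# Assumed location; matches whatever serving_model_dir was passed to Pusher.
serving_model_dir = '/tmp/serving_model/my_pipeline'
# Pusher names each export directory with an integer timestamp, so the
# largest integer is the newest export.
latest_version = max(os.listdir(serving_model_dir), key=int)
loaded = tf.saved_model.load(os.path.join(serving_model_dir, latest_version))
print(list(loaded.signatures.keys()))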
"""
# From InteractiveContext.__init__; the signature and docstring below are a
# reconstruction, since the original snippet begins after the docstring.
def __init__(self,
             pipeline_name=None,
             pipeline_root=None,
             metadata_connection_config=None):
  """Initialize an InteractiveContext."""
  if not pipeline_name:
    pipeline_name = ('interactive-%s' %
                     datetime.datetime.now().isoformat().replace(':', '_'))
  if not pipeline_root:
    pipeline_root = tempfile.mkdtemp(prefix='tfx-%s-' % pipeline_name)
    absl.logging.warning(
        'InteractiveContext pipeline_root argument not provided: using '
        'temporary directory %s as root for pipeline outputs.', pipeline_root)
  if not metadata_connection_config:
    # TODO(ccy): consider reconciling similar logic here with other instances
    # in tfx/orchestration/...
    metadata_sqlite_path = os.path.join(pipeline_root,
                                        self._DEFAULT_SQLITE_FILENAME)
    metadata_connection_config = metadata.sqlite_metadata_connection_config(
        metadata_sqlite_path)
    absl.logging.warning(
        'InteractiveContext metadata_connection_config not provided: using '
        'SQLite ML Metadata database at %s.', metadata_sqlite_path)
  self.pipeline_name = pipeline_name
  self.pipeline_root = pipeline_root
  self.metadata_connection_config = metadata_connection_config

  # Register IPython formatters.
  notebook_formatters.register_formatters()

  # Register artifact visualizations.
  standard_visualizations.register_standard_visualizations()
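
This constructor backs notebook-based TFX runs, where components execute one at a time instead of through a full orchestrator. A short usage sketch follows; the data path and choice of components are illustrative, and CsvExampleGen's input_base argument assumes a recent TFX release.

from tfx.components import CsvExampleGen, StatisticsGen
from tfx.orchestration.experimental.interactive.interactive_context import (
    InteractiveContext)

# With no arguments, the constructor above picks a temporary pipeline root
# and a SQLite-backed ML Metadata store.
context = InteractiveContext()

example_gen = CsvExampleGen(input_base='/tmp/data')  # assumed data location
context.run(example_gen)

statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
context.run(statistics_gen)
context.show(statistics_gen.outputs['statistics'])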