def _create_pipeline():
  """Implements the Chicago Taxi pipeline with TFX."""
  examples = csv_input(_data_root)
  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = CsvExampleGen(input_base=examples)
  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(input_data=example_gen.outputs['examples'])
  # Generates schema based on statistics files.
  infer_schema = SchemaGen(stats=statistics_gen.outputs['output'])
  # Performs anomaly detection based on statistics and data schema.
  validate_stats = ExampleValidator(
      stats=statistics_gen.outputs['output'],
      schema=infer_schema.outputs['output'])
  return pipeline.Pipeline(
      pipeline_name='chicago_taxi_simple',
      pipeline_root=_pipeline_root,
      components=[
          # The original snippet is truncated here; listing the components
          # defined above is the natural completion.
          example_gen, statistics_gen, infer_schema, validate_stats
      ],
  )

def _create_pipeline(pipeline_name: Text, pipeline_root: Text, data_root: Text,
                     metadata_path: Text) -> pipeline.Pipeline:
  """Implements the Chicago Taxi pipeline with TFX."""
  examples = external_input(data_root)
  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = CsvExampleGen(input=examples)
  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=[example_gen],
      enable_cache=True,
      metadata_connection_config=metadata.sqlite_metadata_connection_config(
          metadata_path),
      additional_pipeline_args={},
  )
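
A pipeline definition like the one above is typically handed to an orchestrator to actually run it. A minimal sketch, assuming the Beam runner that ships with TFX of this vintage; all paths and the pipeline name are illustrative assumptions, not part of the original snippet:

import os

from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner

# Illustrative local layout for data, pipeline outputs, and the MLMD store.
_taxi_root = os.path.join(os.environ['HOME'], 'taxi')

BeamDagRunner().run(
    _create_pipeline(
        pipeline_name='chicago_taxi_beam',
        pipeline_root=os.path.join(_taxi_root, 'pipelines'),
        data_root=os.path.join(_taxi_root, 'data'),
        metadata_path=os.path.join(_taxi_root, 'metadata.db')))
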
def _create_pipeline():
  """Implements the Chicago Taxi pipeline with TFX."""
  examples = csv_input(_data_root)
  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = CsvExampleGen(input=examples)
  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
  # Generates schema based on statistics files.
  infer_schema = SchemaGen(statistics=statistics_gen.outputs['statistics'])
  # Performs anomaly detection based on statistics and data schema.
  validate_stats = ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=infer_schema.outputs['schema'])
  return pipeline.Pipeline(
      pipeline_name='chicago_taxi_simple',
      pipeline_root=_pipeline_root,
      components=[
          # Truncated in the original snippet; completed with the components
          # defined above.
          example_gen, statistics_gen, infer_schema, validate_stats
      ],
  )
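
In a notebook, the same components can also be executed and inspected one at a time instead of as a full pipeline. A rough sketch, assuming a TFX version that ships InteractiveContext:

from tfx.orchestration.experimental.interactive.interactive_context import (
    InteractiveContext)

context = InteractiveContext()  # Uses a temporary pipeline root by default.
context.run(example_gen)
context.run(statistics_gen)
# Renders the computed statistics inline in the notebook.
context.show(statistics_gen.outputs['statistics'])
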
def create_e2e_components(pipeline_root: Text, csv_input_location: Text,
                          taxi_module_file: Text) -> List[BaseComponent]:
  """Creates components for a simple Chicago Taxi TFX pipeline for testing.

  Args:
    pipeline_root: The root of the pipeline output.
    csv_input_location: The location of the input data directory.
    taxi_module_file: The location of the module file for Transform/Trainer.

  Returns:
    A list of TFX components that constitutes an end-to-end test pipeline.
  """
  examples = dsl_utils.csv_input(csv_input_location)
  example_gen = CsvExampleGen(input=examples)
  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
  infer_schema = SchemaGen(
      statistics=statistics_gen.outputs['statistics'],
      infer_feature_shape=False)
  validate_stats = ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=infer_schema.outputs['schema'])
  transform = Transform(
      examples=example_gen.outputs['examples'],
      schema=infer_schema.outputs['schema'],
      module_file=taxi_module_file)
  trainer = Trainer(
      module_file=taxi_module_file,
      transformed_examples=transform.outputs['transformed_examples'],
      schema=infer_schema.outputs['schema'],
      transform_graph=transform.outputs['transform_graph'],
      # The original snippet is truncated here; the step counts below are
      # illustrative placeholders.
      train_args=trainer_pb2.TrainArgs(num_steps=10),
      eval_args=trainer_pb2.EvalArgs(num_steps=5))
  return [
      example_gen, statistics_gen, infer_schema, validate_stats, transform,
      trainer
  ]
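
Because the helper returns a plain list of components, a test can wrap it in whatever pipeline configuration it needs. A sketch using the `pipeline.Pipeline` constructor already shown in the sibling snippets; the paths and pipeline name are hypothetical:

from tfx.orchestration import pipeline

components = create_e2e_components(
    pipeline_root='/tmp/taxi/pipeline',
    csv_input_location='/tmp/taxi/data',
    taxi_module_file='/tmp/taxi/taxi_utils.py')

test_pipeline = pipeline.Pipeline(
    pipeline_name='chicago_taxi_e2e_test',
    pipeline_root='/tmp/taxi/pipeline',
    components=components)
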
def _create_pipeline():
  """Implements the Chicago Taxi pipeline with TFX."""
  examples = csv_input(_data_root)
  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = CsvExampleGen(input_base=examples)
  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(input_data=example_gen.outputs.examples)
  # Generates schema based on statistics files.
  infer_schema = SchemaGen(stats=statistics_gen.outputs.output)
  # Performs anomaly detection based on statistics and data schema.
  validate_stats = ExampleValidator(
      stats=statistics_gen.outputs.output, schema=infer_schema.outputs.output)
  # Performs transformations and feature engineering in training and serving.
  transform = Transform(
      input_data=example_gen.outputs.examples,
      schema=infer_schema.outputs.output,
      module_file=_taxi_module_file)
  # The original snippet ends here; returning the assembled pipeline is the
  # natural completion.
  return pipeline.Pipeline(
      pipeline_name='chicago_taxi_simple',
      pipeline_root=_pipeline_root,
      components=[
          example_gen, statistics_gen, infer_schema, validate_stats, transform
      ],
  )
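
Note the attribute-style output access (`example_gen.outputs.examples`) in this older snippet; later TFX releases moved to dict-style lookup with renamed keys, as the sibling snippets on this page show:

# Older TFX releases (attribute access, generic 'output' key):
statistics_gen = StatisticsGen(input_data=example_gen.outputs.examples)

# Later releases (dict access, descriptive keys such as 'statistics'):
statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
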
def _create_pipeline():
  """Implements the Chicago Taxi pipeline with TFX."""
  examples = csv_input(_data_root)
  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = CsvExampleGen(input=examples)
  # Computes statistics over data for visualization and example validation.
  # pylint: disable=line-too-long
  # statistics_gen = StatisticsGen(examples=example_gen.outputs['examples']) # Step 3
  # pylint: enable=line-too-long
  # Generates schema based on statistics files.
  # infer_schema = SchemaGen(
  #     statistics=statistics_gen.outputs['statistics']) # Step 3
  # Performs anomaly detection based on statistics and data schema.
  # validate_stats = ExampleValidator( # Step 3
  #     statistics=statistics_gen.outputs['statistics'], # Step 3
  #     schema=infer_schema.outputs['schema']) # Step 3
  # Performs transformations and feature engineering in training and serving.
  # The original snippet is truncated here; with only the earlier steps
  # enabled, the pipeline contains just example_gen (pipeline name reused
  # from the sibling snippets).
  return pipeline.Pipeline(
      pipeline_name='chicago_taxi_simple',
      pipeline_root=_pipeline_root,
      components=[example_gen],
  )
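
This snippet comes from a step-by-step tutorial: each "Step" is enabled by uncommenting its lines. For reference, enabling Step 3 amounts to uncommenting the component definitions above and adding them to the components list, roughly:

statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
infer_schema = SchemaGen(statistics=statistics_gen.outputs['statistics'])
validate_stats = ExampleValidator(
    statistics=statistics_gen.outputs['statistics'],
    schema=infer_schema.outputs['schema'])

components = [example_gen, statistics_gen, infer_schema, validate_stats]
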
def _create_pipeline(pipeline_name: Text, pipeline_root: Text, data_root: Text,
                     module_file: Text, serving_model_dir: Text,
                     direct_num_workers: int) -> pipeline.Pipeline:
  """Implements the Chicago Taxi pipeline with TFX and Kubeflow Pipelines."""
  examples = external_input(data_root)
  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = CsvExampleGen(input=examples)
  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
  # Generates schema based on statistics files.
  infer_schema = SchemaGen(
      statistics=statistics_gen.outputs['statistics'],
      infer_feature_shape=False)
  # Performs anomaly detection based on statistics and data schema.
  validate_stats = ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=infer_schema.outputs['schema'])
  # Performs transformations and feature engineering in training and serving.
  transform = Transform(
      examples=example_gen.outputs['examples'],
      schema=infer_schema.outputs['schema'],
      module_file=module_file)
  # The original snippet is truncated here; the Trainer/Evaluator/Pusher
  # components (which would use serving_model_dir) are omitted, so the
  # pipeline below covers only the components defined above.
  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=[
          example_gen, statistics_gen, infer_schema, validate_stats, transform
      ],
      beam_pipeline_args=['--direct_num_workers=%d' % direct_num_workers])
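
For the Kubeflow variant, the pipeline is compiled rather than run directly: TFX's Kubeflow runner emits a Kubeflow Pipelines package that is then uploaded to a KFP deployment. A sketch with illustrative GCS paths:

from tfx.orchestration.kubeflow import kubeflow_dag_runner

# Produces a pipeline package (named after pipeline_name) for upload to
# Kubeflow Pipelines. All argument values below are assumptions.
kubeflow_dag_runner.KubeflowDagRunner().run(
    _create_pipeline(
        pipeline_name='chicago_taxi_kubeflow',
        pipeline_root='gs://my-bucket/taxi/pipelines',
        data_root='gs://my-bucket/taxi/data',
        module_file='gs://my-bucket/taxi/taxi_utils.py',
        serving_model_dir='gs://my-bucket/taxi/serving_model',
        direct_num_workers=1))
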
def _create_pipeline(
    pipeline_name: Text, pipeline_root: Text, data_root: Text,
    module_file: Text, serving_model_dir: Text,
    metadata_connection_config: metadata_store_pb2.ConnectionConfig
) -> pipeline.Pipeline:
  """Implements the Chicago Taxi pipeline with TFX."""
  examples = external_input(data_root)
  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = CsvExampleGen(input_base=examples)
  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(input_data=example_gen.outputs['examples'])
  # Generates schema based on statistics files.
  infer_schema = SchemaGen(stats=statistics_gen.outputs['output'])
  # Performs anomaly detection based on statistics and data schema.
  validate_stats = ExampleValidator(
      stats=statistics_gen.outputs['output'],
      schema=infer_schema.outputs['output'])
  # Performs transformations and feature engineering in training and serving.
  transform = Transform(
      input_data=example_gen.outputs['examples'],
      schema=infer_schema.outputs['output'],
      module_file=module_file)
  # The original snippet is truncated here; downstream components (which
  # would use serving_model_dir) are omitted from this completion.
  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=[
          example_gen, statistics_gen, infer_schema, validate_stats, transform
      ],
      metadata_connection_config=metadata_connection_config)
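
The `metadata_connection_config` parameter takes an ML Metadata `ConnectionConfig` proto; for local runs this is commonly built with the SQLite helper already used in an earlier snippet on this page. A sketch with assumed paths:

from tfx.orchestration import metadata

# A SQLite-backed metadata store for local runs; the path is an assumption.
connection_config = metadata.sqlite_metadata_connection_config(
    '/tmp/taxi/metadata.db')

my_pipeline = _create_pipeline(
    pipeline_name='chicago_taxi_local',
    pipeline_root='/tmp/taxi/pipelines',
    data_root='/tmp/taxi/data',
    module_file='/tmp/taxi/taxi_utils.py',
    serving_model_dir='/tmp/taxi/serving_model',
    metadata_connection_config=connection_config)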