@solid
def cache_file_from_s3(context, s3_coord: S3Coordinate) -> str:
    # we default the target_key to the last component of the s3 key.
    target_key = s3_coord['key'].split('/')[-1]

    with get_temp_file_name() as tmp_file:
        boto3.client('s3').download_file(
            Bucket=s3_coord['bucket'], Key=s3_coord['key'], Filename=tmp_file
        )

        file_cache = context.resources.file_cache
        with open(tmp_file, 'rb') as tmp_file_object:
            # returns a handle rather than a path
            file_handle = file_cache.write_file_object(target_key, tmp_file_object)
            return file_handle.path
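
# For reference, a minimal stand-in for the file_cache resource the solid
# above assumes (it also assumes boto3 and dagster's S3Coordinate /
# get_temp_file_name helpers are imported). Any object exposing
# write_file_object(key, fobj) and returning a handle with a .path would
# satisfy it; the class names here are hypothetical.
import os
import shutil

class LocalFileHandle:
    def __init__(self, path):
        self.path = path

class LocalFileCache:
    def __init__(self, target_folder):
        self.target_folder = target_folder

    def write_file_object(self, key, source_file_object):
        # copy the stream into the cache folder and hand back a handle
        target_path = os.path.join(self.target_folder, key)
        with open(target_path, 'wb') as target_file_object:
            shutil.copyfileobj(source_file_object, target_file_object)
        return LocalFileHandle(target_path)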

# canonicalize_num (like the other upstream solids in the chain) passes num through:
@lambda_solid(input_defs=[InputDefinition('num')], output_def=OutputDefinition(Int))
def canonicalize_num(num):
    return num

@lambda_solid(input_defs=[InputDefinition('num')], output_def=OutputDefinition(Int))
def load_num(num):
    return num + 3
@composite_solid(output_defs=[OutputDefinition(Int)])
def test():
    return load_num(
        num=canonicalize_num(
            num=subsample_num(num=ingest_num(num=unzip_num(num=download_num())))
        )
    )

result = execute_pipeline(
    PipelineDefinition(solid_defs=[test]),
    {'solids': {'test': {'solids': {'download_num': {'config': 123}}}}},
)

assert result.result_for_handle('test.canonicalize_num').output_value() == 123
assert result.result_for_handle('test.load_num').output_value() == 126
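
# Since the upstream solids pass the configured value through unchanged, the
# other intermediate handles can be checked the same way (a sketch under
# that same pass-through assumption):
assert result.result_for_handle('test.download_num').output_value() == 123
assert result.result_for_handle('test.subsample_num').output_value() == 123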

@pipeline
def nothing_pipeline():
    done(wait())
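
# One plausible definition of the wait/done solids wired above, using the
# Nothing type so done depends on wait purely for ordering, not for data
# (Nothing-typed inputs are not passed to the compute function):
@lambda_solid(output_def=OutputDefinition(Nothing))
def wait():
    pass

@lambda_solid(input_defs=[InputDefinition('ready', Nothing)])
def done():
    return 'done'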

def define_read_csv_solid(name):
    def _t_fn(info, _inputs):
        yield Result(pd.read_csv(info.config['path']))

    return SolidDefinition(
        name=name,
        inputs=[],
        outputs=[OutputDefinition()],
        config_field=types.Field(types.Dict({
            'path': Field(types.Path)
        })),
        transform_fn=_t_fn,
    )
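
# A usage sketch for the factory above, under the same legacy API. The exact
# environment shape is assumed; the path would be supplied through the
# solid's config at execution time, along these lines:
read_csv_solid = define_read_csv_solid('read_csv')
# {'solids': {'read_csv': {'config': {'path': 'num_table.csv'}}}}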

assert _hash({'some_int': Field(int)}) != _hash({'another_int': Field(int)})
assert _hash({'same_name': Field(int)}) != _hash({'same_name': Field(str)})
assert _hash({'same_name': Field(int)}) != _hash({'same_name': Field(int, is_optional=True)})
assert _hash({'same_name': Field(int)}) != _hash(
    {'same_name': Field(int, is_optional=True, default_value=2)}
)
assert _hash({'same_name': Field(int, is_optional=True)}) != _hash(
    {'same_name': Field(int, is_optional=True, default_value=2)}
)
assert _hash({'same_name': Field(int)}) != _hash({'same_name': Field(int, description='desc')})
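
# Taken together, these assertions pin down that _hash is sensitive to every
# part of a field dict: the key name, the field type, optionality, the
# default value, and the description. Config schemas differing in any of
# those attributes must never be conflated under one hash.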

@solid(config={'some_config': Field(String)})
def solid_with_context(context):
    did_get['yep'] = context.solid_config

@solid(config={'foo': Field(String)})
def node_a(context):
    return context.solid_config['foo']
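
# A quick execution sketch for node_a, mirroring the config plumbing used
# elsewhere in these examples (the config value 'bar' is illustrative):
result = execute_pipeline(
    PipelineDefinition(solid_defs=[node_a]),
    {'solids': {'node_a': {'config': {'foo': 'bar'}}}},
)
assert result.result_for_solid('node_a').output_value() == 'bar'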

def test_dataframe_table_from_inputs():
    called = {}

    @solid(input_defs=[InputDefinition('df', DataFrame)])
    def df_as_config(_context, df):
        assert df.to_dict('list') == {'num1': [1, 3], 'num2': [2, 4]}
        called['yup'] = True

    @pipeline
    def test_pipeline():
        df_as_config()

    result = execute_pipeline(
        test_pipeline,
        {
            'solids': {
                'df_as_config': {
                    'inputs': {'df': {'table': {'path': script_relative_path('num_table.txt')}}}
                }
            }
        },
    )

    assert result.success
    assert called['yup']
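
# num_table.txt is assumed to be a two-column table that the DataFrame
# 'table' input schema hydrates into {'num1': [1, 3], 'num2': [2, 4]}, i.e.:
# num1 num2
# 1    2
# 3    4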

# step_one_solid mirrors step_two_solid below, with no upstream inputs:
step_one_solid = single_output_solid(
    name='step_one_solid',
    input_defs=[],
    compute_fn=lambda context, args: _set_key_value(did_run_dict, 'step_one', True),
    output_def=OutputDefinition(),
)

step_two_solid = single_output_solid(
    name='step_two_solid',
    input_defs=[InputDefinition('step_one_solid')],
    compute_fn=lambda context, args: _set_key_value(did_run_dict, 'step_two', True),
    output_def=OutputDefinition(),
)

@pipeline
def pipe():
    step_two_solid(step_one_solid())

pipeline_result = execute_pipeline(pipe)
assert pipeline_result.success

for result in pipeline_result.solid_result_list:
    assert result.success

assert did_run_dict['step_one'] is True
assert did_run_dict['step_two'] is True
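
# _set_key_value is presumably a trivial helper along these lines, returning
# the value so the lambda doubles as the solid's output:
def _set_key_value(ddict, key, value):
    ddict[key] = value
    return value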

yaml_variants = [
    '''
solids:
  ingest_a:
    config: 2
  ingest_b:
    config: 3
context:
''',
    '''
solids:
  ingest_a:
    config: 2
  ingest_b:
    config: 3
''',
]

for yaml_variant in yaml_variants:
    result = execute_pipeline(
        define_part_nine_step_one_pipeline(), yaml.load(yaml_variant)
    )

    assert result.success
    assert result.result_for_solid('ingest_a').transformed_value() == 2
    assert result.result_for_solid('ingest_b').transformed_value() == 3
    assert result.result_for_solid('add_ints').transformed_value() == 5
    assert result.result_for_solid('mult_ints').transformed_value() == 6
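
# Both YAML variants configure the pipeline identically: the first spells
# out an empty context block, the second omits it entirely, and the loop
# confirms the two parses drive the same run.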