Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_build_params_missing_substring_positions(self, _):
    """An acon without "substring_positions" must make the constructor raise M3DIllegalArgumentException."""
    acon_parameters = {
        "source_table": self.SOURCE_TABLE,
        "target_table": self.TARGET_TABLE,
        "source_field": self.SOURCE_FIELD,
        "target_partitions": self.TARGET_PARTITIONS,
        "select_conditions": self.SELECT_CONDITIONS,
    }
    acon = self._create_acon_dict(acon_parameters)
    system = self._create_emr_system()
    config = self._create_algorithm_configuration(acon)
    expected_prefix = "Substring positions specification is missing in the acon-file"

    with pytest.raises(M3DIllegalArgumentException) as raised:
        AlgorithmFixedLengthStringExtractor(
            system,
            config.get_algorithm_instance(),
            config.get_algorithm_params(),
        )

    # Checked after the context exits: placed inside the block it would never execute.
    message = str(raised.value)
    assert message.startswith(expected_prefix)
def test_build_params_missing_source_table(self, _):
    """An acon without "source_table" must make the constructor raise M3DIllegalArgumentException."""
    acon_parameters = {
        "target_table": self.TARGET_TABLE,
        "source_field": self.SOURCE_FIELD,
        "target_partitions": self.TARGET_PARTITIONS,
        "select_conditions": self.SELECT_CONDITIONS,
        "substring_positions": self.SUBSTRING_POSITIONS,
    }
    acon = self._create_acon_dict(acon_parameters)
    system = self._create_emr_system()
    config = self._create_algorithm_configuration(acon)
    expected_prefix = "Source table name is missing in the acon-file"

    with pytest.raises(M3DIllegalArgumentException) as raised:
        AlgorithmFixedLengthStringExtractor(
            system,
            config.get_algorithm_instance(),
            config.get_algorithm_params(),
        )

    # Checked after the context exits: placed inside the block it would never execute.
    message = str(raised.value)
    assert message.startswith(expected_prefix)
def test_build_params_with_rules_unpartitioned(self, _):
    """Supplying "select_rules" without "target_partitions" must raise M3DIllegalArgumentException."""
    acon_parameters = {
        "source_table": self.SOURCE_TABLE,
        "target_table": self.TARGET_TABLE,
        "source_field": self.SOURCE_FIELD,
        "select_rules": self.SELECT_RULES,
        "substring_positions": self.SUBSTRING_POSITIONS,
    }
    acon = self._create_acon_dict(acon_parameters)
    system = self._create_emr_system()
    config = self._create_algorithm_configuration(acon)
    expected_prefix = "Unable to use select_rules for unpartitioned table"

    with pytest.raises(M3DIllegalArgumentException) as raised:
        AlgorithmFixedLengthStringExtractor(
            system,
            config.get_algorithm_instance(),
            config.get_algorithm_params(),
        )

    # Checked after the context exits: placed inside the block it would never execute.
    message = str(raised.value)
    assert message.startswith(expected_prefix)
def test_build_params_with_rules_and_conditions(self, _):
    """Supplying both "select_conditions" and "select_rules" must raise M3DIllegalArgumentException."""
    acon_parameters = {
        "source_table": self.SOURCE_TABLE,
        "target_table": self.TARGET_TABLE,
        "source_field": self.SOURCE_FIELD,
        "target_partitions": self.TARGET_PARTITIONS,
        "select_conditions": self.SELECT_CONDITIONS,
        "select_rules": self.SELECT_RULES,
        "substring_positions": self.SUBSTRING_POSITIONS,
    }
    acon = self._create_acon_dict(acon_parameters)
    system = self._create_emr_system()
    config = self._create_algorithm_configuration(acon)
    expected_prefix = "Unable to use both select_conditions and select_rules at the same time"

    with pytest.raises(M3DIllegalArgumentException) as raised:
        AlgorithmFixedLengthStringExtractor(
            system,
            config.get_algorithm_instance(),
            config.get_algorithm_params(),
        )

    # Checked after the context exits: placed inside the block it would never execute.
    message = str(raised.value)
    assert message.startswith(expected_prefix)
# Builds an acon containing source/target table, source field, target partitions,
# select_conditions and substring_positions (no select_rules, no
# metadata_update_strategy), constructs the extractor and calls build_params().
# NOTE(review): this snippet has lost its indentation and is truncated - the
# expected_params dict below is never closed and no assertion comparing it with
# generated_params is visible here. Restore the tail from the original file.
def test_build_params_empty_non_required_fields(self, add_tags_patch):
acon_dict = self._create_acon_dict({
"source_table": self.SOURCE_TABLE,
"target_table": self.TARGET_TABLE,
"source_field": self.SOURCE_FIELD,
"target_partitions": self.TARGET_PARTITIONS,
"select_conditions": self.SELECT_CONDITIONS,
"substring_positions": self.SUBSTRING_POSITIONS
})
emr_system = self._create_emr_system()
configuration = self._create_algorithm_configuration(acon_dict)
algorithm = AlgorithmFixedLengthStringExtractor(
emr_system,
configuration.get_algorithm_instance(),
configuration.get_algorithm_params()
)
generated_params = algorithm.build_params()
# Table names are expanded through the _create_full_table_name helper before being listed as expected values.
expected_full_source_table = self._create_full_table_name(self.SOURCE_TABLE)
expected_full_target_table = self._create_full_table_name(self.TARGET_TABLE)
# presumably the dict that generated_params is compared against - TODO confirm once the missing tail is restored
expected_params = {
"source_table": expected_full_source_table,
"target_table": expected_full_target_table,
"source_field": self.SOURCE_FIELD,
"target_partitions": self.TARGET_PARTITIONS,
"select_conditions": self.SELECT_CONDITIONS,
"substring_positions": self.SUBSTRING_POSITIONS
# Builds a partitioned acon that uses select_rules (and no select_conditions),
# constructs the extractor and calls build_params().
# NOTE(review): this snippet has lost its indentation and is truncated - the
# expected_params dict below is never closed and no assertion comparing it with
# generated_params is visible here. Restore the tail from the original file.
def test_build_params_with_rules(self, add_tags_patch):
acon_dict = self._create_acon_dict({
"source_table": self.SOURCE_TABLE,
"target_table": self.TARGET_TABLE,
"source_field": self.SOURCE_FIELD,
"target_partitions": self.TARGET_PARTITIONS,
"select_rules": self.SELECT_RULES,
"substring_positions": self.SUBSTRING_POSITIONS
})
emr_system = self._create_emr_system()
configuration = self._create_algorithm_configuration(acon_dict)
algorithm = AlgorithmFixedLengthStringExtractor(
emr_system,
configuration.get_algorithm_instance(),
configuration.get_algorithm_params()
)
generated_params = algorithm.build_params()
# Table names are expanded through the _create_full_table_name helper before being listed as expected values.
expected_full_source_table = self._create_full_table_name(self.SOURCE_TABLE)
expected_full_target_table = self._create_full_table_name(self.TARGET_TABLE)
# presumably the dict that generated_params is compared against - TODO confirm once the missing tail is restored
expected_params = {
"source_table": expected_full_source_table,
"target_table": expected_full_target_table,
"source_field": self.SOURCE_FIELD,
"target_partitions": self.TARGET_PARTITIONS,
"select_rules": self.SELECT_RULES,
"substring_positions": self.SUBSTRING_POSITIONS
def test_build_params_missing_target_table(self, _):
    """An acon without "target_table" must make the constructor raise M3DIllegalArgumentException."""
    acon_parameters = {
        "source_table": self.SOURCE_TABLE,
        "source_field": self.SOURCE_FIELD,
        "target_partitions": self.TARGET_PARTITIONS,
        "select_conditions": self.SELECT_CONDITIONS,
        "substring_positions": self.SUBSTRING_POSITIONS,
    }
    acon = self._create_acon_dict(acon_parameters)
    system = self._create_emr_system()
    config = self._create_algorithm_configuration(acon)
    expected_prefix = "Target table name is missing in the acon-file"

    with pytest.raises(M3DIllegalArgumentException) as raised:
        AlgorithmFixedLengthStringExtractor(
            system,
            config.get_algorithm_instance(),
            config.get_algorithm_params(),
        )

    # Checked after the context exits: placed inside the block it would never execute.
    message = str(raised.value)
    assert message.startswith(expected_prefix)
def test_build_params_with_conditions_unpartitioned(self, _):
    """Supplying "select_conditions" without "target_partitions" must raise M3DIllegalArgumentException."""
    acon_parameters = {
        "source_table": self.SOURCE_TABLE,
        "target_table": self.TARGET_TABLE,
        "source_field": self.SOURCE_FIELD,
        "select_conditions": self.SELECT_CONDITIONS,
        "substring_positions": self.SUBSTRING_POSITIONS,
    }
    acon = self._create_acon_dict(acon_parameters)
    system = self._create_emr_system()
    config = self._create_algorithm_configuration(acon)
    expected_prefix = "Unable to use select_conditions for unpartitioned table"

    with pytest.raises(M3DIllegalArgumentException) as raised:
        AlgorithmFixedLengthStringExtractor(
            system,
            config.get_algorithm_instance(),
            config.get_algorithm_params(),
        )

    # Checked after the context exits: placed inside the block it would never execute.
    message = str(raised.value)
    assert message.startswith(expected_prefix)
# Builds a partitioned acon that uses select_conditions and also sets
# metadata_update_strategy, constructs the extractor and calls build_params().
# NOTE(review): this snippet has lost its indentation and is truncated - the
# expected_params dict below stops after "metadata_update_strategy", is never
# closed, and no assertion comparing it with generated_params is visible here.
# Restore the tail from the original file.
def test_build_params_with_conditions(self, add_tags_patch):
acon_dict = self._create_acon_dict({
"source_table": self.SOURCE_TABLE,
"target_table": self.TARGET_TABLE,
"source_field": self.SOURCE_FIELD,
"target_partitions": self.TARGET_PARTITIONS,
"select_conditions": self.SELECT_CONDITIONS,
"substring_positions": self.SUBSTRING_POSITIONS,
"metadata_update_strategy": self.METADATA_UPDATE_STRATEGY
})
emr_system = self._create_emr_system()
configuration = self._create_algorithm_configuration(acon_dict)
algorithm = AlgorithmFixedLengthStringExtractor(
emr_system,
configuration.get_algorithm_instance(),
configuration.get_algorithm_params()
)
generated_params = algorithm.build_params()
# Table names are expanded through the _create_full_table_name helper before being listed as expected values.
expected_full_source_table = self._create_full_table_name(self.SOURCE_TABLE)
expected_full_target_table = self._create_full_table_name(self.TARGET_TABLE)
# presumably the dict that generated_params is compared against - TODO confirm once the missing tail is restored
expected_params = {
"source_table": expected_full_source_table,
"target_table": expected_full_target_table,
"source_field": self.SOURCE_FIELD,
"target_partitions": self.TARGET_PARTITIONS,
"select_conditions": self.SELECT_CONDITIONS,
"metadata_update_strategy": self.METADATA_UPDATE_STRATEGY,
def __init__(self, execution_system, algorithm_instance, algorithm_params):
    """
    Initialize the fixed-length string extractor algorithm.

    Runs the base-class initializer, validates the parameters, builds the
    source and target table names by prefixing them with the execution
    system's ``db_lake`` and tags the cluster with both table names.

    :param execution_system: an instance of EMRSystem object
    :param algorithm_instance: name of the algorithm instance
    :param algorithm_params: algorithm configuration
    """
    super(AlgorithmFixedLengthStringExtractor, self).__init__(
        execution_system,
        algorithm_instance,
        algorithm_params
    )

    # Validation runs first: the lookups below index self._parameters directly
    # and therefore assume the required keys are present.
    self.validate_parameters()

    lake_prefix = self._execution_system.db_lake + "."
    self.source_table = lake_prefix + self._parameters["source_table"]
    self.target_table = lake_prefix + self._parameters["target_table"]

    # Optional setting; stays None when the acon does not provide it.
    self.metadata_update_strategy = self._parameters.get("metadata_update_strategy")

    execution_system.add_cluster_tags({
        EMRSystem.EMRClusterTag.SOURCE_TABLE: self.source_table,
        EMRSystem.EMRClusterTag.TARGET_TABLE: self.target_table
    })