How to use the m3d.hadoop.algorithm.algorithm_fixed_length_string_extractor.AlgorithmFixedLengthStringExtractor function in m3d

To help you get started, we’ve selected a few m3d examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github adidas / m3d-api / test / unit / m3d / hadoop / algorithm / test_algorithm_fixed_length_string_extractor.py View on Github external
def test_build_params_missing_substring_positions(self, _):
        """
        Check that the algorithm cannot be constructed when the acon parameters
        carry no "substring_positions" entry.

        :param _: unused extra test argument (presumably a mock injected by a
            patch decorator that is outside this view)
        """
        # Every parameter is supplied except "substring_positions".
        acon_parameters = {
            "source_table": self.SOURCE_TABLE,
            "target_table": self.TARGET_TABLE,
            "source_field": self.SOURCE_FIELD,
            "target_partitions": self.TARGET_PARTITIONS,
            "select_conditions": self.SELECT_CONDITIONS
        }
        acon_dict = self._create_acon_dict(acon_parameters)

        emr_system = self._create_emr_system()
        configuration = self._create_algorithm_configuration(acon_dict)

        expected_prefix = "Substring positions specification is missing in the acon-file"

        # The configuration getters stay inside the block so that anything they
        # raise is captured together with the constructor call.
        with pytest.raises(M3DIllegalArgumentException) as ex_info:
            instance_name = configuration.get_algorithm_instance()
            instance_params = configuration.get_algorithm_params()
            AlgorithmFixedLengthStringExtractor(emr_system, instance_name, instance_params)

        error_message = str(ex_info.value)
        assert error_message.startswith(expected_prefix)
github adidas / m3d-api / test / unit / m3d / hadoop / algorithm / test_algorithm_fixed_length_string_extractor.py View on Github external
def test_build_params_missing_source_table(self, _):
        """
        Check that the algorithm cannot be constructed when the acon parameters
        carry no "source_table" entry.

        :param _: unused extra test argument (presumably a mock injected by a
            patch decorator that is outside this view)
        """
        # Every parameter is supplied except "source_table".
        acon_parameters = {
            "target_table": self.TARGET_TABLE,
            "source_field": self.SOURCE_FIELD,
            "target_partitions": self.TARGET_PARTITIONS,
            "select_conditions": self.SELECT_CONDITIONS,
            "substring_positions": self.SUBSTRING_POSITIONS
        }
        acon_dict = self._create_acon_dict(acon_parameters)

        emr_system = self._create_emr_system()
        configuration = self._create_algorithm_configuration(acon_dict)

        expected_prefix = "Source table name is missing in the acon-file"

        # The configuration getters stay inside the block so that anything they
        # raise is captured together with the constructor call.
        with pytest.raises(M3DIllegalArgumentException) as ex_info:
            instance_name = configuration.get_algorithm_instance()
            instance_params = configuration.get_algorithm_params()
            AlgorithmFixedLengthStringExtractor(emr_system, instance_name, instance_params)

        error_message = str(ex_info.value)
        assert error_message.startswith(expected_prefix)
github adidas / m3d-api / test / unit / m3d / hadoop / algorithm / test_algorithm_fixed_length_string_extractor.py View on Github external
def test_build_params_with_rules_unpartitioned(self, _):
        """
        Check that "select_rules" is rejected when the acon parameters define no
        "target_partitions".

        :param _: unused extra test argument (presumably a mock injected by a
            patch decorator that is outside this view)
        """
        # "select_rules" is given, but "target_partitions" is deliberately absent.
        acon_parameters = {
            "source_table": self.SOURCE_TABLE,
            "target_table": self.TARGET_TABLE,
            "source_field": self.SOURCE_FIELD,
            "select_rules": self.SELECT_RULES,
            "substring_positions": self.SUBSTRING_POSITIONS
        }
        acon_dict = self._create_acon_dict(acon_parameters)

        emr_system = self._create_emr_system()
        configuration = self._create_algorithm_configuration(acon_dict)

        expected_prefix = "Unable to use select_rules for unpartitioned table"

        # The configuration getters stay inside the block so that anything they
        # raise is captured together with the constructor call.
        with pytest.raises(M3DIllegalArgumentException) as ex_info:
            instance_name = configuration.get_algorithm_instance()
            instance_params = configuration.get_algorithm_params()
            AlgorithmFixedLengthStringExtractor(emr_system, instance_name, instance_params)

        error_message = str(ex_info.value)
        assert error_message.startswith(expected_prefix)
github adidas / m3d-api / test / unit / m3d / hadoop / algorithm / test_algorithm_fixed_length_string_extractor.py View on Github external
def test_build_params_with_rules_and_conditions(self, _):
        """
        Check that supplying both "select_conditions" and "select_rules" in the
        acon parameters is rejected.

        :param _: unused extra test argument (presumably a mock injected by a
            patch decorator that is outside this view)
        """
        # Both selection mechanisms are present at once.
        acon_parameters = {
            "source_table": self.SOURCE_TABLE,
            "target_table": self.TARGET_TABLE,
            "source_field": self.SOURCE_FIELD,
            "target_partitions": self.TARGET_PARTITIONS,
            "select_conditions": self.SELECT_CONDITIONS,
            "select_rules": self.SELECT_RULES,
            "substring_positions": self.SUBSTRING_POSITIONS
        }
        acon_dict = self._create_acon_dict(acon_parameters)

        emr_system = self._create_emr_system()
        configuration = self._create_algorithm_configuration(acon_dict)

        expected_prefix = "Unable to use both select_conditions and select_rules at the same time"

        # The configuration getters stay inside the block so that anything they
        # raise is captured together with the constructor call.
        with pytest.raises(M3DIllegalArgumentException) as ex_info:
            instance_name = configuration.get_algorithm_instance()
            instance_params = configuration.get_algorithm_params()
            AlgorithmFixedLengthStringExtractor(emr_system, instance_name, instance_params)

        error_message = str(ex_info.value)
        assert error_message.startswith(expected_prefix)
github adidas / m3d-api / test / unit / m3d / hadoop / algorithm / test_algorithm_fixed_length_string_extractor.py View on Github external
def test_build_params_empty_non_required_fields(self, add_tags_patch):
        acon_dict = self._create_acon_dict({
            "source_table": self.SOURCE_TABLE,
            "target_table": self.TARGET_TABLE,
            "source_field": self.SOURCE_FIELD,
            "target_partitions": self.TARGET_PARTITIONS,
            "select_conditions": self.SELECT_CONDITIONS,
            "substring_positions": self.SUBSTRING_POSITIONS
        })

        emr_system = self._create_emr_system()
        configuration = self._create_algorithm_configuration(acon_dict)
        algorithm = AlgorithmFixedLengthStringExtractor(
            emr_system,
            configuration.get_algorithm_instance(),
            configuration.get_algorithm_params()
        )

        generated_params = algorithm.build_params()

        expected_full_source_table = self._create_full_table_name(self.SOURCE_TABLE)
        expected_full_target_table = self._create_full_table_name(self.TARGET_TABLE)
        expected_params = {
            "source_table": expected_full_source_table,
            "target_table": expected_full_target_table,
            "source_field": self.SOURCE_FIELD,
            "target_partitions": self.TARGET_PARTITIONS,
            "select_conditions": self.SELECT_CONDITIONS,
            "substring_positions": self.SUBSTRING_POSITIONS
github adidas / m3d-api / test / unit / m3d / hadoop / algorithm / test_algorithm_fixed_length_string_extractor.py View on Github external
def test_build_params_with_rules(self, add_tags_patch):
        acon_dict = self._create_acon_dict({
            "source_table": self.SOURCE_TABLE,
            "target_table": self.TARGET_TABLE,
            "source_field": self.SOURCE_FIELD,
            "target_partitions": self.TARGET_PARTITIONS,
            "select_rules": self.SELECT_RULES,
            "substring_positions": self.SUBSTRING_POSITIONS
        })

        emr_system = self._create_emr_system()
        configuration = self._create_algorithm_configuration(acon_dict)
        algorithm = AlgorithmFixedLengthStringExtractor(
            emr_system,
            configuration.get_algorithm_instance(),
            configuration.get_algorithm_params()
        )

        generated_params = algorithm.build_params()

        expected_full_source_table = self._create_full_table_name(self.SOURCE_TABLE)
        expected_full_target_table = self._create_full_table_name(self.TARGET_TABLE)
        expected_params = {
            "source_table": expected_full_source_table,
            "target_table": expected_full_target_table,
            "source_field": self.SOURCE_FIELD,
            "target_partitions": self.TARGET_PARTITIONS,
            "select_rules": self.SELECT_RULES,
            "substring_positions": self.SUBSTRING_POSITIONS
github adidas / m3d-api / test / unit / m3d / hadoop / algorithm / test_algorithm_fixed_length_string_extractor.py View on Github external
def test_build_params_missing_target_table(self, _):
        """
        Check that the algorithm cannot be constructed when the acon parameters
        carry no "target_table" entry.

        :param _: unused extra test argument (presumably a mock injected by a
            patch decorator that is outside this view)
        """
        # Every parameter is supplied except "target_table".
        acon_parameters = {
            "source_table": self.SOURCE_TABLE,
            "source_field": self.SOURCE_FIELD,
            "target_partitions": self.TARGET_PARTITIONS,
            "select_conditions": self.SELECT_CONDITIONS,
            "substring_positions": self.SUBSTRING_POSITIONS
        }
        acon_dict = self._create_acon_dict(acon_parameters)

        emr_system = self._create_emr_system()
        configuration = self._create_algorithm_configuration(acon_dict)

        expected_prefix = "Target table name is missing in the acon-file"

        # The configuration getters stay inside the block so that anything they
        # raise is captured together with the constructor call.
        with pytest.raises(M3DIllegalArgumentException) as ex_info:
            instance_name = configuration.get_algorithm_instance()
            instance_params = configuration.get_algorithm_params()
            AlgorithmFixedLengthStringExtractor(emr_system, instance_name, instance_params)

        error_message = str(ex_info.value)
        assert error_message.startswith(expected_prefix)
github adidas / m3d-api / test / unit / m3d / hadoop / algorithm / test_algorithm_fixed_length_string_extractor.py View on Github external
def test_build_params_with_conditions_unpartitioned(self, _):
        """
        Check that "select_conditions" is rejected when the acon parameters
        define no "target_partitions".

        :param _: unused extra test argument (presumably a mock injected by a
            patch decorator that is outside this view)
        """
        # "select_conditions" is given, but "target_partitions" is deliberately absent.
        acon_parameters = {
            "source_table": self.SOURCE_TABLE,
            "target_table": self.TARGET_TABLE,
            "source_field": self.SOURCE_FIELD,
            "select_conditions": self.SELECT_CONDITIONS,
            "substring_positions": self.SUBSTRING_POSITIONS
        }
        acon_dict = self._create_acon_dict(acon_parameters)

        emr_system = self._create_emr_system()
        configuration = self._create_algorithm_configuration(acon_dict)

        expected_prefix = "Unable to use select_conditions for unpartitioned table"

        # The configuration getters stay inside the block so that anything they
        # raise is captured together with the constructor call.
        with pytest.raises(M3DIllegalArgumentException) as ex_info:
            instance_name = configuration.get_algorithm_instance()
            instance_params = configuration.get_algorithm_params()
            AlgorithmFixedLengthStringExtractor(emr_system, instance_name, instance_params)

        error_message = str(ex_info.value)
        assert error_message.startswith(expected_prefix)
github adidas / m3d-api / test / unit / m3d / hadoop / algorithm / test_algorithm_fixed_length_string_extractor.py View on Github external
def test_build_params_with_conditions(self, add_tags_patch):
        acon_dict = self._create_acon_dict({
            "source_table": self.SOURCE_TABLE,
            "target_table": self.TARGET_TABLE,
            "source_field": self.SOURCE_FIELD,
            "target_partitions": self.TARGET_PARTITIONS,
            "select_conditions": self.SELECT_CONDITIONS,
            "substring_positions": self.SUBSTRING_POSITIONS,
            "metadata_update_strategy": self.METADATA_UPDATE_STRATEGY
        })

        emr_system = self._create_emr_system()
        configuration = self._create_algorithm_configuration(acon_dict)
        algorithm = AlgorithmFixedLengthStringExtractor(
            emr_system,
            configuration.get_algorithm_instance(),
            configuration.get_algorithm_params()
        )

        generated_params = algorithm.build_params()

        expected_full_source_table = self._create_full_table_name(self.SOURCE_TABLE)
        expected_full_target_table = self._create_full_table_name(self.TARGET_TABLE)
        expected_params = {
            "source_table": expected_full_source_table,
            "target_table": expected_full_target_table,
            "source_field": self.SOURCE_FIELD,
            "target_partitions": self.TARGET_PARTITIONS,
            "select_conditions": self.SELECT_CONDITIONS,
            "metadata_update_strategy": self.METADATA_UPDATE_STRATEGY,
github adidas / m3d-api / m3d / hadoop / algorithm / algorithm_fixed_length_string_extractor.py View on Github external
def __init__(self, execution_system, algorithm_instance, algorithm_params):
        """
        Initialize the fixed-length string extractor algorithm.

        After delegating to the parent initializer, the parameters are validated,
        the source and target table names are qualified with the lake database
        name, and both qualified names are attached to the cluster as tags.

        :param execution_system: an instance of EMRSystem object
        :param algorithm_instance: name of the algorithm instance
        :param algorithm_params: algorithm configuration
        :raises M3DIllegalArgumentException: presumably raised by
            validate_parameters() for an invalid acon-file (its implementation
            is outside this view)
        """

        super(AlgorithmFixedLengthStringExtractor,
              self).__init__(execution_system, algorithm_instance, algorithm_params)

        # Validate first: the lookups below index required keys directly.
        # NOTE(review): self._parameters and self._execution_system are presumably
        # populated by the parent initializer - confirm against the base class.
        self.validate_parameters()

        # Fully qualified names in the form "<db_lake>.<table>".
        self.source_table = self._execution_system.db_lake + "." + self._parameters["source_table"]
        self.target_table = self._execution_system.db_lake + "." + self._parameters["target_table"]

        # Optional setting; stays None when the acon-file does not define it.
        self.metadata_update_strategy = self._parameters.get("metadata_update_strategy")

        execution_system.add_cluster_tags({
            EMRSystem.EMRClusterTag.SOURCE_TABLE: self.source_table,
            EMRSystem.EMRClusterTag.TARGET_TABLE: self.target_table
        })