How to use the m3d.system.data_system.DataSystem class in m3d

To help you get started, we’ve selected a few m3d examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github adidas / m3d-api / m3d / hadoop / emr / emr_system.py View on Github external
self.api_action_timeout_seconds = params_system["api_action_timeout_seconds"]
        self.api_action_polling_interval_seconds = params_system["api_action_polling_interval_seconds"]
        self.api_long_timeout_seconds = params_system["api_long_timeout_seconds"]
        self.aws_region = params_system["aws_region"]
        self.packages_to_deploy = params_system["packages_to_deploy"]
        self.configs_to_deploy = params_system["configs_to_deploy"]

        # base directories
        self.s3_dir_base = params_system["s3_dir_base"]

        # defined sub-directories
        self.subdir_archive = params_system["subdir"]["archive"]
        self.subdir_header = params_system["subdir"]["header"]
        self.subdir_config = params_system["subdir"]["config"]
        self.subdir_data = params_system["subdir"]["data"]
        self.subdir_data_backup = DataSystem.DirectoryName.DATA_BACKUP
        self.subdir_error = params_system["subdir"]["error"]
        self.subdir_work = params_system["subdir"]["work"]
        self.subdir_log = params_system["subdir"]["log"]
        self.subdir_apps = params_system["subdir"]["apps"]
        self.subdir_m3d_engine = params_system["subdir"]["m3d_engine"]
        self.subdir_loading = params_system["subdir"]["loading"]
        self.subdir_full_load = params_system["subdir"]["full_load"]
        self.subdir_delta_load = params_system["subdir"]["delta_load"]
        self.subdir_append_load = params_system["subdir"]["append_load"]
        self.subdir_black_whole = params_system["subdir"]["black_whole"]
        self.subdir_credentials = params_system["subdir"]["credentials"]
        self.subdir_keytab = params_system["subdir"]["keytab"]
        self.subdir_tmp = params_system["subdir"]["tmp"]

        # deployment directories of M3D application and metadata (tconx)
        self.subdir_code = params_system["subdir"]["m3d"]
github adidas / m3d-api / m3d / hadoop / emr / emr_system.py View on Github external
:return: None. Throws M3DEMRException if emr_cluster_client member of EMRSystem is None
    """
    def wrapper(*args, **kwargs):
        emr_system = args[0]  # this is self

        if emr_system.emr_cluster_client is None:
            raise M3DEMRException(
                "EMRClusterClient is not initiated. EMRSystem.{}() method cannot run without a valid emr_cluster_id "
                "passed to the constructor of EMRSystem.".format(func.__name__))

        return func(*args, **kwargs)

    return wrapper


class EMRSystem(DataSystem):
    """Execution system backed by AWS EMR (fragment — class continues elsewhere)."""

    # Presumably the length of generated unique identifiers — TODO confirm against usage.
    DEFAULT_ID_LENGTH = 10
    # strftime/strptime pattern, e.g. "20200131T235959".
    DATETIME_FORMAT = "%Y%m%dT%H%M%S"

    class EMRClusterTag(object):
        """String constants used as EMR cluster tag keys."""
        API_METHOD = "ApiMethod"
        SYSTEM = "System"
        ENVIRONMENT = "Environment"
        ALGORITHM_INSTANCE = "AlgorithmInstance"
        ALGORITHM_CLASS = "AlgorithmClass"
        SOURCE_TABLE = "SourceTable"
        TARGET_TABLE = "TargetTable"
        TARGET_DATASET = "TargetDataset"
        SOURCE_VIEW = "SourceView"
        TARGET_VIEW = "TargetView"
        LOAD_TYPE = "LoadType"
github adidas / m3d-api / m3d / m3d.py View on Github external
destination_environment,
            destination_table,
            emr_cluster_id=None
    ):
        # create abstract table object to retrieve source technology
        abstract_table = Table(
            config,
            destination_system,
            destination_database,
            destination_environment,
            destination_table
        )
        destination_system_technology = abstract_table.get_destination_technology()

        # hadoop
        if destination_system_technology == DataSystem.SystemTechnology.HIVE:
            if abstract_table.storage_type == DataSystem.StorageType.S3:
                from m3d.hadoop.emr.emr_system import EMRSystem
                emr_system = EMRSystem(
                    config,
                    destination_system,
                    destination_database,
                    destination_environment,
                    emr_cluster_id
                )
                emr_system.add_cluster_tag(EMRSystem.EMRClusterTag.API_METHOD, M3D.create_table.__name__)
                emr_system.create_table(destination_table)
            else:
                raise m3d_exceptions.M3DUnsupportedStorageException(abstract_table.storage_type)
        else:
            raise m3d_exceptions.M3DUnsupportedDestinationSystemException(destination_system_technology)
github adidas / m3d-api / m3d / m3d.py View on Github external
def run_algorithm(
        config,
        destination_system,
        destination_database,
        destination_environment,
        algorithm_instance,
        emr_cluster_id=None,
        ext_params=None
):
    """
    Run the given algorithm instance on the destination system.

    :param config: global system config file
    :param destination_system: system code
    :param destination_database: database code
    :param destination_environment: environment code
    :param algorithm_instance: algorithm instance to execute
    :param emr_cluster_id: id of a running EMR cluster, if any
    :param ext_params: external parameter string forwarded to the executor
    :raises M3DUnsupportedDatabaseTypeException: if the database type is not EMR
    """
    # NOTE(review): environment is passed as None here (unlike load_table);
    # destination_environment goes directly to the executor below.
    data_system = DataSystem(config, destination_system, destination_database, None)

    # Guard clause: only EMR database types can execute algorithms.
    if data_system.database_type != DataSystem.DatabaseType.EMR:
        raise m3d_exceptions.M3DUnsupportedDatabaseTypeException(data_system.database_type)

    # Import lazily so hadoop dependencies are only required for EMR runs.
    from m3d.hadoop.algorithm.algorithm_executor_hadoop import AlgorithmExecutorHadoop

    executor = AlgorithmExecutorHadoop.create(
        config_path=config,
        destination_system=destination_system,
        destination_database=destination_database,
        destination_environment=destination_environment,
        algorithm_instance=algorithm_instance,
        emr_cluster_id=emr_cluster_id,
        ext_params_str=ext_params
    )
    executor.run()
github adidas / m3d-api / m3d / hadoop / dataset / dataset.py View on Github external
from m3d.system.data_system import DataSystem


class DataSet(DataSystem):
    """Representation of a Hive table on S3, bound to an EMR execution system."""

    def __init__(self, emr_system):
        """
        Initialize representation of Hive table on S3

        :param emr_system: execution system
        """
        # Propagate the identifying fields of the execution system to the
        # DataSystem base class.
        parent = emr_system
        super(DataSet, self).__init__(
            parent.config,
            parent.source_system,
            parent.database,
            parent.environment
        )

        # Execution system used to operate on this dataset.
        self.emr_system = emr_system
github adidas / m3d-api / m3d / m3d.py View on Github external
def load_table(
        config,
        destination_system,
        destination_database,
        destination_environment,
        destination_table,
        load_type,
        emr_cluster_id=None,
        spark_params=None
):
    """
    Load data into the destination table.

    :param config: global system config file
    :param destination_system: system code
    :param destination_database: database code
    :param destination_environment: environment code
    :param destination_table: table to load
    :param load_type: type of load to perform
    :param emr_cluster_id: id of a running EMR cluster, if any
    :param spark_params: spark parameter string forwarded to the executor
    :raises M3DUnsupportedDatabaseTypeException: if the database type is not EMR
    :raises M3DUnsupportedStorageException: if the storage type is not S3
    """
    data_system = DataSystem(config, destination_system, destination_database, destination_environment)

    # Guard clauses replace the nested if/else: database type first,
    # then storage type, matching the original evaluation order.
    if data_system.database_type != DataSystem.DatabaseType.EMR:
        raise m3d_exceptions.M3DUnsupportedDatabaseTypeException(data_system.database_type)

    if data_system.storage_type != DataSystem.StorageType.S3:
        raise m3d_exceptions.M3DUnsupportedStorageException(data_system.storage_type)

    # Import lazily so hadoop dependencies are only required for EMR loads.
    from m3d.hadoop.load.load_executor_hadoop import LoadExecutorHadoop

    LoadExecutorHadoop.create(
        config_path=config,
        destination_system=destination_system,
        destination_database=destination_database,
        destination_environment=destination_environment,
        destination_table=destination_table,
        load_type=load_type,
        emr_cluster_id=emr_cluster_id,
        spark_params_str=spark_params
    ).run()
github adidas / m3d-api / m3d / m3d.py View on Github external
def load_table(
            config,
            destination_system,
            destination_database,
            destination_environment,
            destination_table,
            load_type,
            emr_cluster_id=None,
            spark_params=None
    ):
        ds = DataSystem(config, destination_system, destination_database, destination_environment)

        # hadoop
        if ds.database_type == DataSystem.DatabaseType.EMR:
            if ds.storage_type == DataSystem.StorageType.S3:
                from m3d.hadoop.load.load_executor_hadoop import LoadExecutorHadoop
                LoadExecutorHadoop.create(
                    config_path=config,
                    destination_system=destination_system,
                    destination_database=destination_database,
                    destination_environment=destination_environment,
                    destination_table=destination_table,
                    load_type=load_type,
                    emr_cluster_id=emr_cluster_id,
                    spark_params_str=spark_params
                ).run()
            else:
github adidas / m3d-api / m3d / hadoop / algorithm / algorithm_executor_hadoop.py View on Github external
def create(
        config_path,
        destination_system,
        destination_database,
        destination_environment,
        algorithm_instance,
        emr_cluster_id,
        ext_params_str
):
    """
    Build an AlgorithmExecutorHadoop for the given destination.

    :param config_path: global system config file
    :param destination_system: system code
    :param destination_database: database code
    :param destination_environment: environment code
    :param algorithm_instance: algorithm instance to execute
    :param emr_cluster_id: id of a running EMR cluster
    :param ext_params_str: external parameter string
    :return: AlgorithmExecutorHadoop instance, or None when the database
             type is not EMR
    """
    data_system = DataSystem(
        config_path,
        destination_system,
        destination_database,
        destination_environment
    )

    # Only EMR database types are executable here; any other type yields None.
    if data_system.database_type != DataSystem.DatabaseType.EMR:
        return None

    config = AlgorithmConfigurationHadoop.create(
        config_path,
        destination_database,
        destination_environment,
        algorithm_instance,
        ext_params_str
    )

    execution_system = EMRSystem.from_data_system(data_system, emr_cluster_id)
    return AlgorithmExecutorHadoop(execution_system, config)
github adidas / m3d-api / m3d / system / table.py View on Github external
def return_all_layers():
    """Return the object holding all known data layers (the DataLayers class itself)."""
    return DataLayers

    @staticmethod
    def validate_layers(data_layers):
        """
        Check that every entry of data_layers is a known data layer.

        :param data_layers: collection of data layer identifiers
        :raises M3DIllegalArgumentException: if data_layers is empty/None or
            contains an identifier that is not a valid layer
        """
        # Empty or None input is rejected up front.
        if not data_layers:
            raise M3DIllegalArgumentException("No data layer has been specified.")

        known_layers = DataLayers.return_all_layers()

        # Fail fast on the first unrecognized layer.
        for layer in data_layers:
            if layer not in known_layers:
                raise M3DIllegalArgumentException("Not a valid data layer: {}".format(layer))


class Table(data_system.DataSystem):
    """Configuration of a table within a data system (fragment — may continue elsewhere)."""

    # Key names for the table initialization payload — presumably consumed by
    # other methods of this class; TODO confirm usage.
    INIT_TYPE_FLAG = "inittype"
    INIT_PAYLOAD = "initpayload"

    def __init__(self, config, source_system, database, environment, table, **kwargs):
        """
        Initialize table config

        :param config: system config file
        :param source_system: system code
        :param database: database code
        :param environment: environment code
        :param table: table code
        :param kwargs: additional options; handling not visible in this fragment
        """

        # call super constructor
        super(Table, self).__init__(config, source_system, database, environment)
github adidas / m3d-api / m3d / system / data_system.py View on Github external
def __init__(self, config, source_system, database, environment):
        """
        Initialize System Config

        :param config: global system config file
        :param source_system: system code
        :param database: database code
        :param environment: environment code
        """
        # call super constructor (environment is the extra field this
        # subclass adds on top of the base system config)
        super(DataSystem, self).__init__(config, source_system, database)

        # store parameters
        self.environment = environment
        self.source_system = source_system
        self.database = database

        # init destination schemas — all start unset and are presumably
        # filled in later by other methods; TODO confirm where they are set
        self.db_landing = None
        self.db_lake = None
        self.db_lake_out = None
        self.db_mart_mod = None
        self.db_mart_cal = None
        self.db_mart_out = None
        self.db_m3d = None
        self.db_work = None
        self.db_error = None