How to use the m3d.system.table.Table class in m3d

To help you get started, we’ve selected a few examples showing how the Table class is used in public projects such as adidas/m3d-api.


Example from adidas/m3d-api, m3d/system/table.py (inside Table.__init__):
        self.partition_column = ""  # lake name of partition column
        self.partition_column_format = ""  # format of partition column

        tconx_provided_file_name = "temptconx_{0}".format(str(int(time.time() * 1000.0)))
        remove_tconx_provided_file_name = False

        print("PROVIDED TYPE: {0}".format(init_type))

        if init_type == TconxTableInitType.FROM_PROVIDED_JSON or init_type == TconxTableInitType.FROM_JSON_FILE:
            if init_type == TconxTableInitType.FROM_PROVIDED_JSON:
                tconx_file_path = tconx_provided_file_name + ".json"
            else:
                tconx_file_path = self.config_service.get_tconx_path(source_system, database,
                                                                     environment, table)
            if init_type == TconxTableInitType.FROM_PROVIDED_JSON:
                with open(tconx_file_path, 'w') as writer:
                    writer.write("" if Table.INIT_PAYLOAD not in kwargs else kwargs.__getitem__(Table.INIT_PAYLOAD))

            json_parser = JSONTconxParser(tconx_file_path)

            self.source_system = json_parser.get_value("source", "system")[0]
            self.destination_technology = json_parser.get_value("destination", "technology")[0]
            self.table_src = json_parser.get_value("source", "table")[0]
            self.table_lake = json_parser.get_value("destination", "table_lake")[0]
            self.table_changelog = json_parser.get_value("destination", "table_lake_changelog")[0]
            self.table_lakeout = json_parser.get_value("destination", "table_lakeout")[0]
            self.delimiter = json_parser.get_value("source_files", "delimiter")[0]
            self.source_schema = json_parser.get_value("source", "schema")[0]

            # number of header lines in upload CSV files
            self.header_lines = json_parser.get_value("source_files", "header_lines")[0]

            self.table = self.table_lake.split("_", 1)[-1]
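
The same constructor also accepts an in-memory tconx payload instead of a file on disk, via the Table.INIT_TYPE_FLAG and Table.INIT_PAYLOAD keyword arguments handled above. A minimal sketch, assuming TconxTableInitType can be imported from m3d.system.table and using placeholder argument values:

from m3d.system.table import Table, TconxTableInitType  # import location of TconxTableInitType assumed

tconx_json = "..."  # the tconx document as a JSON string (contents omitted here)

my_table = Table(
    "config/m3d/config.json",  # system config file (placeholder)
    "bi",                      # source system code (placeholder)
    "test101",                 # database code (placeholder)
    "dev",                     # environment code (placeholder)
    "my_table",                # table code (placeholder)
    **{
        Table.INIT_TYPE_FLAG: TconxTableInitType.FROM_PROVIDED_JSON,
        Table.INIT_PAYLOAD: tconx_json,
    }
)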
Example from adidas/m3d-api, m3d/m3d.py:
    def create_lake_out_view(
            config,
            destination_system,
            destination_database,
            destination_environment,
            destination_table,
            emr_cluster_id=None
    ):
        # create abstract table object to retrieve source technology
        abstract_table = Table(
            config,
            destination_system,
            destination_database,
            destination_environment,
            destination_table
        )
        destination_system_technology = abstract_table.get_destination_technology()

        # hadoop
        if destination_system_technology == DataSystem.SystemTechnology.HIVE:
            if abstract_table.storage_type == DataSystem.StorageType.S3:
                from m3d.hadoop.emr.emr_system import EMRSystem
                emr_system = EMRSystem(
                    config,
                    destination_system,
                    destination_database,
Example from adidas/m3d-api, m3d/hadoop/core/hive_table.py:
from m3d.system import table
from m3d.util import util
from m3d.util.hql_generator import HQLGenerator
from m3d.util.util import Util


class HiveTable(table.Table):
    TEMP_TS_COLUMN_NAME = "__temp_timestamp_column__"

    class TableLoadType(object):
        FULL = "FullLoad"
        DELTA = "DeltaLoad"
        APPEND = "AppendLoad"

    def __init__(
            self,
            config,
            destination_system,
            destination_database,
            destination_environment,
            destination_table,
            **kwargs
    ):
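
Since HiveTable extends table.Table with the same positional parameters, constructing one mirrors the Table calls in the m3d.py excerpts. A minimal sketch, assuming HiveTable can be instantiated directly without additional keyword arguments:

from m3d.hadoop.core.hive_table import HiveTable

hive_table = HiveTable(
    "config/m3d/config.json",  # system config file (placeholder)
    "emr",                     # destination system code (placeholder)
    "test101",                 # destination database code (placeholder)
    "dev",                     # destination environment code (placeholder)
    "my_table"                 # destination table code (placeholder)
)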
Example from adidas/m3d-api, m3d/m3d.py:
    def create_table(
            config,
            destination_system,
            destination_database,
            destination_environment,
            destination_table,
            emr_cluster_id=None
    ):
        # create abstract table object to retrieve source technology
        abstract_table = Table(
            config,
            destination_system,
            destination_database,
            destination_environment,
            destination_table
        )
        destination_system_technology = abstract_table.get_destination_technology()

        # hadoop
        if destination_system_technology == DataSystem.SystemTechnology.HIVE:
            if abstract_table.storage_type == DataSystem.StorageType.S3:
                from m3d.hadoop.emr.emr_system import EMRSystem
                emr_system = EMRSystem(
                    config,
                    destination_system,
                    destination_database,
Example from adidas/m3d-api, m3d/m3d.py:
    def drop_table(
            config,
            destination_system,
            destination_database,
            destination_environment,
            destination_table,
            emr_cluster_id=None
    ):
        # create abstract table object to retrieve source technology
        abstract_table = Table(
            config,
            destination_system,
            destination_database,
            destination_environment,
            destination_table
        )
        destination_system_technology = abstract_table.get_destination_technology()

        # hadoop
        if destination_system_technology == DataSystem.SystemTechnology.HIVE:
            if abstract_table.storage_type == DataSystem.StorageType.S3:
                from m3d.hadoop.emr.emr_system import EMRSystem
                emr_system = EMRSystem(
                    config,
                    destination_system,
                    destination_database,
Example from adidas/m3d-api, m3d/system/table.py:
    def __init__(self, config, source_system, database, environment, table, **kwargs):
        """
        Initialize table config

        :param config: system config file
        :param source_system: system code
        :param database: database code
        :param environment: environment code
        :param table: table code
        """

        # call super constructor
        super(Table, self).__init__(config, source_system, database, environment)

        if not kwargs:
            kwargs[Table.INIT_TYPE_FLAG] = TconxTableInitType.FROM_JSON_FILE

        init_type = kwargs[Table.INIT_TYPE_FLAG]

        # store parameters
        self.table = table

        # init member variables
        self.columns_src = []  # column names AND types as in source system
        self.columns_lake = []  # column names AND types for lake
        self.columns_lakeout = []  # column names for lake_out
        self.business_key = []  # list of business key columns
        self.datetime_columns = {}  # datetime columns and date formats (e.g. yyyy-MM-dd HH:mm:ss)
        self.partitioned_by = ""  # type of partition (year, month, day)
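
Putting the constructor excerpt above together, a minimal construction sketch could look like the following. The import style follows the hive_table.py excerpt; the config path and the system, database, environment and table codes are placeholder values, not settings taken from the project.

from m3d.system import table

# Placeholder values -- substitute your own m3d config file and codes.
config = "config/m3d/config.json"  # system config file (hypothetical path)
source_system = "bi"               # system code (hypothetical)
database = "test101"               # database code (hypothetical)
environment = "dev"                # environment code (hypothetical)
table_code = "my_table"            # table code (hypothetical)

# With no keyword arguments, __init__ falls back to
# TconxTableInitType.FROM_JSON_FILE and reads the tconx JSON file resolved
# by config_service.get_tconx_path(...), as shown in the first excerpt.
my_table = table.Table(config, source_system, database, environment, table_code)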
Example from adidas/m3d-api, m3d/m3d.py:
    def drop_lake_out_view(
            config,
            destination_system,
            destination_database,
            destination_environment,
            destination_table,
            emr_cluster_id=None
    ):
        # create abstract table object to retrieve source technology
        abstract_table = Table(
            config,
            destination_system,
            destination_database,
            destination_environment,
            destination_table
        )
        destination_system_technology = abstract_table.get_destination_technology()

        # hadoop
        if destination_system_technology == DataSystem.SystemTechnology.HIVE:
            if abstract_table.storage_type == DataSystem.StorageType.S3:
                from m3d.hadoop.emr.emr_system import EMRSystem
                emr_system = EMRSystem(
                    config,
                    destination_system,
                    destination_database,
Example from adidas/m3d-api, m3d/m3d.py:
    def truncate_table(
            config,
            destination_system,
            destination_database,
            destination_environment,
            destination_table,
            emr_cluster_id=None
    ):
        # create abstract table object to retrieve source technology
        abstract_table = Table(
            config,
            destination_system,
            destination_database,
            destination_environment,
            destination_table
        )
        destination_system_technology = abstract_table.get_destination_technology()

        if destination_system_technology == DataSystem.SystemTechnology.HIVE:
            if abstract_table.storage_type == DataSystem.StorageType.S3:
                from m3d.hadoop.emr.emr_system import EMRSystem
                emr_system = EMRSystem(
                    config,
                    destination_system,
                    destination_database,
                    destination_environment,
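
Taken together, the m3d.py excerpts (create_table, drop_table, truncate_table, create_lake_out_view and drop_lake_out_view) all follow the same pattern: an abstract Table is constructed only to query its destination technology and storage type, and the actual work is then delegated to a technology-specific system object, here EMRSystem for Hive tables stored on S3. This suggests Table is intended as a lightweight metadata object that keeps the public API free of Hadoop-specific details.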