How to use the m3d.hadoop.core.hive_table.HiveTable class in m3d

To help you get started, we've selected a few HiveTable examples, based on popular ways it is used in public projects.


From adidas/m3d-api/test/integration/test_load_table_append_s3.py:

        self.table_config = [self.config_file] + destination_params
        emr_system = EMRSystem(
            self.config_file,
            self.destination_system,
            self.destination_database,
            self.destination_environment
        )

        # self.s3_table = S3Table(emr_system, self.destination_table)
        if data_type is None:
            data_type = DataType.STRUCTURED

        self.dataset = DataSetFactory.create_dataset(
            emr_system,
            HiveTable.TableLoadType.APPEND,
            data_type,
            self.destination_table
        )

        config_filename = "append_load-{}-{}.json".format(self.destination_environment, self.dataset.table_lake)
        self.config_filepath = os.path.join(self.dataset.dir_apps_append_load, config_filename)
        self.db_name_lake = self.scon_emr_dict["environments"][self.destination_environment]["schemas"]["lake"]

        self.expected_algorithms_jar_path = "s3://" + os.path.join(
            (self.scon_emr_dict["environments"][self.destination_environment]["s3_buckets"]["application"]).strip("/"),
            (self.scon_emr_dict["environments"][self.destination_environment]["s3_deployment_dir_base"]).strip("/"),
            self.destination_environment,
            self.scon_emr_dict["subdir"]["m3d"],
            self.config_dict["subdir_projects"]["m3d_api"],
            self.scon_emr_dict["spark"]["jar_name"]
        )
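
The expected jar path above is assembled by joining bucket and directory values from the scon configuration. Stripping leading and trailing slashes first matters because os.path.join discards every earlier component when it meets an absolute path. A minimal sketch of the same composition, with hypothetical config values:

import os

# Hypothetical scon values, for illustration only.
application_bucket = "m3d-application-bucket/"
deployment_dir_base = "/m3d/deployment"

# Without strip("/"), the leading slash in deployment_dir_base would make
# os.path.join drop the bucket component entirely.
jar_path = "s3://" + os.path.join(
    application_bucket.strip("/"),
    deployment_dir_base.strip("/"),
    "dev",          # destination environment
    "m3d",          # scon subdir for m3d
    "m3d-api",      # project subdir
    "m3d-api.jar"   # hypothetical spark jar name
)

print(jar_path)
# s3://m3d-application-bucket/m3d/deployment/dev/m3d/m3d-api/m3d-api.jar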

From adidas/m3d-api/m3d/hadoop/load/load_executor_hadoop.py:

def _get_supported_emr_load_types():
    """
    Return the supported EMR load types.

    :return: dictionary load-name -> load-class
    """

    return {
        HiveTable.TableLoadType.FULL: FullLoad,
        HiveTable.TableLoadType.DELTA: DeltaLoad,
        HiveTable.TableLoadType.APPEND: AppendLoad
    }
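
The mapping above is a dispatch table: given a requested load type, the executor looks up the matching load class and instantiates it. A minimal usage sketch, assuming the enclosing class is named LoadExecutorHadoop (the lookup and error handling here are illustrative, not m3d's actual code):

requested = HiveTable.TableLoadType.APPEND
supported = LoadExecutorHadoop._get_supported_emr_load_types()

if requested not in supported:
    raise ValueError("Unsupported EMR load type: {}".format(requested))

load_class = supported[requested]   # e.g. AppendLoad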

From adidas/m3d-api/m3d/hadoop/load/append_load.py:

def get_load_type(self):
    return HiveTable.TableLoadType.APPEND

From adidas/m3d-api/m3d/hadoop/load/full_load.py:

def get_load_type(self):
    return HiveTable.TableLoadType.FULL

From adidas/m3d-api/m3d/hadoop/core/hive_table.py:

def __init__(
        self,
        config,
        destination_system,
        destination_database,
        destination_environment,
        destination_table,
        **kwargs
):
    """
    Initialize Hive table

    :param config: system config file
    :param destination_system: destination system code
    :param destination_database: destination database code
    :param destination_environment: destination environment code
    :param destination_table: destination table code
    """

    # call super constructor
    super(HiveTable, self).__init__(
        config,
        destination_system,
        destination_database,
        destination_environment,
        destination_table,
        **kwargs
    )
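
All five identifiers are passed through unchanged, so every HiveTable subclass is constructed with the same positional arguments. A hypothetical instantiation (the config path and codes below are invented for illustration):

table = HiveTable(
    config="config/system/config.json",  # hypothetical system config file
    destination_system="emr",            # hypothetical system code
    destination_database="lake_db",      # hypothetical database code
    destination_environment="dev",       # hypothetical environment code
    destination_table="mart_sales"       # hypothetical table code
)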

From adidas/m3d-api/m3d/hadoop/load/delta_load.py:

def get_load_type(self):
    return HiveTable.TableLoadType.DELTA
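
Each load class reports its own type via get_load_type, which keeps the executor's dispatch table and the load implementations aligned. The TableLoadType constants themselves are not shown on this page; a minimal sketch of the pattern, assuming they are plain string constants on a nested class:

class TableLoadType(object):
    # Assumed values; the real constants live in m3d/hadoop/core/hive_table.py.
    FULL = "FullLoad"
    DELTA = "DeltaLoad"
    APPEND = "AppendLoad"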

From adidas/m3d-api/m3d/hadoop/dataset/dataset_factory.py:

def create_dataset(execution_system, load_type, data_type, dataset_name):
    if data_type == DataType.STRUCTURED:
        dataset = S3Table(
            emr_system=execution_system,
            destination_table=dataset_name
        )
    elif data_type == DataType.SEMISTRUCTURED:
        if load_type == HiveTable.TableLoadType.APPEND:
            dataset = SemistructuredDataSet(
                emr_system=execution_system,
                dataset_name=dataset_name
            )
        else:
            raise M3DUnsupportedLoadTypeException(
                load_type=load_type,
                message="Loading algorithm {} not supported for data type {}.".format(load_type, data_type)
            )
    else:
        raise M3DUnsupportedDataTypeException(
            message="Data type {} not available.".format(data_type)
        )

    return dataset
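
A hypothetical call to the factory, mirroring the test at the top of this page: structured data always yields an S3Table, while semistructured data is accepted only for append loads (any other load type raises M3DUnsupportedLoadTypeException):

dataset = DataSetFactory.create_dataset(
    emr_system,                       # an initialized EMRSystem
    HiveTable.TableLoadType.APPEND,
    DataType.SEMISTRUCTURED,
    "my_dataset"                      # hypothetical dataset name
)
# -> SemistructuredDataSet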

From adidas/m3d-api/m3d/hadoop/emr/s3_table.py:

import logging
import os

from m3d.config.config_service import ConfigService
from m3d.exceptions.m3d_exceptions import M3DDatabaseException, M3DException
from m3d.hadoop.core.hive_table import HiveTable
from m3d.hadoop.core.spark_parameters import SparkParameters
from m3d.hadoop.emr.emr_exceptions import M3DEMRStepException
from m3d.util.hql_generator import HQLGenerator


class S3Table(HiveTable):

    def __init__(self, emr_system, destination_table, spark_params=None, **kwargs):
        """
        Initialize representation of Hive table on S3

        :param emr_system: EMR system object for the target cluster
        :param destination_table: destination table code
        :param spark_params: external spark parameters to override scon defaults
        :param kwargs: additional keyword arguments (e.g. emr_cluster_id) forwarded to the HiveTable constructor
        """

        # call super constructor
        super(S3Table, self).__init__(