self.api_action_timeout_seconds = params_system["api_action_timeout_seconds"]
self.api_action_polling_interval_seconds = params_system["api_action_polling_interval_seconds"]
self.api_long_timeout_seconds = params_system["api_long_timeout_seconds"]
self.aws_region = params_system["aws_region"]
self.packages_to_deploy = params_system["packages_to_deploy"]
self.configs_to_deploy = params_system["configs_to_deploy"]
# base directories
self.s3_dir_base = params_system["s3_dir_base"]
# defined sub-directories
self.subdir_archive = params_system["subdir"]["archive"]
self.subdir_header = params_system["subdir"]["header"]
self.subdir_config = params_system["subdir"]["config"]
self.subdir_data = params_system["subdir"]["data"]
self.subdir_data_backup = DataSystem.DirectoryName.DATA_BACKUP
self.subdir_error = params_system["subdir"]["error"]
self.subdir_work = params_system["subdir"]["work"]
self.subdir_log = params_system["subdir"]["log"]
self.subdir_apps = params_system["subdir"]["apps"]
self.subdir_m3d_engine = params_system["subdir"]["m3d_engine"]
self.subdir_loading = params_system["subdir"]["loading"]
self.subdir_full_load = params_system["subdir"]["full_load"]
self.subdir_delta_load = params_system["subdir"]["delta_load"]
self.subdir_append_load = params_system["subdir"]["append_load"]
self.subdir_black_whole = params_system["subdir"]["black_whole"]
self.subdir_credentials = params_system["subdir"]["credentials"]
self.subdir_keytab = params_system["subdir"]["keytab"]
self.subdir_tmp = params_system["subdir"]["tmp"]
# deployment directories of M3D application and metadata (tconx)
self.subdir_code = params_system["subdir"]["m3d"]
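# For illustration only: the params_system mapping read above would have roughly this
# shape (key names are taken from the reads above; all values are placeholders):
#
#     {
#         "api_action_timeout_seconds": 300,
#         "api_action_polling_interval_seconds": 3,
#         "api_long_timeout_seconds": 900,
#         "aws_region": "eu-west-1",
#         "packages_to_deploy": ["m3d-engine"],
#         "configs_to_deploy": ["config.json"],
#         "s3_dir_base": "s3://some-bucket/m3d/",
#         "subdir": {"archive": "archive", "header": "header", "config": "config", "m3d": "m3d"}
#     }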
from m3d.exceptions.m3d_exceptions import M3DEMRException  # import path assumed from the m3d package layout


def check_emr_cluster_client(func):
    """
    Decorator ensuring that the EMRSystem object has an initialized EMRClusterClient
    before the wrapped method is executed.

    :param func: EMRSystem method to wrap
    :return: wrapped function. Throws M3DEMRException if emr_cluster_client member of EMRSystem is None
    """
    def wrapper(*args, **kwargs):
        emr_system = args[0]  # this is self
        if emr_system.emr_cluster_client is None:
            raise M3DEMRException(
                "EMRClusterClient is not initiated. EMRSystem.{}() method cannot run without a valid "
                "emr_cluster_id passed to the constructor of EMRSystem.".format(func.__name__))
        return func(*args, **kwargs)
    return wrapper
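# Usage sketch (the decorator name and import above are reconstructed, since the
# snippet begins mid-docstring). Inside EMRSystem, cluster-bound methods would be
# wrapped like this:
#
#     @check_emr_cluster_client
#     def add_cluster_tag(self, key, value):
#         ...  # safe to use self.emr_cluster_client here
#
# Calling such a method on an EMRSystem constructed without an emr_cluster_id then
# raises M3DEMRException instead of failing later on the uninitialized client.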
class EMRSystem(DataSystem):
DEFAULT_ID_LENGTH = 10
DATETIME_FORMAT = "%Y%m%dT%H%M%S"
class EMRClusterTag(object):
API_METHOD = "ApiMethod"
SYSTEM = "System"
ENVIRONMENT = "Environment"
ALGORITHM_INSTANCE = "AlgorithmInstance"
ALGORITHM_CLASS = "AlgorithmClass"
SOURCE_TABLE = "SourceTable"
TARGET_TABLE = "TargetTable"
TARGET_DATASET = "TargetDataset"
SOURCE_VIEW = "SourceView"
TARGET_VIEW = "TargetView"
LOAD_TYPE = "LoadType"
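# The EMRClusterTag constants serve as canonical tag keys for the EMR cluster; they
# are attached via add_cluster_tag, as in the create_table API method further below:
#
#     emr_system.add_cluster_tag(EMRSystem.EMRClusterTag.API_METHOD, M3D.create_table.__name__)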
@staticmethod
def create_table(
        config,
        destination_system,
        destination_database,
        destination_environment,
        destination_table,
        emr_cluster_id=None
):
# create abstract table object to retrieve source technology
abstract_table = Table(
config,
destination_system,
destination_database,
destination_environment,
destination_table
)
destination_system_technology = abstract_table.get_destination_technology()
# hadoop
if destination_system_technology == DataSystem.SystemTechnology.HIVE:
if abstract_table.storage_type == DataSystem.StorageType.S3:
from m3d.hadoop.emr.emr_system import EMRSystem
emr_system = EMRSystem(
config,
destination_system,
destination_database,
destination_environment,
emr_cluster_id
)
emr_system.add_cluster_tag(EMRSystem.EMRClusterTag.API_METHOD, M3D.create_table.__name__)
emr_system.create_table(destination_table)
else:
raise m3d_exceptions.M3DUnsupportedStorageException(abstract_table.storage_type)
else:
raise m3d_exceptions.M3DUnsupportedDestinationSystemException(destination_system_technology)
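# Usage sketch with placeholder arguments (config path, system/database/environment
# codes and cluster id are illustrative, not taken from the original sources):
#
#     M3D.create_table(
#         "config/m3d/config.json",
#         "bdp", "emr", "dev", "some_table",
#         emr_cluster_id="j-XXXXXXXXXXXXX"
#     )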
@staticmethod
def run_algorithm(
config,
destination_system,
destination_database,
destination_environment,
algorithm_instance,
emr_cluster_id=None,
ext_params=None
):
ds = DataSystem(config, destination_system, destination_database, None)
if ds.database_type == DataSystem.DatabaseType.EMR:
from m3d.hadoop.algorithm.algorithm_executor_hadoop import AlgorithmExecutorHadoop
AlgorithmExecutorHadoop.create(
config_path=config,
destination_system=destination_system,
destination_database=destination_database,
destination_environment=destination_environment,
algorithm_instance=algorithm_instance,
emr_cluster_id=emr_cluster_id,
ext_params_str=ext_params
).run()
else:
raise m3d_exceptions.M3DUnsupportedDatabaseTypeException(ds.database_type)
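# Usage sketch, analogous to create_table above (all arguments are placeholders):
#
#     M3D.run_algorithm(
#         "config/m3d/config.json",
#         "bdp", "emr", "dev", "some_algorithm_instance",
#         emr_cluster_id="j-XXXXXXXXXXXXX",
#         ext_params='{"some_key": "some_value"}'
#     )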
from m3d.system.data_system import DataSystem
class DataSet(DataSystem):
def __init__(self, emr_system):
"""
Initialize representation of Hive table on S3
:param emr_system: execution system
"""
# call super constructor
super(DataSet, self).__init__(
emr_system.config,
emr_system.source_system,
emr_system.database,
emr_system.environment
)
self.emr_system = emr_system
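# Note: the constructor above re-registers the dataset under the execution system's
# own config, source system, database and environment, and keeps a back-reference
# to the EMR system in self.emr_system.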
@staticmethod
def load_table(
config,
destination_system,
destination_database,
destination_environment,
destination_table,
load_type,
emr_cluster_id=None,
spark_params=None
):
ds = DataSystem(config, destination_system, destination_database, destination_environment)
# hadoop
if ds.database_type == DataSystem.DatabaseType.EMR:
if ds.storage_type == DataSystem.StorageType.S3:
from m3d.hadoop.load.load_executor_hadoop import LoadExecutorHadoop
LoadExecutorHadoop.create(
config_path=config,
destination_system=destination_system,
destination_database=destination_database,
destination_environment=destination_environment,
destination_table=destination_table,
load_type=load_type,
emr_cluster_id=emr_cluster_id,
spark_params_str=spark_params
).run()
else:
raise m3d_exceptions.M3DUnsupportedStorageException(ds.storage_type)
else:
raise m3d_exceptions.M3DUnsupportedDatabaseTypeException(ds.database_type)
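# Usage sketch (placeholder arguments; the exact set of valid load_type values
# is not part of this snippet):
#
#     M3D.load_table(
#         "config/m3d/config.json",
#         "bdp", "emr", "dev", "some_table", "full_load",
#         emr_cluster_id="j-XXXXXXXXXXXXX"
#     )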
@staticmethod
def create(
config_path,
destination_system,
destination_database,
destination_environment,
algorithm_instance,
emr_cluster_id,
ext_params_str
):
data_system = DataSystem(
config_path,
destination_system,
destination_database,
destination_environment
)
if data_system.database_type == DataSystem.DatabaseType.EMR:
config = AlgorithmConfigurationHadoop.create(
config_path,
destination_database,
destination_environment,
algorithm_instance,
ext_params_str
)
execution_system = EMRSystem.from_data_system(data_system, emr_cluster_id)
        return AlgorithmExecutorHadoop(execution_system, config)
    else:
        raise m3d_exceptions.M3DUnsupportedDatabaseTypeException(data_system.database_type)
@staticmethod
def return_all_layers():
    # The full list of layer constants is not part of this snippet, so the valid
    # layers are collected by introspecting the upper-case attributes of DataLayers;
    # returning the class itself would break the membership check in validate_layers.
    return [value for key, value in vars(DataLayers).items() if key.isupper()]
@staticmethod
def validate_layers(data_layers):
if not data_layers:
raise M3DIllegalArgumentException("No data layer has been specified.")
valid_layers = DataLayers.return_all_layers()
for data_layer in data_layers:
if data_layer not in valid_layers:
raise M3DIllegalArgumentException("Not a valid data layer: {}".format(data_layer))
class Table(data_system.DataSystem):
INIT_TYPE_FLAG = "inittype"
INIT_PAYLOAD = "initpayload"
def __init__(self, config, source_system, database, environment, table, **kwargs):
"""
Initialize table config
:param config: system config file
:param source_system: system code
:param database: database code
:param environment: environment code
:param table: table code
"""
# call super constructor
super(Table, self).__init__(config, source_system, database, environment)
def __init__(self, config, source_system, database, environment):
"""
Initialize System Config
:param config: global system config file
:param source_system: system code
:param database: database code
:param environment: environment code
"""
# call super constructor
super(DataSystem, self).__init__(config, source_system, database)
# store parameters
self.environment = environment
self.source_system = source_system
self.database = database
# init destination schemas
self.db_landing = None
self.db_lake = None
self.db_lake_out = None
self.db_mart_mod = None
self.db_mart_cal = None
self.db_mart_out = None
self.db_m3d = None
self.db_work = None
self.db_error = None
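# Minimal instantiation sketch (placeholder arguments). The db_* members above are
# destination-schema slots initialized to None at construction time.
#
#     ds = DataSystem("config/m3d/config.json", "bdp", "emr", "dev")
#     ds.database_type  # checked against DataSystem.DatabaseType.EMR in the API methods above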