# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): headless fragment — the enclosing `def __init__` of Table starts
# before this view and the original indentation appears stripped; structure below
# is inferred from the if/else keywords. TODO: restore indentation from upstream.
self.partition_column_format = "" # format of partition column
# Temp file name for a caller-provided tconx JSON payload; millisecond
# timestamp suffix keeps concurrent runs from colliding on the same file.
tconx_provided_file_name = "temptconx_{0}".format(str(int(time.time() * 1000.0)))
remove_tconx_provided_file_name = False
# NOTE(review): debug print left in production path — consider logging instead.
print("PROVIDED TYPE: {0}".format(init_type))
# Resolve the tconx file path: a temp file when the JSON was provided inline,
# otherwise the configured path for this source_system/database/environment/table.
if init_type == TconxTableInitType.FROM_PROVIDED_JSON or init_type == TconxTableInitType.FROM_JSON_FILE:
if init_type == TconxTableInitType.FROM_PROVIDED_JSON:
tconx_file_path = tconx_provided_file_name + ".json"
else:
tconx_file_path = self.config_service.get_tconx_path(source_system, database,
environment, table)
# Persist the inline payload to disk (empty string when INIT_PAYLOAD missing)
# so the JSON parser below can read it from a file path.
if init_type == TconxTableInitType.FROM_PROVIDED_JSON:
with open(tconx_file_path, 'w') as writer:
writer.write("" if Table.INIT_PAYLOAD not in kwargs else kwargs.__getitem__(Table.INIT_PAYLOAD))
# Parse the tconx JSON and populate the table metadata attributes.
# NOTE(review): each get_value(...) result is indexed [0] — presumably the
# parser returns a list of matches; verify against JSONTconxParser.
json_parser = JSONTconxParser(tconx_file_path)
self.source_system = json_parser.get_value("source", "system")[0]
self.destination_technology = json_parser.get_value("destination", "technology")[0]
self.table_src = json_parser.get_value("source", "table")[0]
self.table_lake = json_parser.get_value("destination", "table_lake")[0]
self.table_changelog = json_parser.get_value("destination", "table_lake_changelog")[0]
self.table_lakeout = json_parser.get_value("destination", "table_lakeout")[0]
self.delimiter = json_parser.get_value("source_files", "delimiter")[0]
self.source_schema = json_parser.get_value("source", "schema")[0]
# number of header lines in upload CSV files
self.header_lines = json_parser.get_value("source_files", "header_lines")[0]
# Table code without the leading system prefix (text after the first "_").
self.table = self.table_lake.split("_", 1)[-1]
def create_lake_out_view(
config,
destination_system,
destination_database,
destination_environment,
destination_table,
emr_cluster_id=None
):
"""
Create the lake-out view for the given destination table.

NOTE(review): this fragment is truncated — the EMRSystem(...) call at the
bottom is cut off in this view, and the original indentation appears stripped.

:param config: system config file
:param destination_system: destination system code
:param destination_database: destination database code
:param destination_environment: destination environment code
:param destination_table: destination table code
:param emr_cluster_id: id of the EMR cluster to execute on (optional)
"""
# create abstract table object to retrieve source technology
abstract_table = Table(
config,
destination_system,
destination_database,
destination_environment,
destination_table
)
destination_system_technology = abstract_table.get_destination_technology()
# hadoop
# Only the HIVE-on-S3 combination is handled in the visible portion.
if destination_system_technology == DataSystem.SystemTechnology.HIVE:
if abstract_table.storage_type == DataSystem.StorageType.S3:
# Local import avoids pulling EMR dependencies for non-EMR paths.
from m3d.hadoop.emr.emr_system import EMRSystem
emr_system = EMRSystem(
config,
destination_system,
destination_database,
from m3d.system import table
from m3d.util import util
from m3d.util.hql_generator import HQLGenerator
from m3d.util.util import Util
class HiveTable(table.Table):
"""Hive representation of a lake table; extends the abstract table.Table."""
# Name of a temporary timestamp column used internally by load logic.
TEMP_TS_COLUMN_NAME = "__temp_timestamp_column__"
class TableLoadType(object):
# String codes identifying the supported load algorithms.
FULL = "FullLoad"
DELTA = "DeltaLoad"
APPEND = "AppendLoad"
def __init__(
self,
config,
destination_system,
destination_database,
destination_environment,
destination_table,
**kwargs
):
# NOTE(review): the __init__ body is truncated in this view — the
# signature mirrors the module-level create/drop helpers above.
def create_table(
config,
destination_system,
destination_database,
destination_environment,
destination_table,
emr_cluster_id=None
):
"""
Create the given destination table.

NOTE(review): this fragment is truncated — the EMRSystem(...) call at the
bottom is cut off in this view, and the original indentation appears stripped.

:param config: system config file
:param destination_system: destination system code
:param destination_database: destination database code
:param destination_environment: destination environment code
:param destination_table: destination table code
:param emr_cluster_id: id of the EMR cluster to execute on (optional)
"""
# create abstract table object to retrieve source technology
abstract_table = Table(
config,
destination_system,
destination_database,
destination_environment,
destination_table
)
destination_system_technology = abstract_table.get_destination_technology()
# hadoop
# Only the HIVE-on-S3 combination is handled in the visible portion.
if destination_system_technology == DataSystem.SystemTechnology.HIVE:
if abstract_table.storage_type == DataSystem.StorageType.S3:
# Local import avoids pulling EMR dependencies for non-EMR paths.
from m3d.hadoop.emr.emr_system import EMRSystem
emr_system = EMRSystem(
config,
destination_system,
destination_database,
def drop_table(
config,
destination_system,
destination_database,
destination_environment,
destination_table,
emr_cluster_id=None
):
"""
Drop the given destination table.

NOTE(review): this fragment is truncated — the EMRSystem(...) call at the
bottom is cut off in this view, and the original indentation appears stripped.

:param config: system config file
:param destination_system: destination system code
:param destination_database: destination database code
:param destination_environment: destination environment code
:param destination_table: destination table code
:param emr_cluster_id: id of the EMR cluster to execute on (optional)
"""
# create abstract table object to retrieve source technology
abstract_table = Table(
config,
destination_system,
destination_database,
destination_environment,
destination_table
)
destination_system_technology = abstract_table.get_destination_technology()
# hadoop
# Only the HIVE-on-S3 combination is handled in the visible portion.
if destination_system_technology == DataSystem.SystemTechnology.HIVE:
if abstract_table.storage_type == DataSystem.StorageType.S3:
# Local import avoids pulling EMR dependencies for non-EMR paths.
from m3d.hadoop.emr.emr_system import EMRSystem
emr_system = EMRSystem(
config,
destination_system,
destination_database,
def __init__(self, config, source_system, database, environment, table, **kwargs):
"""
Initialize table config

:param config: system config file
:param source_system: system code
:param database: database code
:param environment: environment code
:param table: table code
:param kwargs: optional init flags, e.g. Table.INIT_TYPE_FLAG selecting
    how the tconx definition is loaded
"""
# call super constructor
super(Table, self).__init__(config, source_system, database, environment)
# Default to loading the tconx definition from a JSON file when the caller
# passed no init flags at all.
if not kwargs:
kwargs[Table.INIT_TYPE_FLAG] = TconxTableInitType.FROM_JSON_FILE
init_type = kwargs[Table.INIT_TYPE_FLAG]
# store parameters
self.table = table
# init member variables
self.columns_src = [] # column names AND types as in source system
self.columns_lake = [] # column names AND types for lake
self.columns_lakeout = [] # column names for lake_out
self.business_key = [] # list of business key columns
self.datetime_columns = {} # datetime columns and date formats (e.g. yyyy-MM-dd HH:mm:ss)
self.partitioned_by = "" # type of partition (year, month, day)
# NOTE(review): fragment truncated here — the remainder of __init__ (partition
# column attributes, tconx parsing) is not visible at this point in the view.
# NOTE(review): this span is a duplicated paste of Table.__init__ — the `def`
# line and the opening docstring quotes are missing (the bare text below is the
# tail of the docstring, closed by the `"""` line). It duplicates the fragment
# above and is truncated again at the bottom; deduplicate against upstream.
Initialize table config
:param config: system config file
:param source_system: system code
:param database: database code
:param environment: environment code
:param table: table code
"""
# call super constructor
super(Table, self).__init__(config, source_system, database, environment)
# Default to loading the tconx definition from a JSON file when the caller
# passed no init flags at all.
if not kwargs:
kwargs[Table.INIT_TYPE_FLAG] = TconxTableInitType.FROM_JSON_FILE
init_type = kwargs[Table.INIT_TYPE_FLAG]
# store parameters
self.table = table
# init member variables
self.columns_src = [] # column names AND types as in source system
self.columns_lake = [] # column names AND types for lake
self.columns_lakeout = [] # column names for lake_out
self.business_key = [] # list of business key columns
self.datetime_columns = {} # datetime columns and date formats (e.g. yyyy-MM-dd HH:mm:ss)
self.partitioned_by = "" # type of partition (year, month, day)
self.partition_column = "" # lake name of partition column
self.partition_column_format = "" # format of partition column
# Temp file name for a caller-provided tconx JSON payload (ms timestamp suffix).
tconx_provided_file_name = "temptconx_{0}".format(str(int(time.time() * 1000.0)))
remove_tconx_provided_file_name = False
def drop_lake_out_view(
config,
destination_system,
destination_database,
destination_environment,
destination_table,
emr_cluster_id=None
):
"""
Drop the lake-out view of the given destination table.

NOTE(review): this fragment is truncated — the EMRSystem(...) call at the
bottom is cut off in this view, and the original indentation appears stripped.

:param config: system config file
:param destination_system: destination system code
:param destination_database: destination database code
:param destination_environment: destination environment code
:param destination_table: destination table code
:param emr_cluster_id: id of the EMR cluster to execute on (optional)
"""
# create abstract table object to retrieve source technology
abstract_table = Table(
config,
destination_system,
destination_database,
destination_environment,
destination_table
)
destination_system_technology = abstract_table.get_destination_technology()
# hadoop
# Only the HIVE-on-S3 combination is handled in the visible portion.
if destination_system_technology == DataSystem.SystemTechnology.HIVE:
if abstract_table.storage_type == DataSystem.StorageType.S3:
# Local import avoids pulling EMR dependencies for non-EMR paths.
from m3d.hadoop.emr.emr_system import EMRSystem
emr_system = EMRSystem(
config,
destination_system,
destination_database,
def truncate_table(
config,
destination_system,
destination_database,
destination_environment,
destination_table,
emr_cluster_id=None
):
"""
Truncate the given destination table.

NOTE(review): this fragment is truncated — the EMRSystem(...) call at the
bottom is cut off in this view, and the original indentation appears stripped.

:param config: system config file
:param destination_system: destination system code
:param destination_database: destination database code
:param destination_environment: destination environment code
:param destination_table: destination table code
:param emr_cluster_id: id of the EMR cluster to execute on (optional)
"""
# create abstract table object to retrieve source technology
abstract_table = Table(
config,
destination_system,
destination_database,
destination_environment,
destination_table
)
destination_system_technology = abstract_table.get_destination_technology()
# hadoop: only the HIVE-on-S3 combination is handled in the visible portion.
if destination_system_technology == DataSystem.SystemTechnology.HIVE:
if abstract_table.storage_type == DataSystem.StorageType.S3:
# Local import avoids pulling EMR dependencies for non-EMR paths.
from m3d.hadoop.emr.emr_system import EMRSystem
emr_system = EMRSystem(
config,
destination_system,
destination_database,
destination_environment,