Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Remove comments
hive_view_ddl = functools.reduce(lambda x, y: x + " " + y,
filter(lambda line: not line.startswith("--") and line,
map(lambda x: x.strip(), oracle_view_ddl.split("\n"))
)
)
# Clean up (in this order)
hive_view_ddl = hive_view_ddl.replace("\t", " ")
hive_view_ddl = hive_view_ddl.replace("(+)", "")
hive_view_ddl = hive_view_ddl.replace(" ,", ",")
hive_view_ddl = hive_view_ddl.replace(" =", "=")
hive_view_ddl = hive_view_ddl.replace("= ", "=")
hive_view_ddl = hive_view_ddl.replace(",", ", ")
hive_view_ddl = hive_view_ddl.replace("\"", HQLGenerator.ESCAPE_KEYWORDS_CHAR)
# let's get rid of multiple consecutive spaces
double_space = " "
single_space = " "
while double_space in hive_view_ddl:
hive_view_ddl = hive_view_ddl.replace(double_space, single_space)
# Replace Oracle create view statement with Hive one
hive_view_ddl = hive_view_ddl.replace(
tag_create_view_oracle,
tag_create_view_hive)
return hive_view_ddl
def drop_table(db_table):
    """Try to drop ``db_table`` through Hive on a best-effort basis.

    The statement comes from ``HQLGenerator.generate_drop_table`` and is run
    with ``self.emr_system.execute_hive`` (``self`` is taken from the
    enclosing scope).

    :param db_table: table identifier passed to the drop-table generator
    :return: 1 if the statement executed without raising, 0 otherwise
    """
    statement = HQLGenerator.generate_drop_table(db_table).with_semicolon()

    try:
        logging.info("Dropping table {} from Hive".format(db_table))
        self.emr_system.execute_hive(statement)
    except Exception:
        # Any failure is reported as a warning and signalled through the return value.
        logging.warning("Unable to drop {} table".format(db_table))
        return 0

    return 1
def __init__(self):
    """Initialise the instance by delegating to the parent initialiser."""
    parent = super(HQLGenerator, self)
    parent.__init__()
def _get_drop_lakeout_statement(self):
    """Return the drop-view-if-exists statement for ``self.db_view_lake_out``.

    The statement is whatever ``HQLGenerator.generate_drop_view_if_exists``
    produces for that view name.
    """
    lake_out_view = self.db_view_lake_out
    return HQLGenerator.generate_drop_view_if_exists(lake_out_view)
def __init__(self, table_name, table_location, columns):
    """Initialise the builder and select Parquet as the storage clause.

    All three arguments are forwarded unchanged to the parent initialiser.

    :param table_name: forwarded to the parent builder
    :param table_location: forwarded to the parent builder
    :param columns: forwarded to the parent builder
    """
    parent = super(HQLGenerator.CreateParquetTableStatementBuilder, self)
    parent.__init__(table_name, table_location, columns)
    # Set after the parent initialiser has run, as in the original ordering.
    self._format = "STORED AS PARQUET"
def _get_create_database_if_not_exists(database_name):
    """Return the semicolon-terminated create-database statement for ``database_name``.

    NOTE(review): the generator method name is spelled ``..._if_not_exits``
    in HQLGenerator; it is an external name and is kept as-is here.

    :param database_name: database identifier passed to the generator
    """
    statement = HQLGenerator.generate_create_database_if_not_exits(database_name)
    return statement.with_semicolon()
def reset_table(db_table, create_table_hql, table_location, table_partitioned_flag):
    """Reset ``db_table`` in Hive.

    For a partitioned table the script is: drop table, ``create_table_hql``,
    repair table (joined with newlines). Otherwise a single alter-table-location
    statement pointing at ``table_location`` is used. The script is executed
    with ``self.emr_system.execute_hive`` (``self`` is taken from the
    enclosing scope).

    :param db_table: table identifier passed to the HQL generators
    :param create_table_hql: create-table statement, used only when partitioned
    :param table_location: new location, used only when not partitioned
    :param table_partitioned_flag: truthy if the table is partitioned
    :raises M3DEMRStepException: re-raised when execution fails for any reason
        other than a "Table not found" error
    """
    if table_partitioned_flag:
        drop_table_hql = HQLGenerator.generate_drop_table(db_table).with_semicolon()
        repair_table_hql = HQLGenerator.generate_repair_table(db_table).with_semicolon()
        hql = "\n".join([drop_table_hql, create_table_hql, repair_table_hql])
    else:
        hql = HQLGenerator.generate_alter_table_location(db_table, table_location).with_semicolon()

    try:
        logging.info("Resetting '{}' table.".format(db_table))
        self.emr_system.execute_hive(hql)
    except M3DEMRStepException as e:
        # The table might already be absent; errors from that case are ignored.
        if "Table not found" not in str(e):
            # A failure that is about to propagate is reported at ERROR level,
            # matching how create_tables reports its failures.
            logging.error("Failed to reset '{}' table: {}".format(db_table, e))
            raise
def create_statement(_columns, _target_partitions=None):
    """Build an external Parquet create-table statement for ``self.db_table_lake``.

    ``self`` and ``table_location`` are taken from the enclosing scope.

    :param _columns: column definitions handed to the statement builder
    :param _target_partitions: value handed to ``partitioned_by`` (default None)
    :return: result of the builder's ``build(is_external=True)``
    """
    builder = HQLGenerator.CreateParquetTableStatementBuilder(
        self.db_table_lake, table_location, _columns
    )
    builder = builder.partitioned_by(_target_partitions)
    builder = builder.with_properties({"serialization.encoding": "UTF-8"})
    return builder.build(is_external=True)
def create_tables(self):
    """Run one Hive script that sets up the landing and lake tables.

    The script consists, in order, of: create-database statements for
    ``self.db_landing`` and ``self.db_lake``, the landing create statement,
    a repair statement for the landing table, the lake create statement and
    a repair statement for the lake table.

    :raises Exception: any error raised by ``self.emr_system.execute_hive``
        is logged at error level and re-raised
    """
    statements = [
        self._get_create_database_if_not_exists(self.db_landing),
        self._get_create_database_if_not_exists(self.db_lake),
        self._get_create_landing_statement(self.dir_landing_final).with_semicolon(),
        HQLGenerator.generate_repair_table(self.db_table_landing).with_semicolon(),
        self._get_create_lake_statement(self.dir_lake_final).with_semicolon(),
        HQLGenerator.generate_repair_table(self.db_table_lake).with_semicolon(),
    ]
    script = "\n".join(statements)

    try:
        self.emr_system.execute_hive(script)
    except Exception:
        failure = "Failed to create {} and {} tables.".format(self.db_table_landing, self.db_table_lake)
        logging.error(failure)
        raise
    else:
        logging.info("Successfully created {} and {} tables.".format(self.db_table_landing, self.db_table_lake))
def create_tables(self):
    """Create the landing and lake databases and tables with a single Hive call.

    Statements are collected in execution order (databases first, then for
    each of landing and lake: create statement followed by repair statement),
    joined with newlines and passed to ``self.emr_system.execute_hive``.

    :raises Exception: execution errors are logged at error level and re-raised
    """
    parts = []
    # Databases first.
    parts.append(self._get_create_database_if_not_exists(self.db_landing))
    parts.append(self._get_create_database_if_not_exists(self.db_lake))
    # Landing table: create, then repair.
    parts.append(self._get_create_landing_statement(self.dir_landing_final).with_semicolon())
    parts.append(HQLGenerator.generate_repair_table(self.db_table_landing).with_semicolon())
    # Lake table: create, then repair.
    parts.append(self._get_create_lake_statement(self.dir_lake_final).with_semicolon())
    parts.append(HQLGenerator.generate_repair_table(self.db_table_lake).with_semicolon())
    hql_script = "\n".join(parts)

    try:
        self.emr_system.execute_hive(hql_script)
    except Exception:
        logging.error("Failed to create {} and {} tables.".format(self.db_table_landing, self.db_table_lake))
        raise

    logging.info("Successfully created {} and {} tables.".format(self.db_table_landing, self.db_table_lake))