Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def comparison_known_diffs1(base_td, compare_source, spark):
return SparkCompare(
spark,
base_td,
compare_source,
join_columns=[("acct", "ACCOUNT_IDENTIFIER"), ("acct_seq", "SEQ_NUMBER")],
column_mapping=[("stat_cd", "STATC"), ("open_dt", "ACCOUNT_OPEN"), ("cd", "CODE")],
known_differences=[
{
"name": "Left-padded, four-digit numeric code",
"types": datacompy.NUMERIC_SPARK_TYPES,
"transformation": "lpad(cast({input} AS bigint), 4, '0')",
},
{
"name": "Null to *2",
"types": ["string"],
"transformation": "case when {input} is null then '*2' else {input} end",
},
{
"name": "Julian date -> date",
"types": ["bigint"],
"transformation": "to_date(cast(unix_timestamp(cast({input} AS string), 'yyyyDDD') AS timestamp))",
},
def test_decimal_comparisons():
    """Verify decimal type spellings are members of NUMERIC_SPARK_TYPES.

    Covers the bare name, the empty-parentheses form, and a form with
    explicit precision and scale.
    """
    decimal_spellings = ["decimal", "decimal()", "decimal(20, 10)"]
    for spelling in decimal_spellings:
        # Fail on the first spelling that is not reported as contained.
        assert spelling in datacompy.NUMERIC_SPARK_TYPES