How to use the datacompy.SparkCompare class in datacompy

To help you get started, we've selected a few SparkCompare examples based on popular ways datacompy is used in public projects. All of the snippets below come from the project's own test suite, tests/test_sparkcompare.py in capitalone/datacompy.
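
Before diving into the examples, here is a minimal end-to-end sketch of the SparkCompare workflow. The DataFrames, column names, and values below are invented for illustration; only the SparkCompare call pattern and report() mirror the fixtures that follow.

import datacompy
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Two hypothetical versions of an account table that differ in one balance
base_df = spark.createDataFrame(
    [(1, "George", 120.0), (2, "Michael", 80.5)], ["acct", "name", "balance"]
)
compare_df = spark.createDataFrame(
    [(1, "George", 120.0), (2, "Michael", 80.0)], ["acct", "name", "balance"]
)

comparison = datacompy.SparkCompare(
    spark, base_df, compare_df, join_columns=["acct"], rel_tol=0.01
)
comparison.report()  # prints a summary of matches and mismatches to stdout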


capitalone/datacompy: tests/test_sparkcompare.py (view on GitHub)
def comparison_neg_tol_fixture(base_tol, compare_both_tol, spark):
    return SparkCompare(
        spark,
        base_tol,
        compare_both_tol,
        join_columns=["account_identifier"],
        rel_tol=-0.2,  # deliberately negative; see test_negative_tolerances below
        abs_tol=0.01,
    )
capitalone/datacompy: tests/test_sparkcompare.py (view on GitHub)
def comparison_decimal_fixture(base_decimal, compare_decimal, spark):
    return SparkCompare(spark, base_decimal, compare_decimal, join_columns=["acct"])
capitalone/datacompy: tests/test_sparkcompare.py (view on GitHub)
def comparison_rel_tol_fixture(base_tol, compare_rel_tol, spark):
    return SparkCompare(
        spark, base_tol, compare_rel_tol, join_columns=["account_identifier"], rel_tol=0.1
    )
capitalone/datacompy: tests/test_sparkcompare.py (view on GitHub)
def comparison1_fixture(base_df1, compare_df1, spark):
    return SparkCompare(
        # CACHE_INTERMEDIATES is a flag defined at module level in the test file
        spark, base_df1, compare_df1, join_columns=["acct"], cache_intermediates=CACHE_INTERMEDIATES
    )
capitalone/datacompy: tests/test_sparkcompare.py (view on GitHub)
def comparison_abs_tol_fixture(base_tol, compare_abs_tol, spark):
    return SparkCompare(
        spark, base_tol, compare_abs_tol, join_columns=["account_identifier"], abs_tol=0.01
    )
capitalone/datacompy: tests/test_sparkcompare.py (view on GitHub)
def comparison_known_diffs2(base_td, compare_source, spark):
    return SparkCompare(
        spark,
        base_td,
        compare_source,
        join_columns=[("acct", "ACCOUNT_IDENTIFIER"), ("acct_seq", "SEQ_NUMBER")],
        column_mapping=[("stat_cd", "STATC"), ("open_dt", "ACCOUNT_OPEN"), ("cd", "CODE")],
        known_differences=[
            {
                "name": "Left-padded, four-digit numeric code",
                "types": datacompy.NUMERIC_SPARK_TYPES,
                # {input} is replaced with the column under comparison
                "transformation": "lpad(cast({input} AS bigint), 4, '0')",
            },
            {
                "name": "Null to *2",
                "types": ["string"],
                "transformation": "case when {input} is null then '*2' else {input} end",
            },
        ],
    )
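
A note on the known_differences entries above: each transformation is a Spark SQL fragment, and datacompy substitutes the column under comparison for the {input} placeholder before evaluating it. A small illustration of that substitution, using stat_cd as a stand-in column (the rendered string is my reading of the template, not output captured from datacompy):

template = "lpad(cast({input} AS bigint), 4, '0')"

# Substituting a column name for {input} yields a runnable Spark SQL expression
print(template.format(input="stat_cd"))  # lpad(cast(stat_cd AS bigint), 4, '0')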
capitalone/datacompy: tests/test_sparkcompare.py (view on GitHub)
def comparison4_fixture(base_df2, compare_df1, spark):
    return SparkCompare(
        spark,
        base_df2,
        compare_df1,
        join_columns=["acct"],
        column_mapping=[("super_duper_big_long_name", "name")],
    )
capitalone/datacompy: tests/test_sparkcompare.py (view on GitHub)
def show_all_columns_and_match_rate_fixture(base_tol, compare_both_tol, spark):
    return SparkCompare(
        spark,
        base_tol,
        compare_both_tol,
        join_columns=["account_identifier"],
        show_all_columns=True,
        match_rates=True,
    )
capitalone/datacompy: tests/test_sparkcompare.py (view on GitHub)
def test_negative_tolerances(spark, base_tol, compare_both_tol):
    with pytest.raises(ValueError, match="Please enter positive valued tolerances"):
        comp = SparkCompare(
            spark,
            base_tol,
            compare_both_tol,
            join_columns=["account_identifier"],
            rel_tol=-0.2,
            abs_tol=0.01,
        )
        comp.report()
capitalone/datacompy: tests/test_sparkcompare.py (view on GitHub)
def comparison2_fixture(base_df1, compare_df2, spark):
    return SparkCompare(spark, base_df1, compare_df2, join_columns=["acct"])
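
In the repository these functions are registered as pytest fixtures and then consumed by test functions. A minimal sketch of that pattern, assuming standard pytest conventions (the decorator and the test body are illustrative, not copied from the file):

import pytest
from datacompy import SparkCompare

@pytest.fixture
def comparison2(base_df1, compare_df2, spark):
    return SparkCompare(spark, base_df1, compare_df2, join_columns=["acct"])

def test_report_runs(comparison2):
    # report() writes the comparison summary to stdout by default
    comparison2.report()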