How to use datacompy - 10 common examples

To help you get started, we’ve selected a few datacompy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github capitalone / datacompy / tests / test_core.py View on Github external
def test_index_joining_non_overlapping():
    """Compare on the index when df2 has one more row than df1.

    The test expects the shared rows and all columns to agree, the overall
    comparison to fail, and the single unmatched row to come from df2.
    """
    shared_rows = [{"a": "hi", "b": 2}, {"a": "bye", "b": 2}]
    extra_row = {"a": "back fo mo", "b": 3}
    df1 = pd.DataFrame(shared_rows)
    df2 = pd.DataFrame([*shared_rows, extra_row])

    compare = datacompy.Compare(df1, df2, on_index=True)

    # Overall mismatch, even though columns and overlapping rows agree.
    assert not compare.matches()
    assert compare.all_columns_match()
    assert compare.intersect_rows_match()

    # Nothing is unique to df1; only the extra row is unique to df2.
    assert len(compare.df1_unq_rows) == 0
    only_in_df2 = compare.df2_unq_rows
    assert len(only_in_df2) == 1
    assert list(only_in_df2["a"]) == ["back fo mo"]
github capitalone / datacompy / tests / test_core.py View on Github external
def test_compare_df_setter_good():
    """Check that valid frames and join columns are accepted by Compare.

    For each requested join-column list the test expects ``compare.df1`` and
    ``compare.df2`` to equal the frames passed in, and ``join_columns`` to
    equal the expected (lower-case) list.
    """
    df1 = pd.DataFrame([{"a": 1, "b": 2}, {"a": 2, "b": 2}])
    df2 = pd.DataFrame([{"A": 1, "B": 2}, {"A": 2, "B": 3}])

    # (join columns as requested, join columns expected on the Compare object)
    scenarios = (
        (["a"], ["a"]),
        (["A", "b"], ["a", "b"]),
    )
    for requested, expected in scenarios:
        compare = datacompy.Compare(df1, df2, requested)
        assert compare.df1.equals(df1)
        assert compare.df2.equals(df2)
        assert compare.join_columns == expected
github capitalone / datacompy / tests / test_core.py View on Github external
def test_index_with_joins_with_ignore_spaces():
    """Exercise ``ignore_spaces`` on frames differing only by padding in "b".

    With ``ignore_spaces=False`` the overlapping rows are expected to differ;
    with ``ignore_spaces=True`` the frames are expected to match completely.
    """
    padded_left = pd.DataFrame([{"a": 1, "b": " A"}, {"a": 2, "b": "A"}])
    padded_right = pd.DataFrame([{"a": 1, "b": "A"}, {"a": 2, "b": "A "}])

    # Strict comparison on the index: whitespace differences count as mismatches.
    strict = datacompy.Compare(
        padded_left, padded_right, on_index=True, ignore_spaces=False
    )
    assert not strict.matches()
    assert strict.all_columns_match()
    assert strict.all_rows_overlap()
    assert not strict.intersect_rows_match()

    # NOTE(review): this second comparison joins on column "a" rather than on
    # the index -- confirm that is intended given the test's name.
    lenient = datacompy.Compare(padded_left, padded_right, "a", ignore_spaces=True)
    assert lenient.matches()
    assert lenient.all_columns_match()
    assert lenient.all_rows_overlap()
    assert lenient.intersect_rows_match()
github capitalone / datacompy / tests / test_core.py View on Github external
# should match
    df1 = pd.DataFrame({"a": [1, 2, 3], "b": [0, 1, 2]})
    df2 = pd.DataFrame({"a": [1, 2, 3], "B": [0, 1, 2]})
    compare = datacompy.Compare(df1, df2, join_columns=["a"])
    assert compare.matches()
    # should not match
    df1 = pd.DataFrame({"a": [1, 2, 3], "b": [0, 1, 2]})
    df2 = pd.DataFrame({"a": [1, 2, 3], "B": [0, 1, 2]})
    compare = datacompy.Compare(df1, df2, join_columns=["a"], cast_column_names_lower=False)
    assert not compare.matches()

    # test join column
    # should match
    df1 = pd.DataFrame({"a": [1, 2, 3], "b": [0, 1, 2]})
    df2 = pd.DataFrame({"A": [1, 2, 3], "B": [0, 1, 2]})
    compare = datacompy.Compare(df1, df2, join_columns=["a"])
    assert compare.matches()
    # should fail because "a" is not found in df2
    df1 = pd.DataFrame({"a": [1, 2, 3], "b": [0, 1, 2]})
    df2 = pd.DataFrame({"A": [1, 2, 3], "B": [0, 1, 2]})
    expected_message = "df2 must have all columns from join_columns"
    with raises(ValueError, match=expected_message):
        compare = datacompy.Compare(df1, df2, join_columns=["a"], cast_column_names_lower=False)
github capitalone / datacompy / tests / test_core.py View on Github external
def test_dupes_with_nulls():
    """Join on two columns where keys repeat and "fld_2" is mostly NaN.

    The test expects ``subset()`` to be truthy for these two frames.
    """
    left_keys = [1, 2, 2, 3, 3, 4, 5, 5]
    right_keys = [1, 2, 3, 4, 5]

    # Only the first row has a real "fld_2" value; every other row is NaN.
    df1 = pd.DataFrame(
        {
            "fld_1": left_keys,
            "fld_2": ["A"] + [np.nan] * (len(left_keys) - 1),
        }
    )
    df2 = pd.DataFrame(
        {
            "fld_1": right_keys,
            "fld_2": ["A"] + [np.nan] * (len(right_keys) - 1),
        }
    )

    comp = datacompy.Compare(df1, df2, join_columns=["fld_1", "fld_2"])
    assert comp.subset()
github capitalone / datacompy / tests / test_core.py View on Github external
def test_index_with_joins_with_ignore_case():
    """Exercise ``ignore_case`` on frames whose "b" values differ only by case.

    With ``ignore_case=False`` the overlapping rows are expected to differ;
    with ``ignore_case=True`` the frames are expected to match completely.
    """
    df1 = pd.DataFrame([{"a": 1, "b": "a"}, {"a": 2, "b": "A"}])
    df2 = pd.DataFrame([{"a": 1, "b": "A"}, {"a": 2, "b": "a"}])

    # Case-sensitive comparison on the index.
    case_sensitive = datacompy.Compare(df1, df2, on_index=True, ignore_case=False)
    assert not case_sensitive.matches()
    assert case_sensitive.all_columns_match()
    assert case_sensitive.all_rows_overlap()
    assert not case_sensitive.intersect_rows_match()

    # NOTE(review): this second comparison joins on column "a" rather than on
    # the index -- confirm that is intended given the test's name.
    case_insensitive = datacompy.Compare(df1, df2, "a", ignore_case=True)
    assert case_insensitive.matches()
    assert case_insensitive.all_columns_match()
    assert case_insensitive.all_rows_overlap()
    assert case_insensitive.intersect_rows_match()
github capitalone / datacompy / tests / test_core.py View on Github external
def test_simple_dupes_one_field_two_vals():
    """Two identical frames with a duplicated join key "a" should match.

    The report is also rendered; its content is not inspected.
    """
    records = [{"a": 1, "b": 2}, {"a": 1, "b": 0}]
    df1 = pd.DataFrame(records)
    df2 = pd.DataFrame(records)

    compare = datacompy.Compare(df1, df2, join_columns=["a"])
    assert compare.matches()

    # Smoke check only: rendering the report must not raise.
    compare.report()
github capitalone / datacompy / tests / test_core.py View on Github external
def test_compare_df_setter_bad():
    """Check the errors expected from Compare for invalid inputs.

    Expected failures, in order: a non-DataFrame argument (TypeError), a join
    column missing from df1 (ValueError), and a frame with columns "a" and
    "A" (ValueError about unique column names). Finally, a frame with a
    repeated "a" value joined on both columns is expected to be accepted.
    """
    clashing = pd.DataFrame([{"a": 1, "A": 2}, {"a": 2, "A": 2}])

    with raises(TypeError, match="df1 must be a pandas DataFrame"):
        datacompy.Compare("a", "a", ["a"])

    with raises(ValueError, match="df1 must have all columns from join_columns"):
        datacompy.Compare(clashing, clashing.copy(), ["b"])

    with raises(ValueError, match="df1 must have unique column names"):
        datacompy.Compare(clashing, clashing.copy(), ["a"])

    # Repeated values in a join column are fine; df1 should be kept as given.
    df_dupe = pd.DataFrame([{"a": 1, "b": 2}, {"a": 1, "b": 3}])
    accepted = datacompy.Compare(df_dupe, df_dupe.copy(), ["a", "b"])
    assert accepted.df1.equals(df_dupe)
github capitalone / datacompy / tests / test_core.py View on Github external
def test_compare_df_setter_bad_index():
    """Check the errors expected from Compare when comparing on the index.

    A non-DataFrame argument is expected to raise TypeError; a frame with
    columns "a" and "A" is expected to raise ValueError about unique column
    names.
    """
    clashing = pd.DataFrame([{"a": 1, "A": 2}, {"a": 2, "A": 2}])

    with raises(TypeError, match="df1 must be a pandas DataFrame"):
        datacompy.Compare("a", "a", on_index=True)

    with raises(ValueError, match="df1 must have unique column names"):
        datacompy.Compare(clashing, clashing.copy(), on_index=True)
github capitalone / datacompy / tests / test_core.py View on Github external
def test_compare_df_setter_bad_index():
    """Invalid inputs with ``on_index=True`` must be rejected by Compare."""
    # NOTE(review): a function with this same name appears earlier in this
    # listing -- confirm whether the repetition is intended.
    frame = pd.DataFrame([{"a": 1, "A": 2}, {"a": 2, "A": 2}])

    # Non-DataFrame arguments are expected to fail with a TypeError.
    not_a_frame = "a"
    with raises(TypeError, match="df1 must be a pandas DataFrame"):
        datacompy.Compare(not_a_frame, not_a_frame, on_index=True)

    # Columns "a" and "A" are expected to trip the unique-column-name check.
    frame_copy = frame.copy()
    with raises(ValueError, match="df1 must have unique column names"):
        datacompy.Compare(frame, frame_copy, on_index=True)