How to use the pandera.Check function in pandera

To help you get started, we’ve selected a few pandera examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pandera-dev / pandera / tests / test_schemas.py View on Github external
def test_dataframe_schema_check_function_types(check_function, should_fail):
    """Validate a small dataframe using ``check_function`` as a column check.

    The same function is attached, as a non-element-wise ``Check``, to an
    integer column "a" and a float column "b".

    :param check_function: callable passed as ``fn`` to each ``Check``.
    :param should_fail: when truthy, validation is expected to raise
        ``errors.SchemaError``; otherwise validation must complete.
    """
    columns = {
        "a": Column(Int, Check(fn=check_function, element_wise=False)),
        "b": Column(Float, Check(fn=check_function, element_wise=False)),
    }
    schema = DataFrameSchema(columns)
    df = pd.DataFrame({"a": [1, 2, 3], "b": [1.1, 2.5, 9.9]})

    if not should_fail:
        schema.validate(df)
        return

    with pytest.raises(errors.SchemaError):
        schema.validate(df)
github pandera-dev / pandera / tests / test_pandera.py View on Github external
def test_sample_dataframe_schema():
    """Validate only a random sample of rows and get the full dataframe back.

    For each seed, 100 rows are drawn and overwritten with -1, then the
    schema is validated with ``sample=100`` and the same ``random_state``.
    """
    df = pd.DataFrame({"col1": range(1, 1001)})

    # the check holds only where a value equals -1
    equals_minus_one = Check(lambda s: s == -1)
    schema = DataFrameSchema(columns={"col1": Column(Int, equals_minus_one)})

    for seed in (11, 123456, 9000, 654):
        # presumably validate() draws the same rows for the same seed, so
        # only rows already set to -1 are checked -- confirm against pandera
        sampled_rows = df.sample(100, random_state=seed).index
        df.loc[sampled_rows] = -1
        validated = schema.validate(df, sample=100, random_state=seed)
        assert validated.equals(df)
github pandera-dev / pandera / tests / test_pandera.py View on Github external
def test_multi_index_columns():
    """Validate a dataframe whose column keys are tuples (multi-level columns).

    Each tuple-keyed column gets its own dtype and check: bounded floats,
    membership in a fixed set of strings, bounded ints, and equality with a
    fixed timestamp.
    """
    # pd.datetime was deprecated in pandas 1.0 and removed in 2.0;
    # pd.Timestamp is the supported way to build the comparison value.
    expected_date = pd.Timestamp(2019, 1, 1)
    schema = DataFrameSchema({
        ("zero", "foo"): Column(Float, Check(lambda s: (s > 0) & (s < 1))),
        ("zero", "bar"): Column(
            String, Check(lambda s: s.isin(["a", "b", "c", "d"]))),
        ("one", "foo"): Column(Int, Check(lambda s: (s > 0) & (s < 10))),
        ("one", "bar"): Column(
            DateTime, Check(lambda s: s == expected_date)),
    })
    validated_df = schema.validate(
        pd.DataFrame({
            ("zero", "foo"): [0.1, 0.2, 0.7, 0.3],
            ("zero", "bar"): ["a", "b", "c", "d"],
            ("one", "foo"): [1, 6, 4, 7],
            ("one", "bar"): pd.to_datetime(["2019/01/01"] * 4),
        })
    )
    assert isinstance(validated_df, pd.DataFrame)
github pandera-dev / pandera / tests / test_checks.py View on Github external
def init_schema_element_wise():
    """Construct (and discard) a schema mixing ``element_wise`` with ``groupby``.

    The "col1" check is declared element-wise while also grouping by "col2".
    Nothing is returned; only the act of constructing the schema matters.
    NOTE(review): presumably the caller expects construction to raise --
    confirm against the enclosing test.
    """
    grouped_element_wise_check = Check(
        lambda s: s["foo"] > 10,
        element_wise=True,
        groupby=["col2"],
    )
    membership_check = Check(lambda s: s.isin(["foo", "bar"]))
    DataFrameSchema({
        "col1": Column(Int, [grouped_element_wise_check]),
        "col2": Column(String, membership_check),
    })
github pandera-dev / pandera / tests / test_pandera.py View on Github external
def test_index_schema():
    """Check that an ``Index`` component validates a dataframe's index.

    The index schema pairs an element-wise bounds check (1 <= x <= 11) with a
    check on the index as a whole that its mean is greater than 1.
    """
    index_checks = [
        Check(lambda x: 1 <= x <= 11, element_wise=True),
        Check(lambda index: index.mean() > 1),
    ]
    schema = DataFrameSchema(columns={}, index=Index(Int, index_checks))

    # index values 1..10 all fall inside the element-wise bounds
    valid_df = pd.DataFrame(index=range(1, 11), dtype="int64")
    validated = schema.validate(valid_df)
    assert isinstance(validated, pd.DataFrame)

    # index values 1..19 include entries greater than 11
    invalid_df = pd.DataFrame(index=range(1, 20))
    with pytest.raises(errors.SchemaError):
        schema.validate(invalid_df)
github pandera-dev / pandera / tests / test_schemas.py View on Github external
def test_series_schema_multiple_validators():
    """A ``SeriesSchema`` with several checks requires every check to pass.

    One check bounds each element to [0, 50]; the other requires that at
    least one element equals 21.
    """
    within_bounds = Check(lambda x: 0 <= x <= 50, element_wise=True)
    contains_21 = Check(lambda s: (s == 21).any())
    schema = SeriesSchema(Int, [within_bounds, contains_21])

    validated_series = schema.validate(pd.Series([1, 5, 21, 50]))
    assert isinstance(validated_series, pd.Series)

    # no element equals 21 here, so one validator fails and an error is raised
    series_without_21 = pd.Series([1, 5, 20, 50])
    with pytest.raises(errors.SchemaError):
        schema.validate(series_without_21)
github pandera-dev / pandera / tests / test_schemas.py View on Github external
def test_coerce_dtype_in_dataframe():
    """Test dtype coercion set per column and for a whole dataframe schema.

    Two equivalent schemas are built: one passes ``coerce=True`` to every
    ``Column``, the other passes it once to ``DataFrameSchema``. Both must
    yield the same coerced dtypes. A separate schema checks that coercing a
    float column containing NaN to an integer dtype raises ``ValueError``.
    """
    df = pd.DataFrame({
        "column1": [10.0, 20.0, 30.0],
        "column2": ["2018-01-01", "2018-02-01", "2018-03-01"],
        "column3": [1, 2, None],
        "column4": [1., 1., np.nan],
    })
    # specify `coerce` at the Column level
    schema1 = DataFrameSchema({
        "column1": Column(Int, Check(lambda x: x > 0), coerce=True),
        "column2": Column(DateTime, coerce=True),
        "column3": Column(String, coerce=True, nullable=True),
    })
    # specify `coerce` at the DataFrameSchema level
    schema2 = DataFrameSchema({
        "column1": Column(Int, Check(lambda x: x > 0)),
        "column2": Column(DateTime),
        "column3": Column(String, nullable=True),
    }, coerce=True)

    for schema in [schema1, schema2]:
        result = schema.validate(df)
        assert result.column1.dtype == Int.value
        assert result.column2.dtype == DateTime.value
        # Series.items() replaces iteritems(), which was removed in pandas 2.0
        for _, x in result.column3.items():
            assert pd.isna(x) or isinstance(x, str)

    # make sure that correct error is raised when null values are present
    # in a float column that's coerced to an int; this does not depend on the
    # schemas above, so it runs once and under its own name rather than
    # rebinding the loop variable
    null_int_schema = DataFrameSchema({"column4": Column(Int, coerce=True)})
    with pytest.raises(ValueError):
        null_int_schema.validate(df)
github pandera-dev / pandera / tests / test_checks.py View on Github external
def test_format_failure_case_exceptions():
    """``Check._format_failure_cases`` raises ``TypeError`` for each listed input.

    The inputs are an int, a str, a float, a dict and a list.
    """
    check = Check(lambda x: x.isna().sum() == 0)
    unsupported_inputs = (
        1,
        "foobar",
        1.0,
        {"key": "value"},
        list(range(10)),
    )
    for data in unsupported_inputs:
        with pytest.raises(TypeError):
            check._format_failure_cases(data)
github pandera-dev / pandera / tests / test_schemas.py View on Github external
def test_head_dataframe_schema():
    """
    Validating with ``head=100`` checks only the leading rows, yet the whole
    dataframe is returned; validating every row fails.
    """
    # the first 100 values are non-negative, the 1000 that follow are negative
    non_negative_values = list(range(100))
    negative_values = list(range(-1, -1001, -1))
    df = pd.DataFrame({"col1": non_negative_values + negative_values})

    non_negative_check = Check(lambda s: s >= 0)
    schema = DataFrameSchema(columns={"col1": Column(Int, non_negative_check)})

    # only the first 100 rows are inspected, so validation succeeds
    validated = schema.validate(df, head=100)
    assert validated.equals(df)

    # with every row inspected, the negative values are caught
    with pytest.raises(errors.SchemaError):
        schema.validate(df)
github pandera-dev / pandera / tests / test_pandera.py View on Github external
def init_schema_no_groupby_column():
        """Construct (and discard) a schema whose check groups by an undeclared column.

        The "col1" check uses ``groupby=["col2"]`` although the schema defines
        no "col2" column. The code that follows this helper in the file
        expects calling it to raise ``errors.SchemaInitError``.
        """
        DataFrameSchema({
            "col1": Column(Int, [
                # groupby names "col2", which is not among the schema's columns
                Check(lambda s: s["foo"] > 10, groupby=["col2"]),
            ]),
        })

    with pytest.raises(errors.SchemaInitError):
        init_schema_no_groupby_column()

    # can't use groupby argument in SeriesSchema or Index objects
    for SchemaClass in [SeriesSchema, Index]:
        with pytest.raises(
                errors.SchemaInitError,
                match="^Cannot use groupby checks with"):
            SchemaClass(Int, Check(lambda s: s["bar"] == 1, groupby="foo"))