How to use the pandera.Index class in pandera

To help you get started, we’ve selected a few pandera examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pandera-dev / pandera / tests / test_checks.py View on Github external
init_schema_element_wise()

    # raise errors.SchemaInitError even when the schema doesn't specify column
    # key for groupby column
    def init_schema_no_groupby_column():
        """Build a schema whose groupby check names an undeclared column.

        The only declared column is "col1"; its check groups by "col2",
        which has no entry in the schema's column mapping.
        """
        grouped_check = Check(lambda s: s["foo"] > 10, groupby=["col2"])
        DataFrameSchema({"col1": Column(Int, [grouped_check])})

    with pytest.raises(errors.SchemaInitError):
        init_schema_no_groupby_column()

    # can't use groupby argument in SeriesSchema or Index objects
    for SchemaClass in [SeriesSchema, Index]:
        with pytest.raises(
                errors.SchemaInitError,
                match="^Cannot use groupby checks with"):
            SchemaClass(Int, Check(lambda s: s["bar"] == 1, groupby="foo"))
github pandera-dev / pandera / tests / test_checks.py View on Github external
def test_check_groupby():
    """Validate a dataframe against "col1" checks that use ``groupby``.

    The ``groupby`` argument is exercised in three forms: a single column
    name, a list of column names, and a callable that receives the
    dataframe and returns ``df.groupby("col2")``.
    """
    schema = DataFrameSchema(
        columns={
            "col1": Column(Int, [
                # presumably each check receives the "col1" values keyed by
                # the "col2" group label ("foo" / "bar") -- confirm against
                # the pandera Check documentation.
                Check(lambda s: s["foo"] > 10, groupby="col2"),
                Check(lambda s: s["bar"] < 10, groupby=["col2"]),
                Check(lambda s: s["foo"] > 10,
                      groupby=lambda df: df.groupby("col2")),
                Check(lambda s: s["bar"] < 10,
                      groupby=lambda df: df.groupby("col2"))
            ]),
            "col2": Column(String, Check(lambda s: s.isin(["foo", "bar"]))),
        },
        index=Index(Int, name="data_id"),
    )

    # Passing fixture: every "bar" row has col1 < 10 and every "foo" row has
    # col1 > 10; the index is named "data_id" to match the schema's Index.
    df_pass = pd.DataFrame(
        data={
            "col1": [7, 8, 9, 11, 12, 13],
            "col2": ["bar", "bar", "bar", "foo", "foo", "foo"],
        },
        index=pd.Series([1, 2, 3, 4, 5, 6], name="data_id"),
    )

    # The validated result must be a DataFrame with exactly the two columns.
    df = schema.validate(df_pass)
    assert isinstance(df, pd.DataFrame)
    assert len(df.columns) == 2
    assert set(df.columns) == {"col1", "col2"}

    # raise errors.SchemaError when Check fails
github pandera-dev / pandera / tests / test_pandera.py View on Github external
def test_multi_index_index():
    schema = DataFrameSchema(
        columns={
            "column1": Column(Float, Check(lambda s: s > 0)),
            "column2": Column(Float, Check(lambda s: s > 0)),
        },
        index=MultiIndex(
            indexes=[
                Index(Int,
                      Check(lambda s: (s < 5) & (s >= 0)),
                      name="index0"),
                Index(String,
                      Check(lambda s: s.isin(["foo", "bar"])),
                      name="index1"),
            ]
        )
    )

    df = pd.DataFrame(
        data={
            "column1": [0.1, 0.5, 123.1, 10.6, 22.31],
            "column2": [0.1, 0.5, 123.1, 10.6, 22.31],
        },
        index=pd.MultiIndex.from_arrays(
            [[0, 1, 2, 3, 4], ["foo", "bar", "foo", "bar", "foo"]],
github pandera-dev / pandera / tests / test_schemas.py View on Github external
def test_dataframe_schema_str_repr():
    """String renderings of a DataFrameSchema name its class, columns and index."""
    expected_names = ["col1", "col2", "col3", "my_index"]
    columns = {
        "col1": Column(Int),
        "col2": Column(String),
        "col3": Column(DateTime),
    }
    schema = DataFrameSchema(columns=columns, index=Index(Int, name="my_index"))
    class_name = schema.__class__.__name__

    # The dunder methods are called directly so the isinstance assertion
    # inspects their raw return values.
    renderings = (schema.__str__(), schema.__repr__())
    for text in renderings:
        assert isinstance(text, str)
        assert class_name in text
        for expected in expected_names:
            assert expected in text
github pandera-dev / pandera / tests / test_pandera.py View on Github external
def test_index_schema():
    """Index checks pass for an index of 1..10 and fail for one of 1..19."""
    index_checks = [
        # Element-wise bound on every index value.
        Check(lambda x: 1 <= x <= 11, element_wise=True),
        # Check on the index as a whole.
        Check(lambda index: index.mean() > 1),
    ]
    schema = DataFrameSchema(columns={}, index=Index(Int, index_checks))

    valid_frame = pd.DataFrame(index=range(1, 11), dtype="int64")
    validated = schema.validate(valid_frame)
    assert isinstance(validated, pd.DataFrame)

    # Index values above 11 violate the element-wise bound.
    invalid_frame = pd.DataFrame(index=range(1, 20))
    with pytest.raises(errors.SchemaError):
        schema.validate(invalid_frame)
github pandera-dev / pandera / tests / test_pandera.py View on Github external
def test_multi_index_index():
    """Set up a schema with a two-level MultiIndex and a matching dataframe."""
    schema = DataFrameSchema(
        columns={
            # Both float columns must be strictly positive.
            "column1": Column(Float, Check(lambda s: s > 0)),
            "column2": Column(Float, Check(lambda s: s > 0)),
        },
        index=MultiIndex(
            indexes=[
                # Level "index0": integers in the half-open range [0, 5).
                Index(Int,
                      Check(lambda s: (s < 5) & (s >= 0)),
                      name="index0"),
                # Level "index1": strings restricted to "foo" or "bar".
                Index(String,
                      Check(lambda s: s.isin(["foo", "bar"])),
                      name="index1"),
            ]
        )
    )

    # Fixture whose column values and index levels satisfy every check above;
    # the level names match the Index names declared in the schema.
    df = pd.DataFrame(
        data={
            "column1": [0.1, 0.5, 123.1, 10.6, 22.31],
            "column2": [0.1, 0.5, 123.1, 10.6, 22.31],
        },
        index=pd.MultiIndex.from_arrays(
            [[0, 1, 2, 3, 4], ["foo", "bar", "foo", "bar", "foo"]],
            names=["index0", "index1"],
        )
    )
    # NOTE(review): this excerpt stops here -- no schema.validate(df) call or
    # assertion is visible, so the test currently only builds its fixtures.
github pandera-dev / pandera / tests / test_pandera.py View on Github external
def test_dataframe_schema_str_repr():
    """String renderings of a DataFrameSchema name its class, columns and index."""
    expected_names = ["col1", "col2", "col3", "my_index"]
    columns = {
        "col1": Column(Int),
        "col2": Column(String),
        "col3": Column(DateTime),
    }
    schema = DataFrameSchema(columns=columns, index=Index(Int, name="my_index"))
    class_name = schema.__class__.__name__

    # The dunder methods are called directly so the isinstance assertion
    # inspects their raw return values.
    renderings = (schema.__str__(), schema.__repr__())
    for text in renderings:
        assert isinstance(text, str)
        assert class_name in text
        for expected in expected_names:
            assert expected in text