How to use the pandera.errors.SchemaError exception in pandera

To help you get started, we’ve selected a few pandera examples that show popular ways pandera.errors.SchemaError is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pandera-dev / pandera / tests / test_checks.py View on Github external
data={
            "col1": [7, 8, 9, 11, 1, 13],
            "col2": ["bar", "bar", "bar", "foo", "foo", "foo"],
        },
        index=pd.Series([1, 2, 3, 4, 5, 6], name="data_id"),
    )
    # raise errors.SchemaError when groupby column doesn't exist
    df_fail_no_column = pd.DataFrame(
        data={
            "col1": [7, 8, 20, 11, 12, 13],
        },
        index=pd.Series([1, 2, 3, 4, 5, 6], name="data_id"),
    )

    for df in [df_fail_on_bar, df_fail_on_foo, df_fail_no_column]:
        with pytest.raises(errors.SchemaError):
            schema.validate(df)
github pandera-dev / pandera / tests / test_schemas.py View on Github external
int_schema = SeriesSchema(
        Int, Check(lambda x: 0 <= x <= 100, element_wise=True))
    assert isinstance(int_schema.validate(
        pd.Series([0, 30, 50, 100])), pd.Series)

    str_schema = SeriesSchema(
        String, Check(lambda s: s.isin(["foo", "bar", "baz"])),
        nullable=True, coerce=True)
    assert isinstance(str_schema.validate(
        pd.Series(["foo", "bar", "baz", None])), pd.Series)
    assert isinstance(str_schema.validate(
        pd.Series(["foo", "bar", "baz", np.nan])), pd.Series)

    # error cases
    for data in [-1, 101, 50.1, "foo"]:
        with pytest.raises(errors.SchemaError):
            int_schema.validate(pd.Series([data]))

    for data in [-1, {"a": 1}, -1.0]:
        with pytest.raises(TypeError):
            int_schema.validate(TypeError)

    non_duplicate_schema = SeriesSchema(
        Int, allow_duplicates=False)
    with pytest.raises(errors.SchemaError):
        non_duplicate_schema.validate(pd.Series([0, 1, 2, 3, 4, 1]))

    # when series name doesn't match schema
    named_schema = SeriesSchema(Int, name="my_series")
    with pytest.raises(
            errors.SchemaError,
            match=r"^Expected .+ to have name"):
github pandera-dev / pandera / tests / test_dtypes.py View on Github external
def test_category_dtype_coerce():
    """Check a Category column against object-dtype data, with and
    without coercion.

    With ``coerce=False`` validating the object-dtype frame is expected to
    raise ``SchemaError``; with ``coerce=True`` validation is expected to
    succeed and hand back a ``pd.DataFrame``.
    """

    def make_object_frame():
        # Build a fresh frame on every call so each validation gets its
        # own, untouched input.
        values = pd.Series(["A", "B", "A", "B", "C"], dtype="object")
        return pd.DataFrame({"col": values})

    category_column = Column(
        dtypes.Category,
        checks=Check(lambda s: set(s) == {"A", "B", "C"}),
        nullable=False,
    )
    columns = {"col": category_column}

    # The schema is built inside the context manager, as in the original
    # single-expression form, so the guarded region is unchanged.
    with pytest.raises(SchemaError):
        strict_schema = DataFrameSchema(columns=columns, coerce=False)
        strict_schema.validate(make_object_frame())

    coercing_schema = DataFrameSchema(columns=columns, coerce=True)
    result = coercing_schema.validate(make_object_frame())
    assert isinstance(result, pd.DataFrame)
github pandera-dev / pandera / pandera / schemas.py View on Github external
if any(duplicates):
                raise errors.SchemaError(
                    "series '%s' contains duplicate values: %s" %
                    (series.name,
                     series[duplicates].head(
                         constants.N_FAILURE_CASES).to_dict()))

        try:
            series.dtype == _dtype
        except TypeError:
            types_not_matching = True
        else:
            types_not_matching = series.dtype != _dtype

        if _dtype is not None and types_not_matching:
            raise errors.SchemaError(
                "expected series '%s' to have type %s, got %s" %
                (series.name, _dtype, series.dtype))

        val_results = []
        for check_index, check in enumerate(self.checks):
            val_results.append(
                check(
                    self,
                    check_index,
                    check._prepare_series_input(series, dataframe_context)))
        return all(val_results)
github pandera-dev / pandera / pandera / decorators.py View on Github external
"""Check pandas DataFrame or Series before calling the function.

        :param fn: check the DataFrame or Series input of this function
        :param instance: the object to which the wrapped function was bound
            when it was called. Only applies to methods.
        :param args: the list of positional arguments supplied when the
            decorated function was called.
        :param kwargs: the dictionary of keyword arguments supplied when the
            decorated function was called.
        """
        args = list(args)
        if isinstance(obj_getter, int):
            try:
                args[obj_getter] = schema.validate(args[obj_getter])
            except IndexError as e:
                raise errors.SchemaError(
                    "error in check_input decorator of function '%s': the "
                    "index '%s' was supplied to the check but this "
                    "function accepts '%s' arguments, so the maximum "
                    "index is '%s'. The full error is: '%s'" %
                    (fn.__name__,
                     obj_getter,
                     len(_get_fn_argnames(fn)),
                     max(0, len(_get_fn_argnames(fn))-1),
                     e
                     )
                    )
        elif isinstance(obj_getter, str):
            if obj_getter in kwargs:
                kwargs[obj_getter] = schema.validate(kwargs[obj_getter])
            else:
                arg_spec_args = _get_fn_argnames(fn)
github pandera-dev / pandera / pandera / hypotheses.py View on Github external
...     pd.DataFrame({
        ...         "height_in_feet": [8.1, 7, 5.2, 5.1, 4],
        ...         "group": ["A", "A", "B", "B", "B"]
        ...     })
        ... )
        >>> schema.validate(df)[["height_in_feet", "group"]]
           height_in_feet group
        0             8.1     A
        1             7.0     A
        2             5.2     B
        3             5.1     B
        4             4.0     B

        """
        if relationship not in cls._RELATIONSHIPS:
            raise errors.SchemaError(
                "relationship must be one of %s" % set(cls._RELATIONSHIPS))
        return cls(
            test=stats.ttest_ind,
            samples=[sample1, sample2],
            groupby=groupby,
            relationship=relationship,
            test_kwargs={"equal_var": equal_var, "nan_policy": nan_policy},
            relationship_kwargs={"alpha": alpha},
            error="failed two sample ttest between '%s' and '%s'" % (
                sample1, sample2),
        )
github pandera-dev / pandera / pandera / schemas.py View on Github external
"non-nullable series contains null values: %s" %
                        (series.name, self._pandas_dtype.value, series.dtype,
                         series[nulls].head(
                             constants.N_FAILURE_CASES).to_dict()))
                else:
                    raise errors.SchemaError(
                        "non-nullable series '%s' contains null values: %s" %
                        (series.name,
                         series[nulls].head(
                             constants.N_FAILURE_CASES).to_dict()))

        # Check if the series contains duplicate values
        if not self._allow_duplicates:
            duplicates = series.duplicated()
            if any(duplicates):
                raise errors.SchemaError(
                    "series '%s' contains duplicate values: %s" %
                    (series.name,
                     series[duplicates].head(
                         constants.N_FAILURE_CASES).to_dict()))

        try:
            series.dtype == _dtype
        except TypeError:
            types_not_matching = True
        else:
            types_not_matching = series.dtype != _dtype

        if _dtype is not None and types_not_matching:
            raise errors.SchemaError(
                "expected series '%s' to have type %s, got %s" %
                (series.name, _dtype, series.dtype))