How to use the patsy.missing.NAAction function in patsy

To help you get started, we’ve selected a few patsy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github bashtage / linearmodels / linearmodels / asset_pricing / model.py View on Github external
def _prepare_data_from_formula(formula, data, portfolios):
    """
    Build the factor and portfolio design matrices described by a formula.

    Parameters
    ----------
    formula : str
        When ``portfolios`` is given, the formula describes the factors
        only.  When ``portfolios`` is None, the formula is split on ``'~'``:
        the first piece describes the portfolios and the second piece
        describes the factors.
    data
        Data passed straight through to ``dmatrix``.
    portfolios
        Portfolio data supplied by the caller, or None to build it from the
        first piece of ``formula``.

    Returns
    -------
    tuple
        ``(factors, portfolios, orig_formula)`` where ``orig_formula`` is
        the formula string exactly as it was passed in.
    """
    orig_formula = formula
    # An empty NA_types list combined with on_NA='raise' is the missing-value
    # policy handed to every dmatrix call below.
    na_action = NAAction(on_NA='raise', NA_types=[])
    dmatrix_opts = {'return_type': 'dataframe', 'NA_action': na_action}

    # NOTE(review): dmatrix is deliberately called directly from this frame
    # rather than through a local helper; patsy looks names up relative to
    # the calling frame by default -- confirm before refactoring.
    if portfolios is None:
        sides = formula.split('~')
        # ' + 0' is appended to each piece before it is handed to patsy.
        portfolios = dmatrix(sides[0].strip() + ' + 0', data, **dmatrix_opts)
        factors = dmatrix(sides[1].strip() + ' + 0', data, **dmatrix_opts)
    else:
        factors = dmatrix(formula + ' + 0', data, **dmatrix_opts)

    return factors, portfolios, orig_formula
github pydata / patsy / patsy / missing.py View on Github external
def test_NAAction_raise():
    action = NAAction(on_NA="raise")

    # no-NA just passes through:
    in_arrs = [np.asarray([1.1, 1.2]),
               np.asarray([1, 2])]
    is_NAs = [np.asarray([False, False])] * 2
    got_arrs = action.handle_NA(in_arrs, is_NAs, [None, None])
    assert np.array_equal(got_arrs[0], in_arrs[0])
    assert np.array_equal(got_arrs[1], in_arrs[1])

    from patsy.origin import Origin
    o1 = Origin("asdf", 0, 1)
    o2 = Origin("asdf", 2, 3)

    # NA raises an error with a correct origin
    in_idx = np.arange(2)
    in_arrs = [np.asarray([1.1, 1.2]),
github pydata / patsy / patsy / categorical.py View on Github external
def test_categorical_to_int():
    """Exercise ``categorical_to_int`` on pandas Series and Categorical inputs.

    Each section only runs when the corresponding ``have_pandas*`` flag is
    truthy.
    """
    from nose.tools import assert_raises
    from patsy.missing import NAAction
    if have_pandas:
        # A Series of level names converts to integer codes and keeps the
        # original index.
        s = pandas.Series(["a", "b", "c"], index=[10, 20, 30])
        c_pandas = categorical_to_int(s, ("a", "b", "c"), NAAction())
        assert np.all(c_pandas == [0, 1, 2])
        assert np.all(c_pandas.index == [10, 20, 30])
        # Input must be 1-dimensional
        assert_raises(PatsyError,
                      categorical_to_int,
                      pandas.DataFrame({10: s}), ("a", "b", "c"), NAAction())
    if have_pandas_categorical:
        # Always try the plain Categorical constructor; when
        # have_pandas_categorical_dtype is set, also try a Series that wraps
        # a Categorical.
        constructors = [pandas_Categorical_from_codes]
        if have_pandas_categorical_dtype:
            def Series_from_codes(codes, categories):
                c = pandas_Categorical_from_codes(codes, categories)
                return pandas.Series(c)
            constructors.append(Series_from_codes)
        for con in constructors:
            # Codes [1, 0, -1] over categories ("a", "b").
            # NOTE(review): the visible snippet stops after this conversion;
            # ``conv`` is not checked in the code shown here.
            cat = con([1, 0, -1], ("a", "b"))
            conv = categorical_to_int(cat, ("a", "b"), NAAction())
github pydata / patsy / patsy / build.py View on Github external
following places:

    * If ``data`` is a :class:`pandas.DataFrame`, then its number of rows.
    * The number of entries in any factors present in any of the design
      matrices being built.

    All these values must match. In particular, if this function is called to
    generate multiple design matrices at once, then they must all have the
    same number of rows.

    .. versionadded:: 0.2.0
       The ``NA_action`` argument.

    """
    if isinstance(NA_action, str):
        NA_action = NAAction(NA_action)
    if return_type == "dataframe" and not have_pandas:
        raise PatsyError("pandas.DataFrame was requested, but pandas "
                            "is not installed")
    if return_type not in ("matrix", "dataframe"):
        raise PatsyError("unrecognized output type %r, should be "
                            "'matrix' or 'dataframe'" % (return_type,))
    # Evaluate factors
    factor_info_to_values = {}
    factor_info_to_isNAs = {}
    rows_checker = _CheckMatch("Number of rows", lambda a, b: a == b)
    index_checker = _CheckMatch("Index", lambda a, b: a.equals(b))
    if have_pandas and isinstance(data, pandas.DataFrame):
        index_checker.check(data.index, "data.index", None)
        rows_checker.check(data.shape[0], "data argument", None)
    for design_info in design_infos:
        # We look at evaluators rather than factors here, because it might
github pydata / patsy / patsy / categorical.py View on Github external
def t(NA_types, datas, exp_finish_fast, exp_levels, exp_contrast=None):
    """Feed ``datas`` to a fresh CategoricalSniffer and check the outcome.

    The sniffer is built with ``NAAction(NA_types=NA_types)``.  Chunks are
    sniffed one at a time; sniffing stops at the first chunk for which
    ``sniff`` returns a truthy value.  ``exp_finish_fast`` must be truthy
    if that early stop happens and falsy for every chunk that does not
    trigger it.  Finally ``levels_contrast()`` must equal
    ``(exp_levels, exp_contrast)``.
    """
    na_action = NAAction(NA_types=NA_types)
    sniffer = CategoricalSniffer(na_action)
    for chunk in datas:
        finished = sniffer.sniff(chunk)
        if not finished:
            # The sniffer wants more data, so an early finish must not
            # have been expected.
            assert not exp_finish_fast
            continue
        assert exp_finish_fast
        break
    assert sniffer.levels_contrast() == (exp_levels, exp_contrast)
github pydata / patsy / patsy / missing.py View on Github external
def test_NAAction_drop():
    """Check the output of ``NAAction("drop").handle_NA`` on three arrays.

    Rows 0, 2 and 4 are flagged in at least one of the NA masks; the
    assertions expect the first two outputs to contain only the values from
    rows 1 and 3.
    """
    dropper = NAAction("drop")
    values = [
        np.asarray([-1, 2, -1, 4, 5]),
        np.asarray([10.0, 20.0, 30.0, 40.0, 50.0]),
        np.asarray([[1.0, np.nan],
                    [3.0, 4.0],
                    [10.0, 5.0],
                    [6.0, 7.0],
                    [8.0, np.nan]]),
    ]
    na_masks = [
        np.asarray([True, False, True, False, False]),
        np.zeros(5, dtype=bool),
        np.asarray([True, False, False, False, True]),
    ]
    origins = [None] * len(values)

    kept = dropper.handle_NA(values, na_masks, origins)

    assert len(kept) == 3
    for position, expected in enumerate(([2, 4], [20.0, 40.0])):
        assert np.array_equal(kept[position], expected)
github pydata / patsy / patsy / build.py View on Github external
assert it.i == 2
    iterations = 0
    assert num_column_counts == {num_1dim: 1, num_1col: 1, num_4col: 4}
    assert cat_levels_contrasts == {
        categ_1col: (("a", "b", "c"), "MOCK CONTRAST"),
        bool_1col: ((False, True), None),
        string_1col: (("a", "b", "c"), None),
        object_1col: (tuple(sorted(object_levels, key=id)), None),
        }

    # Check that it doesn't read through all the data if that's not necessary:
    it = DataIterMaker()
    no_read_necessary = [num_1dim, num_1col, num_4col, categ_1col, bool_1col]
    (num_column_counts, cat_levels_contrasts,
     ) = _examine_factor_types(no_read_necessary, factor_states, it,
                               NAAction())
    assert it.i == 0
    assert num_column_counts == {num_1dim: 1, num_1col: 1, num_4col: 4}
    assert cat_levels_contrasts == {
        categ_1col: (("a", "b", "c"), "MOCK CONTRAST"),
        bool_1col: ((False, True), None),
        }

    # Illegal inputs:
    bool_3col = MockFactor()
    num_3dim = MockFactor()
    # no such thing as a multi-dimensional Categorical
    # categ_3dim = MockFactor()
    string_3col = MockFactor()
    object_3col = MockFactor()
    illegal_factor_states = {
        num_3dim: (np.zeros((3, 3, 3)), np.ones((3, 3, 3))),
github pydata / patsy / patsy / build.py View on Github external
def test__eval_factor_categorical():
    """Check ``_eval_factor`` on a categorical factor with levels a and b.

    Valid string data must come back as integer codes; several kinds of
    invalid data must raise ``PatsyError``.
    """
    from nose.tools import assert_raises
    from patsy.categorical import C

    na_action = NAAction()
    factor = _MockFactor()
    info = FactorInfo(factor, "categorical", {},
                      num_columns=None, categories=("a", "b"))
    assert info.factor is factor

    # Valid input: each level name maps to its position in ("a", "b").
    codes, _ = _eval_factor(info, {"mock": ["b", "a", "b"]}, na_action)
    assert codes.shape == (3,)
    assert np.all(codes == [1, 0, 1])

    def must_reject(value):
        # Evaluating ``value`` for this factor has to raise PatsyError.
        assert_raises(PatsyError, _eval_factor, info, {"mock": value},
                      na_action)

    must_reject(["c"])
    must_reject(C(["a", "c"]))
    must_reject(C(["a", "b"], levels=["b", "a"]))
    must_reject([1, 0, 1])

    # Same level names, but reshaped in place into a 2x2 array.
    square = np.asarray(["b", "a", "a", "b"])
    square.resize((2, 2))
    must_reject(square)
github bashtage / linearmodels / linearmodels / panel / model.py View on Github external
def __init__(self, formula, data, eval_env=2):
    """Record the formula, wrap the data and parse straight away.

    Parameters
    ----------
    formula
        Stored unchanged on the instance.
    data
        Wrapped in ``PanelData`` with ``convert_dummies=False`` and
        ``copy=False``.
    eval_env : int, default 2
        Stored unchanged on the instance.
    """
    self._formula = formula
    self._eval_env = eval_env
    self._data = PanelData(data, convert_dummies=False, copy=False)
    self._na_action = NAAction(on_NA='raise', NA_types=[])
    # Both slots start out empty before _parse() runs.
    self._dependent = None
    self._exog = None
    self._parse()
github pydata / patsy / patsy / build.py View on Github external
{}, num_columns=2, categories=None)
    eval123321, is_NA = _eval_factor(fi2,
                                     {"mock": [[1, 3], [2, 2], [3, 1]]},
                                     naa)
    assert eval123321.shape == (3, 2)
    assert np.all(eval123321 == [[1, 3], [2, 2], [3, 1]])
    assert is_NA.shape == (3,)
    assert np.all(~is_NA)
    assert_raises(PatsyError, _eval_factor, fi2, {"mock": [1, 2, 3]}, naa)
    assert_raises(PatsyError, _eval_factor, fi2, {"mock": [[1, 2, 3]]}, naa)

    ev_nan, is_NA = _eval_factor(fi1, {"mock": [1, 2, np.nan]},
                                 NAAction(NA_types=["NaN"]))
    assert np.array_equal(is_NA, [False, False, True])
    ev_nan, is_NA = _eval_factor(fi1, {"mock": [1, 2, np.nan]},
                                 NAAction(NA_types=[]))
    assert np.array_equal(is_NA, [False, False, False])

    if have_pandas:
        eval_ser, _ = _eval_factor(fi1,
                                   {"mock":
                                    pandas.Series([1, 2, 3],
                                                  index=[10, 20, 30])},
                                   naa)
        assert isinstance(eval_ser, pandas.DataFrame)
        assert np.array_equal(eval_ser, [[1], [2], [3]])
        assert np.array_equal(eval_ser.index, [10, 20, 30])
        eval_df1, _ = _eval_factor(fi1,
                                   {"mock":
                                    pandas.DataFrame([[2], [1], [3]],
                                                     index=[20, 10, 30])},
                                   naa)