Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _prepare_data_from_formula(formula, data, portfolios):
    """Build the factor (and, if needed, portfolio) frames from a formula.

    If ``portfolios`` is given, the whole ``formula`` describes the factors
    and ``portfolios`` is returned untouched.  Otherwise ``formula`` is split
    on ``~``: the first piece produces the portfolios and the second piece
    produces the factors.  Every expression handed to ``dmatrix`` has
    ``' + 0'`` appended and is evaluated against ``data`` with
    ``return_type='dataframe'``.

    Returns a tuple ``(factors, portfolios, formula)`` where the last item is
    the formula exactly as it was passed in.
    """
    # Raise on NA, with an empty list of NA types.
    na_action = NAAction(on_NA='raise', NA_types=[])

    def _design(expression):
        # Single place for the dmatrix options shared by every call.
        return dmatrix(expression + ' + 0', data,
                       return_type='dataframe', NA_action=na_action)

    if portfolios is None:
        sides = formula.split('~')
        # Portfolios first, then factors (same evaluation order as before).
        portfolios = _design(sides[0].strip())
        factors = _design(sides[1].strip())
    else:
        factors = _design(formula)

    return factors, portfolios, formula
def test_NAAction_raise():
# Test fragment for NAAction(on_NA="raise").handle_NA.  The listing is cut off
# partway through the second (NA-raising) case.
action = NAAction(on_NA="raise")
# no-NA just passes through:
in_arrs = [np.asarray([1.1, 1.2]),
np.asarray([1, 2])]
# Both masks are all-False, i.e. no entry is flagged as NA.
is_NAs = [np.asarray([False, False])] * 2
got_arrs = action.handle_NA(in_arrs, is_NAs, [None, None])
# With nothing flagged, the returned arrays must equal the inputs.
assert np.array_equal(got_arrs[0], in_arrs[0])
assert np.array_equal(got_arrs[1], in_arrs[1])
from patsy.origin import Origin
# Two Origin objects over the same string with different number pairs
# (presumably start/end offsets -- confirm against patsy.origin).
o1 = Origin("asdf", 0, 1)
o2 = Origin("asdf", 2, 3)
# NA raises an error with a correct origin
in_idx = np.arange(2)
in_arrs = [np.asarray([1.1, 1.2]),
def test_categorical_to_int():
# Test fragment for categorical_to_int.  The listing ends inside the final
# loop, and the original indentation (hence the nesting of the `if` blocks)
# is not recoverable from this text.
from nose.tools import assert_raises
from patsy.missing import NAAction
if have_pandas:
# A Series with a non-default index: the values are expected to map to the
# codes 0, 1, 2 and the index is expected to be kept on the result.
s = pandas.Series(["a", "b", "c"], index=[10, 20, 30])
c_pandas = categorical_to_int(s, ("a", "b", "c"), NAAction())
assert np.all(c_pandas == [0, 1, 2])
assert np.all(c_pandas.index == [10, 20, 30])
# Input must be 1-dimensional
assert_raises(PatsyError,
categorical_to_int,
pandas.DataFrame({10: s}), ("a", "b", "c"), NAAction())
if have_pandas_categorical:
# Always try the plain Categorical constructor; when the categorical dtype is
# available, also try one that wraps the Categorical in a Series.
constructors = [pandas_Categorical_from_codes]
if have_pandas_categorical_dtype:
def Series_from_codes(codes, categories):
c = pandas_Categorical_from_codes(codes, categories)
return pandas.Series(c)
constructors.append(Series_from_codes)
for con in constructors:
# Codes [1, 0, -1] over categories ("a", "b"); -1 presumably marks a
# missing entry -- confirm against the pandas from_codes convention.
cat = con([1, 0, -1], ("a", "b"))
conv = categorical_to_int(cat, ("a", "b"), NAAction())
following places:
* If ``data`` is a :class:`pandas.DataFrame`, then its number of rows.
* The number of entries in any factors present in any of the design
  matrices being built.
All these values must match. In particular, if this function is called to
generate multiple design matrices at once, then they must all have the
same number of rows.
.. versionadded:: 0.2.0
The ``NA_action`` argument.
"""
if isinstance(NA_action, str):
# A string NA_action is wrapped in an NAAction instance.
NA_action = NAAction(NA_action)
# Validate return_type up front: "dataframe" needs pandas, and only
# "matrix" / "dataframe" are accepted.
if return_type == "dataframe" and not have_pandas:
raise PatsyError("pandas.DataFrame was requested, but pandas "
"is not installed")
if return_type not in ("matrix", "dataframe"):
raise PatsyError("unrecognized output type %r, should be "
"'matrix' or 'dataframe'" % (return_type,))
# Evaluate factors
factor_info_to_values = {}
factor_info_to_isNAs = {}
# One checker compares row counts with ==, the other compares indexes with
# .equals(); presumably each verifies that every value passed to check()
# agrees with the earlier ones -- confirm against _CheckMatch.
rows_checker = _CheckMatch("Number of rows", lambda a, b: a == b)
index_checker = _CheckMatch("Index", lambda a, b: a.equals(b))
# When data is a DataFrame, its index and row count are the first values
# handed to the checkers.
if have_pandas and isinstance(data, pandas.DataFrame):
index_checker.check(data.index, "data.index", None)
rows_checker.check(data.shape[0], "data argument", None)
for design_info in design_infos:
# We look at evaluators rather than factors here, because it might
def t(NA_types, datas, exp_finish_fast, exp_levels, exp_contrast=None):
    """Feed ``datas`` to a CategoricalSniffer and check what it reports.

    The sniffer is built with ``NAAction(NA_types=NA_types)``.  Chunks are
    sniffed in order and feeding stops at the first chunk for which
    ``sniff`` returns a true value.  ``exp_finish_fast`` says whether such an
    early stop is expected; ``exp_levels`` and ``exp_contrast`` are the pair
    expected from ``levels_contrast()`` afterwards.
    """
    sniffer = CategoricalSniffer(NAAction(NA_types=NA_types))

    # any() stops consuming at the first true result, like the break it replaces.
    stopped_early = any(sniffer.sniff(chunk) for chunk in datas)

    if stopped_early:
        assert exp_finish_fast
    else:
        assert not exp_finish_fast

    assert sniffer.levels_contrast() == (exp_levels, exp_contrast)
def test_NAAction_drop():
    """Check that the "drop" action removes rows flagged in any NA mask.

    Rows 0 and 2 are flagged by the first mask and rows 0 and 4 by the third,
    so only rows 1 and 3 are expected to remain in the outputs.
    """
    dropper = NAAction("drop")

    int_column = np.asarray([-1, 2, -1, 4, 5])
    float_column = np.asarray([10.0, 20.0, 30.0, 40.0, 50.0])
    two_columns = np.asarray([[1.0, np.nan],
                              [3.0, 4.0],
                              [10.0, 5.0],
                              [6.0, 7.0],
                              [8.0, np.nan]])
    in_values = [int_column, float_column, two_columns]

    # One boolean mask per input array; the middle one flags nothing.
    is_NAs = [
        np.asarray([True, False, True, False, False]),
        np.zeros(5, dtype=bool),
        np.asarray([True, False, False, False, True]),
    ]

    out_values = dropper.handle_NA(in_values, is_NAs, [None] * 3)

    assert len(out_values) == 3
    expected_leading = ([2, 4], [20.0, 40.0])
    for position, wanted in enumerate(expected_leading):
        assert np.array_equal(out_values[position], wanted)
assert it.i == 2
# Fragment from the middle of a test of _examine_factor_types: the start of
# the test (where `it`, the mock factors and factor_states are set up) and the
# end of the final dict literal are outside this listing.
iterations = 0
# Expected column counts for the numeric factors, then the expected
# (levels, contrast) pair for each categorical factor.
assert num_column_counts == {num_1dim: 1, num_1col: 1, num_4col: 4}
assert cat_levels_contrasts == {
categ_1col: (("a", "b", "c"), "MOCK CONTRAST"),
bool_1col: ((False, True), None),
string_1col: (("a", "b", "c"), None),
object_1col: (tuple(sorted(object_levels, key=id)), None),
}
# Check that it doesn't read through all the data if that's not necessary:
it = DataIterMaker()
# This subset leaves out the string and object factors used above.
no_read_necessary = [num_1dim, num_1col, num_4col, categ_1col, bool_1col]
(num_column_counts, cat_levels_contrasts,
) = _examine_factor_types(no_read_necessary, factor_states, it,
NAAction())
# it.i is expected to remain 0 here (it was 2 after the earlier call).
assert it.i == 0
assert num_column_counts == {num_1dim: 1, num_1col: 1, num_4col: 4}
assert cat_levels_contrasts == {
categ_1col: (("a", "b", "c"), "MOCK CONTRAST"),
bool_1col: ((False, True), None),
}
# Illegal inputs:
bool_3col = MockFactor()
num_3dim = MockFactor()
# no such thing as a multi-dimensional Categorical
# categ_3dim = MockFactor()
string_3col = MockFactor()
object_3col = MockFactor()
# Maps each illegal factor to a pair of arrays; the 3-dim numeric factor gets
# two 3x3x3 arrays.  The rest of the dict is cut off in this listing.
illegal_factor_states = {
num_3dim: (np.zeros((3, 3, 3)), np.ones((3, 3, 3))),
def test__eval_factor_categorical():
    """Exercise _eval_factor on a categorical FactorInfo with categories a, b.

    A valid list of labels must come back as a 1-d array of integer codes;
    several malformed inputs must raise PatsyError.
    """
    from nose.tools import assert_raises
    from patsy.categorical import C

    naa = NAAction()
    f = _MockFactor()
    fi1 = FactorInfo(
        f,
        "categorical",
        {},
        num_columns=None,
        categories=("a", "b"),
    )
    assert fi1.factor is f

    def must_reject(data):
        # Every rejection case uses the same factor info and NA action.
        assert_raises(PatsyError, _eval_factor, fi1, data, naa)

    # "b", "a", "b" -> codes 1, 0, 1 in a flat array of length 3.
    cat1, _ = _eval_factor(fi1, {"mock": ["b", "a", "b"]}, naa)
    assert cat1.shape == (3,)
    assert np.all(cat1 == [1, 0, 1])

    # A label outside the declared categories, as a plain list and inside C().
    must_reject({"mock": ["c"]})
    must_reject({"mock": C(["a", "c"])})
    # C() with the levels given in the opposite order.
    must_reject({"mock": C(["a", "b"], levels=["b", "a"])})
    # Integers instead of labels.
    must_reject({"mock": [1, 0, 1]})

    # A 2x2 array of labels (resized in place from a length-4 array).
    bad_cat = np.asarray(["b", "a", "a", "b"])
    bad_cat.resize((2, 2))
    must_reject({"mock": bad_cat})
def __init__(self, formula, data, eval_env=2):
    """Record the formula and data, then parse immediately.

    ``data`` is wrapped in a PanelData (``convert_dummies=False``,
    ``copy=False``).  The NA action is fixed to raise, with an empty list of
    NA types.  ``eval_env`` is stored as given.  The dependent and exog
    attributes start out as None before ``_parse()`` is called.
    """
    self._formula = formula
    self._data = PanelData(data, convert_dummies=False, copy=False)
    self._na_action = NAAction(on_NA='raise', NA_types=[])
    self._eval_env = eval_env

    # Placeholders, set before parsing runs.
    self._dependent = None
    self._exog = None

    self._parse()
{}, num_columns=2, categories=None)
eval123321, is_NA = _eval_factor(fi2,
{"mock": [[1, 3], [2, 2], [3, 1]]},
naa)
# Fragment from the middle of a numerical _eval_factor test; fi1, fi2 and naa
# are defined before this listing starts.
# The 3x2 input is expected back with the same shape and values, together
# with a length-3 NA mask that is all False.
assert eval123321.shape == (3, 2)
assert np.all(eval123321 == [[1, 3], [2, 2], [3, 1]])
assert is_NA.shape == (3,)
assert np.all(~is_NA)
# A flat list of three values and a single row of three values are both
# expected to be rejected for fi2.
assert_raises(PatsyError, _eval_factor, fi2, {"mock": [1, 2, 3]}, naa)
assert_raises(PatsyError, _eval_factor, fi2, {"mock": [[1, 2, 3]]}, naa)
# With "NaN" listed in NA_types, the nan entry is flagged in the mask ...
ev_nan, is_NA = _eval_factor(fi1, {"mock": [1, 2, np.nan]},
NAAction(NA_types=["NaN"]))
assert np.array_equal(is_NA, [False, False, True])
# ... and with an empty NA_types list nothing is flagged.
ev_nan, is_NA = _eval_factor(fi1, {"mock": [1, 2, np.nan]},
NAAction(NA_types=[]))
assert np.array_equal(is_NA, [False, False, False])
if have_pandas:
# A Series input is expected to come back as a one-column DataFrame that
# keeps the Series index.
eval_ser, _ = _eval_factor(fi1,
{"mock":
pandas.Series([1, 2, 3],
index=[10, 20, 30])},
naa)
assert isinstance(eval_ser, pandas.DataFrame)
assert np.array_equal(eval_ser, [[1], [2], [3]])
assert np.array_equal(eval_ser.index, [10, 20, 30])
# Same call with a one-column DataFrame whose index is not in sorted order;
# the checks on this result are outside the listing.
eval_df1, _ = _eval_factor(fi1,
{"mock":
pandas.DataFrame([[2], [1], [3]],
index=[20, 10, 30])},
naa)