Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def t(formula_like, data, depth,
      expect_full_designs,
      expected_rhs_values, expected_rhs_names,
      expected_lhs_values=None, expected_lhs_names=None):  # pragma: no cover
    """Check the incremental design builders against one input.

    An input counts as formula-like when it is a ``six.string_types``
    string, a ``ModelDesc``, a ``DesignInfo``, a tuple whose first item is
    a ``DesignInfo``, or any object with a ``__patsy_get_model_desc__``
    attribute.  Formula-like inputs are built and verified with
    ``check_result``; every other input must make both incremental
    builders raise ``PatsyError``.

    :arg formula_like: The object under test.
    :arg data: Passed to ``build_design_matrices`` and, wrapped in a
      one-element iterator, to the incremental builders.
    :arg depth: Forwarded to the incremental builders on the success
      path; integer values are incremented by one first.
    :arg expect_full_designs: Forwarded unchanged to ``check_result``.
    :arg expected_rhs_values: Forwarded unchanged to ``check_result``.
    :arg expected_rhs_names: Forwarded unchanged to ``check_result``.
    :arg expected_lhs_values: When None, only a right-hand side is built
      (via ``incr_dbuilder``) and ``lhs`` is None; otherwise both sides
      are built via ``incr_dbuilders``.
    :arg expected_lhs_names: Forwarded unchanged to ``check_result``.
    """
    # NOTE(review): presumably the bump compensates for the extra call
    # frame this helper introduces -- confirm against the builders.
    if isinstance(depth, int):
        depth += 1

    def data_iter_maker():
        # Each call hands back a fresh iterator over a single chunk.
        return iter([data])

    accepted_types = six.string_types + (ModelDesc, DesignInfo)
    looks_like_formula = (
        isinstance(formula_like, accepted_types)
        or (isinstance(formula_like, tuple)
            and isinstance(formula_like[0], DesignInfo))
        or hasattr(formula_like, "__patsy_get_model_desc__")
    )

    if not looks_like_formula:
        # Anything else must be rejected by both builders.  Note that
        # depth is not forwarded on this path.
        assert_raises(PatsyError, incr_dbuilders,
                      formula_like, data_iter_maker)
        assert_raises(PatsyError, incr_dbuilder,
                      formula_like, data_iter_maker)
    else:
        if expected_lhs_values is not None:
            # Two-sided case: a pair of builders, a pair of matrices.
            builders = incr_dbuilders(formula_like, data_iter_maker, depth)
            lhs, rhs = build_design_matrices(builders, data)
        else:
            # One-sided case: a single builder and no left-hand side.
            builder = incr_dbuilder(formula_like, data_iter_maker, depth)
            lhs = None
            (rhs,) = build_design_matrices([builder], data)
        check_result(expect_full_designs, lhs, rhs, data,
                     expected_rhs_values, expected_rhs_names,
                     expected_lhs_values, expected_lhs_names)
t("~ 0 + y", ["y"], [1, 2])
t(["y"], ["y"], [1, 2])
t([six.text_type("y")], ["y"], [1, 2])
t([all_terms[1]], ["y"], [1, 2])
# Formula can't have a LHS
assert_raises(PatsyError, all_builder.subset, "a ~ a")
# Term must exist
assert_raises(KeyError, all_builder.subset, "~ asdf")
assert_raises(KeyError, all_builder.subset, ["asdf"])
assert_raises(KeyError,
all_builder.subset, [Term(["asdf"])])
# Also check for a minimal DesignInfo (column names only)
min_di = DesignInfo(["a", "b", "c"])
min_di_subset = min_di.subset(["c", "a"])
assert min_di_subset.column_names == ["c", "a"]
assert min_di_subset.terms is None
then this will be used to construct them.
:returns: a DesignInfo object
"""
if hasattr(array_like, "design_info") and isinstance(array_like.design_info, cls):
return array_like.design_info
arr = atleast_2d_column_default(array_like, preserve_pandas=True)
if arr.ndim > 2:
raise ValueError("design matrix can't have >2 dimensions")
columns = getattr(arr, "columns", range(arr.shape[1]))
if (hasattr(columns, "dtype")
and not safe_issubdtype(columns.dtype, np.integer)):
column_names = [str(obj) for obj in columns]
else:
column_names = ["%s%s" % (default_column_prefix, i)
for i in columns]
return DesignInfo(column_names)
assert di.slice("x2") == slice(1, 2)
assert di.slice("x3") == slice(2, 3)
assert di.slice("x") == slice(0, 3)
assert di.slice(t_x) == slice(0, 3)
assert di.slice("y") == slice(3, 4)
assert di.slice(t_y) == slice(3, 4)
assert di.slice(slice(2, 4)) == slice(2, 4)
assert_raises(PatsyError, di.slice, "asdf")
# smoke test
repr(di)
assert_no_pickling(di)
# One without term objects
di = DesignInfo(["a1", "a2", "a3", "b"])
assert di.column_names == ["a1", "a2", "a3", "b"]
assert di.term_names == ["a1", "a2", "a3", "b"]
assert di.terms is None
assert di.column_name_indexes == {"a1": 0, "a2": 1, "a3": 2, "b": 3}
assert di.term_name_slices == {"a1": slice(0, 1),
"a2": slice(1, 2),
"a3": slice(2, 3),
"b": slice(3, 4)}
assert di.term_slices is None
assert di.describe() == "a1 + a2 + a3 + b"
assert di.slice(1) == slice(1, 2)
assert di.slice("a1") == slice(0, 1)
assert di.slice("a2") == slice(1, 2)
assert di.slice("a3") == slice(2, 3)
assert di.slice("b") == slice(3, 4)
def test_design_matrix():
    """Check basic ``DesignMatrix`` construction and its ``design_info``."""
    from nose.tools import assert_raises

    # With an explicit DesignInfo, the matrix reports that info's names.
    di = DesignInfo(["a1", "a2", "a3", "b"])
    mm = DesignMatrix([[12, 14, 16, 18]], di)
    explicit_names = ["a1", "a2", "a3", "b"]
    assert mm.design_info.column_names == explicit_names

    # A one-name DesignInfo is rejected for a four-column matrix.
    bad_di = DesignInfo(["a1"])
    assert_raises(ValueError, DesignMatrix, [[12, 14, 16, 18]], bad_di)

    # Without a DesignInfo, the columns get generated "columnN" names.
    mm2 = DesignMatrix([[12, 14, 16, 18]])
    default_names = ["column0", "column1", "column2", "column3"]
    assert mm2.design_info.column_names == default_names

    # A flat sequence is turned into a single column.
    mm3 = DesignMatrix([12, 14, 16, 18])
    expected_shape = (4, 1)
    assert mm3.shape == expected_shape

    # DesignMatrix always has exactly 2 dimensions
    assert_raises(ValueError, DesignMatrix, [[[1]]])
assert di.column_name_indexes == {"a1": 0, "a2": 1, "a3": 2, "b": 3}
assert di.term_name_slices == {"a1": slice(0, 1),
"a2": slice(1, 2),
"a3": slice(2, 3),
"b": slice(3, 4)}
assert di.term_slices is None
assert di.describe() == "a1 + a2 + a3 + b"
assert di.slice(1) == slice(1, 2)
assert di.slice("a1") == slice(0, 1)
assert di.slice("a2") == slice(1, 2)
assert di.slice("a3") == slice(2, 3)
assert di.slice("b") == slice(3, 4)
# Check intercept handling in describe()
assert DesignInfo(["Intercept", "a", "b"]).describe() == "1 + a + b"
# Failure modes
# must specify either both or neither of factor_infos and term_codings:
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], factor_infos=factor_infos)
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], term_codings=term_codings)
# factor_infos must be a dict
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], list(factor_infos), term_codings)
# wrong number of column names:
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y1", "y2"], factor_infos, term_codings)
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3"], factor_infos, term_codings)
# name overlap problems
def test_design_matrix():
    """Check ``DesignMatrix`` construction, naming and pass-through."""
    from nose.tools import assert_raises

    # With an explicit DesignInfo, the matrix reports that info's names.
    di = DesignInfo(["a1", "a2", "a3", "b"])
    mm = DesignMatrix([[12, 14, 16, 18]], di)
    explicit_names = ["a1", "a2", "a3", "b"]
    assert mm.design_info.column_names == explicit_names

    # A one-name DesignInfo is rejected for a four-column matrix.
    bad_di = DesignInfo(["a1"])
    assert_raises(ValueError, DesignMatrix, [[12, 14, 16, 18]], bad_di)

    # Without a DesignInfo, the columns get generated "columnN" names.
    mm2 = DesignMatrix([[12, 14, 16, 18]])
    default_names = ["column0", "column1", "column2", "column3"]
    assert mm2.design_info.column_names == default_names

    # A flat sequence is turned into a single column.
    mm3 = DesignMatrix([12, 14, 16, 18])
    expected_shape = (4, 1)
    assert mm3.shape == expected_shape

    # DesignMatrix always has exactly 2 dimensions
    assert_raises(ValueError, DesignMatrix, [[[1]]])

    # DesignMatrix constructor passes through existing DesignMatrixes
    mm4 = DesignMatrix(mm)
    assert mm4 is mm
# But not if they are really slices:
term_name_to_term = {}
for term in self.term_codings:
term_name_to_term[term.name()] = term
new_column_names = []
new_factor_infos = {}
new_term_codings = OrderedDict()
for name_or_term in which_terms:
term = term_name_to_term.get(name_or_term, name_or_term)
# If the name is unknown we just let the KeyError escape
s = self.term_slices[term]
new_column_names += self.column_names[s]
for f in term.factors:
new_factor_infos[f] = self.factor_infos[f]
new_term_codings[term] = self.term_codings[term]
return DesignInfo(new_column_names,
factor_infos=new_factor_infos,
term_codings=new_term_codings)
orig_index = None
if return_type == "dataframe":
m = atleast_2d_column_default(m, preserve_pandas=True)
m = pandas.DataFrame(m)
m.columns = di.column_names
m.design_info = di
return (m, orig_index)
else:
return (DesignMatrix(m, di), orig_index)
rhs, rhs_orig_index = _regularize_matrix(rhs, "x")
if lhs is None:
lhs = np.zeros((rhs.shape[0], 0), dtype=float)
lhs, lhs_orig_index = _regularize_matrix(lhs, "y")
assert isinstance(getattr(lhs, "design_info", None), DesignInfo)
assert isinstance(getattr(rhs, "design_info", None), DesignInfo)
if lhs.shape[0] != rhs.shape[0]:
raise PatsyError("shape mismatch: outcome matrix has %s rows, "
"predictor matrix has %s rows"
% (lhs.shape[0], rhs.shape[0]))
if rhs_orig_index is not None and lhs_orig_index is not None:
if not rhs_orig_index.equals(lhs_orig_index):
raise PatsyError("index mismatch: outcome and "
"predictor have incompatible indexes")
if return_type == "dataframe":
if rhs_orig_index is not None and lhs_orig_index is None:
lhs.index = rhs.index
if rhs_orig_index is None and lhs_orig_index is not None:
rhs.index = lhs.index
return (lhs, rhs)
# Check intercept handling in describe()
assert DesignInfo(["Intercept", "a", "b"]).describe() == "1 + a + b"
# Failure modes
# must specify either both or neither of factor_infos and term_codings:
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], factor_infos=factor_infos)
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], term_codings=term_codings)
# factor_infos must be a dict
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y"], list(factor_infos), term_codings)
# wrong number of column names:
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3", "y1", "y2"], factor_infos, term_codings)
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3"], factor_infos, term_codings)
# name overlap problems
assert_raises(ValueError, DesignInfo,
["x1", "x2", "y", "y2"], factor_infos, term_codings)
# duplicate name
assert_raises(ValueError, DesignInfo,
["x1", "x1", "x1", "y"], factor_infos, term_codings)
# f_y is in factor_infos, but not mentioned in any term
term_codings_x_only = OrderedDict(term_codings)
del term_codings_x_only[t_y]
assert_raises(ValueError, DesignInfo,
["x1", "x2", "x3"], factor_infos, term_codings_x_only)
# f_a is in a term, but not in factor_infos
f_a = _MockFactor("a")