How to use the patsy.design_info.DesignInfo class in patsy

To help you get started, we’ve selected a few patsy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pydata / patsy / patsy / test_highlevel.py View on Github external
def t(formula_like, data, depth,
      expect_full_designs,
      expected_rhs_values, expected_rhs_names,
      expected_lhs_values=None, expected_lhs_names=None): # pragma: no cover
    """Incrementally build design matrices for ``formula_like`` and check them.

    ``formula_like`` is treated as buildable when it is an instance of one
    of ``six.string_types``, ``ModelDesc`` or ``DesignInfo``, a tuple whose
    first element is a ``DesignInfo``, or any object exposing a
    ``__patsy_get_model_desc__`` attribute.  In that case the matrices are
    built from ``data`` and handed to ``check_result`` together with the
    expected values and names.  With ``expected_lhs_values`` left as None
    only a right-hand side is built (via ``incr_dbuilder``) and ``lhs`` is
    passed on as None; otherwise both sides come from ``incr_dbuilders``.

    For every other kind of ``formula_like`` both incremental entry points
    are required to raise ``PatsyError``.
    """
    # NOTE(review): an int depth is bumped by one, presumably to account for
    # this helper's own stack frame -- confirm against incr_dbuilder.
    if isinstance(depth, int):
        depth = depth + 1

    def data_iter_maker():
        return iter([data])

    formula_types = six.string_types + (ModelDesc, DesignInfo)
    buildable = (
        isinstance(formula_like, formula_types)
        or (isinstance(formula_like, tuple)
            and isinstance(formula_like[0], DesignInfo))
        or hasattr(formula_like, "__patsy_get_model_desc__")
    )

    if not buildable:
        # Unsupported input: both incremental builders must reject it.
        for entry_point in (incr_dbuilders, incr_dbuilder):
            assert_raises(PatsyError, entry_point,
                          formula_like, data_iter_maker)
    else:
        if expected_lhs_values is not None:
            both_builders = incr_dbuilders(formula_like, data_iter_maker,
                                           depth)
            lhs, rhs = build_design_matrices(both_builders, data)
        else:
            rhs_builder = incr_dbuilder(formula_like, data_iter_maker, depth)
            lhs = None
            (rhs,) = build_design_matrices([rhs_builder], data)
        check_result(expect_full_designs, lhs, rhs, data,
                     expected_rhs_values, expected_rhs_names,
                     expected_lhs_values, expected_lhs_names)
github pydata / patsy / patsy / test_build.py View on Github external
t("~ 0 + y", ["y"], [1, 2])
    t(["y"], ["y"], [1, 2])
    t([six.text_type("y")], ["y"], [1, 2])
    t([all_terms[1]], ["y"], [1, 2])

    # Formula can't have a LHS
    assert_raises(PatsyError, all_builder.subset, "a ~ a")
    # Term must exist
    assert_raises(KeyError, all_builder.subset, "~ asdf")
    assert_raises(KeyError, all_builder.subset, ["asdf"])
    assert_raises(KeyError,
                  all_builder.subset, [Term(["asdf"])])

    # Also check for a minimal DesignInfo (column names only)
    min_di = DesignInfo(["a", "b", "c"])
    min_di_subset = min_di.subset(["c", "a"])
    assert min_di_subset.column_names == ["c", "a"]
    assert min_di_subset.terms is None
github pydata / patsy / patsy / design_info.py View on Github external
then this will be used to construct them.
        :returns: a DesignInfo object
        """
        if hasattr(array_like, "design_info") and isinstance(array_like.design_info, cls):
            return array_like.design_info
        arr = atleast_2d_column_default(array_like, preserve_pandas=True)
        if arr.ndim > 2:
            raise ValueError("design matrix can't have >2 dimensions")
        columns = getattr(arr, "columns", range(arr.shape[1]))
        if (hasattr(columns, "dtype")
            and not safe_issubdtype(columns.dtype, np.integer)):
            column_names = [str(obj) for obj in columns]
        else:
            column_names = ["%s%s" % (default_column_prefix, i)
                            for i in columns]
        return DesignInfo(column_names)
github pydata / patsy / patsy / design_info.py View on Github external
assert di.slice("x2") == slice(1, 2)
    assert di.slice("x3") == slice(2, 3)
    assert di.slice("x") == slice(0, 3)
    assert di.slice(t_x) == slice(0, 3)
    assert di.slice("y") == slice(3, 4)
    assert di.slice(t_y) == slice(3, 4)
    assert di.slice(slice(2, 4)) == slice(2, 4)
    assert_raises(PatsyError, di.slice, "asdf")

    # smoke test
    repr(di)

    assert_no_pickling(di)

    # One without term objects
    di = DesignInfo(["a1", "a2", "a3", "b"])
    assert di.column_names == ["a1", "a2", "a3", "b"]
    assert di.term_names == ["a1", "a2", "a3", "b"]
    assert di.terms is None
    assert di.column_name_indexes == {"a1": 0, "a2": 1, "a3": 2, "b": 3}
    assert di.term_name_slices == {"a1": slice(0, 1),
                                   "a2": slice(1, 2),
                                   "a3": slice(2, 3),
                                   "b": slice(3, 4)}
    assert di.term_slices is None
    assert di.describe() == "a1 + a2 + a3 + b"

    assert di.slice(1) == slice(1, 2)
    assert di.slice("a1") == slice(0, 1)
    assert di.slice("a2") == slice(1, 2)
    assert di.slice("a3") == slice(2, 3)
    assert di.slice("b") == slice(3, 4)
github pydata / patsy / patsy / design_info.py View on Github external
def test_design_matrix():
    """Exercise DesignMatrix construction with and without a DesignInfo."""
    from nose.tools import assert_raises

    # An explicit DesignInfo is attached to the resulting matrix.
    expected_names = ["a1", "a2", "a3", "b"]
    di = DesignInfo(["a1", "a2", "a3", "b"])
    mm = DesignMatrix([[12, 14, 16, 18]], di)
    assert mm.design_info.column_names == expected_names

    # A DesignInfo with one column name is rejected for four-column data.
    too_short_di = DesignInfo(["a1"])
    assert_raises(ValueError, DesignMatrix, [[12, 14, 16, 18]], too_short_di)

    # Without a DesignInfo, columns are named column0 .. column3.
    unnamed = DesignMatrix([[12, 14, 16, 18]])
    default_names = ["column%d" % i for i in range(4)]
    assert unnamed.design_info.column_names == default_names

    # Flat input ends up with shape (4, 1).
    as_column = DesignMatrix([12, 14, 16, 18])
    assert as_column.shape == (4, 1)

    # Three-dimensional input is refused.
    assert_raises(ValueError, DesignMatrix, [[[1]]])
github pydata / patsy / patsy / design_info.py View on Github external
assert di.column_name_indexes == {"a1": 0, "a2": 1, "a3": 2, "b": 3}
    assert di.term_name_slices == {"a1": slice(0, 1),
                                   "a2": slice(1, 2),
                                   "a3": slice(2, 3),
                                   "b": slice(3, 4)}
    assert di.term_slices is None
    assert di.describe() == "a1 + a2 + a3 + b"

    assert di.slice(1) == slice(1, 2)
    assert di.slice("a1") == slice(0, 1)
    assert di.slice("a2") == slice(1, 2)
    assert di.slice("a3") == slice(2, 3)
    assert di.slice("b") == slice(3, 4)

    # Check intercept handling in describe()
    assert DesignInfo(["Intercept", "a", "b"]).describe() == "1 + a + b"

    # Failure modes
    # must specify either both or neither of factor_infos and term_codings:
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos=factor_infos)
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], term_codings=term_codings)
    # factor_infos must be a dict
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], list(factor_infos), term_codings)
    # wrong number of column names:
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y1", "y2"], factor_infos, term_codings)
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3"], factor_infos, term_codings)
    # name overlap problems
github pydata / patsy / patsy / design_info.py View on Github external
def test_design_matrix():
    """Check DesignMatrix construction, validation and pass-through.

    Covers: attaching an explicit DesignInfo, rejecting a DesignInfo whose
    column count does not match the data, default column naming, promotion
    of flat input to a single column, rejection of >2-d input, and
    returning an existing DesignMatrix unchanged.
    """
    from nose.tools import assert_raises

    # Explicit DesignInfo: its column names are exposed via design_info.
    di = DesignInfo(["a1", "a2", "a3", "b"])
    mm = DesignMatrix([[12, 14, 16, 18]], di)
    assert mm.design_info.column_names == ["a1", "a2", "a3", "b"]

    # One column name for four columns of data must raise ValueError.
    bad_di = DesignInfo(["a1"])
    assert_raises(ValueError, DesignMatrix, [[12, 14, 16, 18]], bad_di)

    # No DesignInfo given: names default to "column<i>".
    mm2 = DesignMatrix([[12, 14, 16, 18]])
    assert mm2.design_info.column_names == ["column0", "column1", "column2",
                                            "column3"]

    # Flat (1-d) input becomes a single column of four rows.
    mm3 = DesignMatrix([12, 14, 16, 18])
    assert mm3.shape == (4, 1)

    # DesignMatrix always has exactly 2 dimensions
    assert_raises(ValueError, DesignMatrix, [[[1]]])

    # DesignMatrix constructor passes through existing DesignMatrixes
    mm4 = DesignMatrix(mm)
    assert mm4 is mm
    # But not if they are really slices:
github pydata / patsy / patsy / design_info.py View on Github external
term_name_to_term = {}
            for term in self.term_codings:
                term_name_to_term[term.name()] = term

            new_column_names = []
            new_factor_infos = {}
            new_term_codings = OrderedDict()
            for name_or_term in which_terms:
                term = term_name_to_term.get(name_or_term, name_or_term)
                # If the name is unknown we just let the KeyError escape
                s = self.term_slices[term]
                new_column_names += self.column_names[s]
                for f in term.factors:
                    new_factor_infos[f] = self.factor_infos[f]
                new_term_codings[term] = self.term_codings[term]
            return DesignInfo(new_column_names,
                              factor_infos=new_factor_infos,
                              term_codings=new_term_codings)
github pydata / patsy / patsy / highlevel.py View on Github external
orig_index = None
            if return_type == "dataframe":
                m = atleast_2d_column_default(m, preserve_pandas=True)
                m = pandas.DataFrame(m)
                m.columns = di.column_names
                m.design_info = di
                return (m, orig_index)
            else:
                return (DesignMatrix(m, di), orig_index)
        rhs, rhs_orig_index = _regularize_matrix(rhs, "x")
        if lhs is None:
            lhs = np.zeros((rhs.shape[0], 0), dtype=float)
        lhs, lhs_orig_index = _regularize_matrix(lhs, "y")

        assert isinstance(getattr(lhs, "design_info", None), DesignInfo)
        assert isinstance(getattr(rhs, "design_info", None), DesignInfo)
        if lhs.shape[0] != rhs.shape[0]:
            raise PatsyError("shape mismatch: outcome matrix has %s rows, "
                                "predictor matrix has %s rows"
                                % (lhs.shape[0], rhs.shape[0]))
        if rhs_orig_index is not None and lhs_orig_index is not None:
            if not rhs_orig_index.equals(lhs_orig_index):
                raise PatsyError("index mismatch: outcome and "
                                    "predictor have incompatible indexes")
        if return_type == "dataframe":
            if rhs_orig_index is not None and lhs_orig_index is None:
                lhs.index = rhs.index
            if rhs_orig_index is None and lhs_orig_index is not None:
                rhs.index = lhs.index
        return (lhs, rhs)
github pydata / patsy / patsy / design_info.py View on Github external
# Check intercept handling in describe()
    assert DesignInfo(["Intercept", "a", "b"]).describe() == "1 + a + b"

    # Failure modes
    # must specify either both or neither of factor_infos and term_codings:
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], factor_infos=factor_infos)
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], term_codings=term_codings)
    # factor_infos must be a dict
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y"], list(factor_infos), term_codings)
    # wrong number of column names:
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3", "y1", "y2"], factor_infos, term_codings)
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3"], factor_infos, term_codings)
    # name overlap problems
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "y", "y2"], factor_infos, term_codings)
    # duplicate name
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x1", "x1", "y"], factor_infos, term_codings)

    # f_y is in factor_infos, but not mentioned in any term
    term_codings_x_only = OrderedDict(term_codings)
    del term_codings_x_only[t_y]
    assert_raises(ValueError, DesignInfo,
                  ["x1", "x2", "x3"], factor_infos, term_codings_x_only)

    # f_a is in a term, but not in factor_infos
    f_a = _MockFactor("a")