How to use the patsy.design_info.FactorInfo class in patsy

To help you get started, we’ve selected a few patsy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pydata / patsy / patsy / design_info.py View on Github external
def test_FactorInfo():
    """Check the attributes FactorInfo exposes for each factor type."""
    # Numerical factor: only num_columns is supplied.
    numeric = FactorInfo("asdf", "numerical", {"a": 1}, num_columns=10)
    assert numeric.factor == "asdf"
    assert numeric.state == {"a": 1}
    assert numeric.type == "numerical"
    assert numeric.num_columns == 10
    assert numeric.categories is None

    # repr() only has to run without raising; its output is not inspected.
    repr(numeric)

    # Categorical factor: only categories is supplied. It is passed as a
    # list and compared against a tuple below.
    categorical = FactorInfo(
        "asdf", "categorical", {"a": 2}, categories=["z", "j"]
    )
    assert categorical.factor == "asdf"
    assert categorical.state == {"a": 2}
    assert categorical.type == "categorical"
    assert categorical.num_columns is None
    assert categorical.categories == ("z", "j")
github pydata / patsy / patsy / build.py View on Github external
for term in termlist:
            all_factors.update(term.factors)
    factor_states = _factors_memorize(all_factors, data_iter_maker, eval_env)
    # Now all the factors have working eval methods, so we can evaluate them
    # on some data to find out what type of data they return.
    (num_column_counts,
     cat_levels_contrasts) = _examine_factor_types(all_factors,
                                                   factor_states,
                                                   data_iter_maker,
                                                   NA_action)
    # Now we need the factor infos, which encapsulate the knowledge of
    # how to turn any given factor into a chunk of data:
    factor_infos = {}
    for factor in all_factors:
        if factor in num_column_counts:
            fi = FactorInfo(factor,
                            "numerical",
                            factor_states[factor],
                            num_columns=num_column_counts[factor],
                            categories=None)
        else:
            assert factor in cat_levels_contrasts
            categories = cat_levels_contrasts[factor][0]
            fi = FactorInfo(factor,
                            "categorical",
                            factor_states[factor],
                            num_columns=None,
                            categories=categories)
        factor_infos[factor] = fi
    # And now we can construct the DesignInfo for each termlist:
    design_infos = []
    for termlist in termlists:
github pydata / patsy / patsy / design_info.py View on Github external
def test_DesignInfo():
    """Check DesignInfo's name and slice bookkeeping for a two-term design.

    The design has a three-column numerical term ``x`` followed by a
    one-column numerical term ``y``.
    """
    from nose.tools import assert_raises

    class _MockFactor(object):
        """Stand-in factor that only knows how to report its name."""

        def __init__(self, name):
            self._name = name

        def name(self):
            return self._name

    factor_x = _MockFactor("x")
    factor_y = _MockFactor("y")
    term_x = Term([factor_x])
    term_y = Term([factor_y])

    info_by_factor = {
        factor_x: FactorInfo(factor_x, "numerical", {}, num_columns=3),
        factor_y: FactorInfo(factor_y, "numerical", {}, num_columns=1),
    }
    # One subterm per term; the trailing integer is that subterm's column
    # count (3 for x, 1 for y), matching num_columns above.
    coding_pairs = [
        (term_x, [SubtermInfo([factor_x], {}, 3)]),
        (term_y, [SubtermInfo([factor_y], {}, 1)]),
    ]
    codings = OrderedDict(coding_pairs)

    design = DesignInfo(["x1", "x2", "x3", "y"], info_by_factor, codings)

    # Column-level and term-level views of the same layout.
    assert design.column_names == ["x1", "x2", "x3", "y"]
    assert design.term_names == ["x", "y"]
    assert design.terms == [term_x, term_y]
    assert design.column_name_indexes == {"x1": 0, "x2": 1, "x3": 2, "y": 3}
    assert design.term_name_slices == {"x": slice(0, 3), "y": slice(3, 4)}
    assert design.term_slices == {term_x: slice(0, 3), term_y: slice(3, 4)}
    assert design.describe() == "x + y"

    # slice() accepts both an integer index and a column name.
    assert design.slice(1) == slice(1, 2)
    assert design.slice("x1") == slice(0, 1)
github pydata / patsy / patsy / build.py View on Github external
def test__subterm_column_names_iter_and__build_subterm():
    """Check the names and values produced for a three-factor subterm.

    The subterm combines two one-column numerical factors (f1, f3) with a
    categorical factor (f2) that is coded through an explicit contrast
    matrix.
    """
    from nose.tools import assert_raises
    from patsy.contrasts import ContrastMatrix
    from patsy.categorical import C

    num_first = _MockFactor("f1")
    cat_mid = _MockFactor("f2")
    num_last = _MockFactor("f3")

    coding = np.array([[0, 0.5],
                       [3, 0]])
    contrast = ContrastMatrix(coding, ["[c1]", "[c2]"])

    factor_infos1 = {
        num_first: FactorInfo(num_first, "numerical", {},
                              num_columns=1, categories=None),
        cat_mid: FactorInfo(cat_mid, "categorical", {},
                            num_columns=None, categories=["a", "b"]),
        num_last: FactorInfo(num_last, "numerical", {},
                             num_columns=1, categories=None),
    }
    subterm1 = SubtermInfo([num_first, cat_mid, num_last],
                           {cat_mid: contrast}, 2)

    # One generated name per contrast column, factors joined with ":".
    generated_names = list(_subterm_column_names_iter(factor_infos1, subterm1))
    assert generated_names == ["f1:f2[c1]:f3", "f1:f2[c2]:f3"]

    # _build_subterm writes into the array it is handed, so start from an
    # uninitialised (3 rows, 2 columns) buffer and inspect it afterwards.
    mat = np.empty((3, 2))
    factor_values = {
        num_first: atleast_2d_column_default([1, 2, 3]),
        cat_mid: np.asarray([0, 0, 1]),
        num_last: atleast_2d_column_default([7.5, 2, -12]),
    }
    _build_subterm(subterm1, factor_infos1, factor_values, mat)

    # Each expected entry is spelled out as
    # (contrast value) * (f1 value) * (f3 value) for that row.
    expected = [[0, 0.5 * 1 * 7.5],
                [0, 0.5 * 2 * 2],
                [3 * 3 * -12, 0]]
    assert np.allclose(mat, expected)
github pydata / patsy / patsy / build.py View on Github external
def test__eval_factor_numerical():
    """Check _eval_factor on numerical factors, including rejected inputs."""
    from nose.tools import assert_raises

    na_action = NAAction()
    mock_factor = _MockFactor()

    one_col_info = FactorInfo(mock_factor, "numerical", {},
                              num_columns=1, categories=None)

    # The info object must hold the very same factor instance it was given.
    assert one_col_info.factor is mock_factor

    values, missing = _eval_factor(one_col_info, {"mock": [1, 2, 3]},
                                   na_action)
    # A flat list of three numbers comes back as a (3, 1) column with a
    # per-row NA mask in which nothing is flagged.
    assert values.shape == (3, 1)
    assert np.all(values == [[1], [2], [3]])
    assert missing.shape == (3,)
    assert np.all(~missing)

    # Inputs that must be rejected with PatsyError for a one-column
    # numerical factor: a triply nested list, a row holding two values,
    # strings, and booleans.
    rejected_inputs = (
        [[[1]]],
        [[1, 2]],
        ["a", "b"],
        [True, False],
    )
    for bad_input in rejected_inputs:
        assert_raises(PatsyError, _eval_factor, one_col_info,
                      {"mock": bad_input}, na_action)

    # Same evaluation for a factor declared with two columns.
    two_col_info = FactorInfo(_MockFactor(), "numerical",
                              {}, num_columns=2, categories=None)
    two_col_data = {"mock": [[1, 3], [2, 2], [3, 1]]}
    values_2col, missing = _eval_factor(two_col_info, two_col_data, na_action)
github pydata / patsy / patsy / design_info.py View on Github external
def test_FactorInfo():
    fi1 = FactorInfo("asdf", "numerical", {"a": 1}, num_columns=10)
    assert fi1.factor == "asdf"
    assert fi1.state == {"a": 1}
    assert fi1.type == "numerical"
    assert fi1.num_columns == 10
    assert fi1.categories is None

    # smoke test
    repr(fi1)

    fi2 = FactorInfo("asdf", "categorical", {"a": 2}, categories=["z", "j"])
    assert fi2.factor == "asdf"
    assert fi2.state == {"a": 2}
    assert fi2.type == "categorical"
    assert fi2.num_columns is None
    assert fi2.categories == ("z", "j")

    # smoke test
    repr(fi2)

    from nose.tools import assert_raises
    assert_raises(ValueError, FactorInfo, "asdf", "non-numerical", {})
    assert_raises(ValueError, FactorInfo, "asdf", "numerical", {})

    assert_raises(ValueError, FactorInfo, "asdf", "numerical", {},
                  num_columns="asdf")
    assert_raises(ValueError, FactorInfo, "asdf", "numerical", {},
github pydata / patsy / patsy / design_info.py View on Github external
term_factors = set(term.factors)
                for subterm in subterms:
                    if not isinstance(subterm, SubtermInfo):
                        raise ValueError("expected SubtermInfo, "
                                         "not %r" % (subterm,))
                    if not term_factors.issuperset(subterm.factors):
                        raise ValueError("unexpected factors in subterm")

            all_factors = set()
            for term in self.term_codings:
                all_factors.update(term.factors)
            if all_factors != set(self.factor_infos):
                raise ValueError("Provided Term objects and factor_infos "
                                 "do not match")
            for factor, factor_info in six.iteritems(self.factor_infos):
                if not isinstance(factor_info, FactorInfo):
                    raise ValueError("expected FactorInfo object, not %r"
                                     % (factor_info,))
                if factor != factor_info.factor:
                    raise ValueError("mismatched factor_info.factor")

            for term, subterms in six.iteritems(self.term_codings):
                for subterm in subterms:
                    exp_cols = 1
                    cat_factors = set()
                    for factor in subterm.factors:
                        fi = self.factor_infos[factor]
                        if fi.type == "numerical":
                            exp_cols *= fi.num_columns
                        else:
                            assert fi.type == "categorical"
                            cm = subterm.contrast_matrices[factor].matrix
github pydata / patsy / patsy / design_info.py View on Github external
assert_raises(ValueError, FactorInfo, "asdf", "numerical", {},
                  num_columns="asdf")
    assert_raises(ValueError, FactorInfo, "asdf", "numerical", {},
                  num_columns=1, categories=1)

    assert_raises(TypeError, FactorInfo, "asdf", "categorical", {})
    assert_raises(ValueError, FactorInfo, "asdf", "categorical", {},
                  num_columns=1)
    assert_raises(TypeError, FactorInfo, "asdf", "categorical", {},
                  categories=1)

    # Make sure longs are legal for num_columns
    # (Important on python2+win64, where array shapes are tuples-of-longs)
    if not six.PY3:
        fi_long = FactorInfo("asdf", "numerical", {"a": 1},
                             num_columns=long(10))
        assert fi_long.num_columns == 10
github pydata / patsy / patsy / design_info.py View on Github external
def test_DesignInfo():
    """Check DesignInfo's name, index and slice lookups on a small design.

    The design consists of a numerical term ``x`` spanning three columns
    and a numerical term ``y`` spanning one.
    """
    from nose.tools import assert_raises

    class _MockFactor(object):
        """Bare-bones factor whose only behaviour is returning its name."""

        def __init__(self, name):
            self._name = name

        def name(self):
            return self._name

    x_factor = _MockFactor("x")
    y_factor = _MockFactor("y")
    x_term = Term([x_factor])
    y_term = Term([y_factor])

    infos = {
        x_factor: FactorInfo(x_factor, "numerical", {}, num_columns=3),
        y_factor: FactorInfo(y_factor, "numerical", {}, num_columns=1),
    }
    # Insertion order matters here: x's columns come before y's.
    subterm_pairs = [
        (x_term, [SubtermInfo([x_factor], {}, 3)]),
        (y_term, [SubtermInfo([y_factor], {}, 1)]),
    ]
    codings = OrderedDict(subterm_pairs)

    info = DesignInfo(["x1", "x2", "x3", "y"], infos, codings)

    assert info.column_names == ["x1", "x2", "x3", "y"]
    assert info.term_names == ["x", "y"]
    assert info.terms == [x_term, y_term]
    assert info.column_name_indexes == {"x1": 0, "x2": 1, "x3": 2, "y": 3}
    assert info.term_name_slices == {"x": slice(0, 3), "y": slice(3, 4)}
    assert info.term_slices == {x_term: slice(0, 3), y_term: slice(3, 4)}
    assert info.describe() == "x + y"

    # slice() is looked up first by integer position, then by each of the
    # x column names in turn.
    slice_cases = [
        (1, slice(1, 2)),
        ("x1", slice(0, 1)),
        ("x2", slice(1, 2)),
        ("x3", slice(2, 3)),
    ]
    for selector, expected in slice_cases:
        assert info.slice(selector) == expected