How to use the patsy.util.atleast_2d_column_default function in patsy

To help you get started, we’ve selected a few patsy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pydata / patsy / patsy / test_build.py View on Github external
def assert_full_rank(m):
    m = atleast_2d_column_default(m)
    if m.shape[1] == 0:
        return True
    u, s, v = np.linalg.svd(m)
    rank = np.sum(s > 1e-10)
    assert rank == m.shape[1]
github pydata / patsy / patsy / build.py View on Github external
factor_infos1 = {f1: FactorInfo(f1, "numerical", {},
                                    num_columns=1, categories=None),
                     f2: FactorInfo(f2, "categorical", {},
                                    num_columns=None, categories=["a", "b"]),
                     f3: FactorInfo(f3, "numerical", {},
                                    num_columns=1, categories=None),
                     }
    contrast_matrices = {f2: contrast}
    subterm1 = SubtermInfo([f1, f2, f3], contrast_matrices, 2)
    assert (list(_subterm_column_names_iter(factor_infos1, subterm1))
            == ["f1:f2[c1]:f3", "f1:f2[c2]:f3"])

    mat = np.empty((3, 2))
    _build_subterm(subterm1, factor_infos1,
                   {f1: atleast_2d_column_default([1, 2, 3]),
                    f2: np.asarray([0, 0, 1]),
                    f3: atleast_2d_column_default([7.5, 2, -12])},
                   mat)
    assert np.allclose(mat, [[0, 0.5 * 1 * 7.5],
                             [0, 0.5 * 2 * 2],
                             [3 * 3 * -12, 0]])
    # Check that missing categorical values blow up
    assert_raises(PatsyError, _build_subterm, subterm1, factor_infos1,
                  {f1: atleast_2d_column_default([1, 2, 3]),
                   f2: np.asarray([0, -1, 1]),
                   f3: atleast_2d_column_default([7.5, 2, -12])},
                  mat)

    factor_infos2 = dict(factor_infos1)
    factor_infos2[f1] = FactorInfo(f1, "numerical", {},
                                   num_columns=2, categories=None)
github pydata / patsy / patsy / highlevel.py View on Github external
def _regularize_matrix(m, default_column_prefix):
            di = DesignInfo.from_array(m, default_column_prefix)
            if have_pandas and isinstance(m, (pandas.Series, pandas.DataFrame)):
                orig_index = m.index
            else:
                orig_index = None
            if return_type == "dataframe":
                m = atleast_2d_column_default(m, preserve_pandas=True)
                m = pandas.DataFrame(m)
                m.columns = di.column_names
                m.design_info = di
                return (m, orig_index)
            else:
                return (DesignMatrix(m, di), orig_index)
        rhs, rhs_orig_index = _regularize_matrix(rhs, "x")
github pydata / patsy / patsy / build.py View on Github external
num_column_counts = {}
    cat_sniffers = {}
    examine_needed = set(factors)
    for data in data_iter_maker():
        for factor in list(examine_needed):
            value = factor.eval(factor_states[factor], data)
            if factor in cat_sniffers or guess_categorical(value):
                if factor not in cat_sniffers:
                    cat_sniffers[factor] = CategoricalSniffer(NA_action,
                                                              factor.origin)
                done = cat_sniffers[factor].sniff(value)
                if done:
                    examine_needed.remove(factor)
            else:
                # Numeric
                value = atleast_2d_column_default(value)
                _max_allowed_dim(2, value, factor)
                column_count = value.shape[1]
                num_column_counts[factor] = column_count
                examine_needed.remove(factor)
        if not examine_needed:
            break
    # Pull out the levels
    cat_levels_contrasts = {}
    for factor, sniffer in six.iteritems(cat_sniffers):
        cat_levels_contrasts[factor] = sniffer.levels_contrast()
    return (num_column_counts, cat_levels_contrasts)
github pydata / patsy / patsy / build.py View on Github external
def _eval_factor(factor_info, data, NA_action):
    factor = factor_info.factor
    result = factor.eval(factor_info.state, data)
    # Returns either a 2d ndarray, or a DataFrame, plus is_NA mask
    if factor_info.type == "numerical":
        result = atleast_2d_column_default(result, preserve_pandas=True)
        _max_allowed_dim(2, result, factor)
        if result.shape[1] != factor_info.num_columns:
            raise PatsyError("when evaluating factor %s, I got %s columns "
                                "instead of the %s I was expecting"
                                % (factor.name(),
                                   factor_info.num_columns,
                                   result.shape[1]),
                                factor)
        if not safe_issubdtype(np.asarray(result).dtype, np.number):
            raise PatsyError("when evaluating numeric factor %s, "
                             "I got non-numeric data of type '%s'"
                             % (factor.name(), result.dtype),
                             factor)
        return result, NA_action.is_numerical_NA(result)
    # returns either a 1d ndarray or a pandas.Series, plus is_NA mask
    else:
github pydata / patsy / patsy / state.py View on Github external
def memorize_chunk(self, x, center=True, rescale=True, ddof=0):
        x = atleast_2d_column_default(x)
        if self.current_mean is None:
            self.current_mean = np.zeros(x.shape[1], dtype=wide_dtype_for(x))
            self.current_M2 = np.zeros(x.shape[1], dtype=wide_dtype_for(x))
        # XX this can surely be vectorized but I am feeling lazy:
        for i in range(x.shape[0]):
            self.current_n += 1
            delta = x[i, :] - self.current_mean
            self.current_mean += delta / self.current_n
            self.current_M2 += delta * (x[i, :] - self.current_mean)
github pydata / patsy / patsy / design_info.py View on Github external
`design_info` argument is not given, then one is created via
        :meth:`DesignInfo.from_array` using the given
        `default_column_prefix`.

        Depending on the input array, it is possible this will pass through
        its input unchanged, or create a view.
        """
        # Pass through existing DesignMatrixes. The design_info check is
        # necessary because numpy is sort of annoying and cannot be stopped
        # from turning non-design-matrix arrays into DesignMatrix
        # instances. (E.g., my_dm.diagonal() will return a DesignMatrix
        # object, but one without a design_info attribute.)
        if (isinstance(input_array, DesignMatrix)
            and hasattr(input_array, "design_info")):
            return input_array
        self = atleast_2d_column_default(input_array).view(cls)
        # Upcast integer to floating point
        if safe_issubdtype(self.dtype, np.integer):
            self = np.asarray(self, dtype=float).view(cls)
        if self.ndim > 2:
            raise ValueError("DesignMatrix must be 2d")
        assert self.ndim == 2
        if design_info is None:
            design_info = DesignInfo.from_array(self, default_column_prefix)
        if len(design_info.column_names) != self.shape[1]:
            raise ValueError("wrong number of column names for design matrix "
                             "(got %s, wanted %s)"
                             % (len(design_info.column_names), self.shape[1]))
        self.design_info = design_info
        if not safe_issubdtype(self.dtype, np.floating):
            raise ValueError("design matrix must be real-valued floating point")
        return self
github pydata / patsy / patsy / constraint.py View on Github external
def __init__(self, variable_names, coefs, constants=None):
        self.variable_names = list(variable_names)
        self.coefs = np.atleast_2d(np.asarray(coefs, dtype=float))
        if constants is None:
            constants = np.zeros(self.coefs.shape[0], dtype=float)
        constants = np.asarray(constants, dtype=float)
        self.constants = atleast_2d_column_default(constants)
        if self.constants.ndim != 2 or self.constants.shape[1] != 1:
            raise ValueError("constants is not (convertible to) a column matrix")
        if self.coefs.ndim != 2 or self.coefs.shape[1] != len(variable_names):
            raise ValueError("wrong shape for coefs")
        if self.coefs.shape[0] == 0:
            raise ValueError("must have at least one row in constraint matrix")
        if self.coefs.shape[0] != self.constants.shape[0]:
            raise ValueError("shape mismatch between coefs and constants")