How to use the patsy.design_info.DesignMatrix class in patsy

To help you get started, we’ve selected a few patsy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github theislab / diffxpy / diffxpy / testing / base.py View on Github external
])
        assert col_indices.size > 0, "Could not find any matching columns!"
        if coef_to_test is not None:
            if len(factor_loc_totest) > 1:
                raise ValueError("do not set coef_to_test if more than one factor_loc_totest is given")
            samples = sample_description[factor_loc_totest].astype(type(coef_to_test)) == coef_to_test
            one_cols = np.where(design_loc[samples][:, col_indices][0] == 1)
            if one_cols.size == 0:
                # there is no such column; modify design matrix to create one
                design_loc[:, col_indices] = np.where(samples, 1, 0)
    elif coef_to_test is not None:
        # Directly select coefficients to test from design matrix (xarray):
        # Check that coefficients to test are not dependent parameters if constraints are given:
        # TODO: design_loc is sometimes xarray and sometimes patsy when it arrives here, 
        # should it not always be xarray?
        if isinstance(design_loc, patsy.design_info.DesignMatrix):
            col_indices = np.asarray([
                design_loc.design_info.column_names.index(x)
                for x in coef_to_test
            ])
        else:
            col_indices = np.asarray([
                list(np.asarray(design_loc.coords['design_params'])).index(x)
                for x in coef_to_test
            ])
        if constraints_loc is not None:
            dep_coef_indices = np.where(np.any(constraints_loc == -1, axis=0) == True)[0]
            assert np.all([x not in dep_coef_indices for x in col_indices]), "cannot test dependent coefficient"
            indep_coef_indices = np.where(np.any(constraints_loc == -1, axis=0) == False)[0]

    ## Fit GLM:
    model = _fit(
github theislab / diffxpy / diffxpy / testing / tests.py View on Github external
return_type="patsy"
    )
    design_scale, design_scale_names, constraints_scale, term_names_scale = constraint_system_from_star(
        dmat=dmat_scale,
        sample_description=sample_description,
        formula=formula_scale,
        as_numeric=as_numeric,
        constraints=constraints_scale,
        return_type="patsy"
    )

    # Define indices of coefficients to test:
    constraints_loc_temp = constraints_loc if constraints_loc is not None else np.eye(design_loc.shape[-1])
    # Check that design_loc is patsy, otherwise  use term_names for slicing.
    if factor_loc_totest is not None:
        if not isinstance(design_loc, patsy.design_info.DesignMatrix):
            col_indices = np.where([
                x in factor_loc_totest
                for x in term_names_loc
            ])[0]
        else:
            # Select coefficients to test via formula model:
            col_indices = np.concatenate([
                np.arange(design_loc.shape[-1])[design_loc.design_info.slice(x)]
                for x in factor_loc_totest
            ])
        assert len(col_indices) > 0, "Could not find any matching columns!"
        if coef_to_test is not None:
            if len(factor_loc_totest) > 1:
                raise ValueError("do not set coef_to_test if more than one factor_loc_totest is given")
            samples = sample_description[factor_loc_totest].astype(type(coef_to_test)) == coef_to_test
            one_cols = np.where(design_loc[samples][:, col_indices][0] == 1)
github pydata / patsy / patsy / test_highlevel.py View on Github external
def test_formula_likes():
    """Feed non-formula inputs through the ``t`` helper.

    The calls below pass nested lists, NumPy arrays, ``DesignMatrix``
    instances, and two-element tuples built from them. ``t`` is defined
    outside this excerpt.

    NOTE(review): the trailing list arguments look like the expected matrix
    values and expected column names -- confirm against ``t``'s signature.
    """
    # Plain array-like, rhs only
    t([[1, 2, 3], [4, 5, 6]], {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"])
    t((None, [[1, 2, 3], [4, 5, 6]]), {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"])
    t(np.asarray([[1, 2, 3], [4, 5, 6]]), {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"])
    t((None, np.asarray([[1, 2, 3], [4, 5, 6]])), {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"])
    # A DesignMatrix built with its own column prefix: the calls that use it
    # are paired with "foo"-prefixed names rather than the "x"-prefixed names
    # that accompany the plain arrays above.
    dm = DesignMatrix([[1, 2, 3], [4, 5, 6]], default_column_prefix="foo")
    t(dm, {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["foo0", "foo1", "foo2"])
    t((None, dm), {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["foo0", "foo1", "foo2"])
      
    # Plain array-likes, lhs and rhs
    t(([1, 2], [[1, 2, 3], [4, 5, 6]]), {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"],
      [[1], [2]], ["y0"])
    t(([[1], [2]], [[1, 2, 3], [4, 5, 6]]), {}, 0,
      False,
      [[1, 2, 3], [4, 5, 6]], ["x0", "x1", "x2"],
      [[1], [2]], ["y0"])
github pydata / patsy / patsy / design_info.py View on Github external
bad_di = DesignInfo(["a1"])
    assert_raises(ValueError, DesignMatrix, [[12, 14, 16, 18]], bad_di)

    mm2 = DesignMatrix([[12, 14, 16, 18]])
    assert mm2.design_info.column_names == ["column0", "column1", "column2",
                                            "column3"]

    mm3 = DesignMatrix([12, 14, 16, 18])
    assert mm3.shape == (4, 1)

    # DesignMatrix always has exactly 2 dimensions
    assert_raises(ValueError, DesignMatrix, [[[1]]])

    # DesignMatrix constructor passes through existing DesignMatrixes
    mm4 = DesignMatrix(mm)
    assert mm4 is mm
    # But not if they are really slices:
    mm5 = DesignMatrix(mm.diagonal())
    assert mm5 is not mm

    mm6 = DesignMatrix([[12, 14, 16, 18]], default_column_prefix="x")
    assert mm6.design_info.column_names == ["x0", "x1", "x2", "x3"]

    assert_no_pickling(mm6)

    # Only real-valued matrices can be DesignMatrixs
    assert_raises(ValueError, DesignMatrix, [1, 2, 3j])
    assert_raises(ValueError, DesignMatrix, ["a", "b", "c"])
    assert_raises(ValueError, DesignMatrix, [1, 2, object()])

    # Just smoke tests
github pydata / patsy / patsy / design_info.py View on Github external
# Only real-valued matrices can be DesignMatrixs
    assert_raises(ValueError, DesignMatrix, [1, 2, 3j])
    assert_raises(ValueError, DesignMatrix, ["a", "b", "c"])
    assert_raises(ValueError, DesignMatrix, [1, 2, object()])

    # Just smoke tests
    repr(mm)
    repr(DesignMatrix(np.arange(100)))
    repr(DesignMatrix(np.arange(100) * 2.0))
    repr(mm[1:, :])
    repr(DesignMatrix(np.arange(100).reshape((1, 100))))
    repr(DesignMatrix([np.nan, np.inf]))
    repr(DesignMatrix([np.nan, 0, 1e20, 20.5]))
    # handling of zero-size matrices
    repr(DesignMatrix(np.zeros((1, 0))))
    repr(DesignMatrix(np.zeros((0, 1))))
    repr(DesignMatrix(np.zeros((0, 0))))
github pydata / patsy / patsy / highlevel.py View on Github external
def _regularize_matrix(m, default_column_prefix):
            """Wrap ``m`` in the container selected by the enclosing ``return_type``.

            A ``DesignInfo`` is derived from ``m`` via ``DesignInfo.from_array``
            using ``default_column_prefix``. When pandas is available and ``m``
            is a ``Series`` or ``DataFrame``, its index is captured before any
            conversion; otherwise the captured index is ``None``.

            Returns a ``(matrix, orig_index)`` tuple. ``matrix`` is a pandas
            ``DataFrame`` (columns named from the design info, with a
            ``design_info`` attribute attached) when ``return_type`` is
            ``"dataframe"``, and a ``DesignMatrix`` otherwise.
            """
            design_info = DesignInfo.from_array(m, default_column_prefix)
            # Short-circuit on have_pandas so `pandas` is never touched when
            # it is unavailable.
            is_pandas_input = have_pandas and isinstance(
                m, (pandas.Series, pandas.DataFrame))
            orig_index = m.index if is_pandas_input else None
            if return_type != "dataframe":
                return (DesignMatrix(m, design_info), orig_index)
            frame = pandas.DataFrame(
                atleast_2d_column_default(m, preserve_pandas=True))
            frame.columns = design_info.column_names
            frame.design_info = design_info
            return (frame, orig_index)
        rhs, rhs_orig_index = _regularize_matrix(rhs, "x")
github pydata / patsy / patsy / build.py View on Github external
# Build factor values into matrices
    results = []
    for design_info in design_infos:
        results.append(_build_design_matrix(design_info,
                                            factor_info_to_values,
                                            dtype))
    matrices = []
    for need_reshape, matrix in results:
        if need_reshape:
            # There is no data-dependence, at all -- a formula like "1 ~ 1".
            # In this case the builder just returns a single-row matrix, and
            # we have to broadcast it vertically to the appropriate size. If
            # we can figure out what that is...
            assert matrix.shape[0] == 1
            if num_rows is not None:
                matrix = DesignMatrix(np.repeat(matrix, num_rows, axis=0),
                                      matrix.design_info)
            else:
                raise PatsyError(
                    "No design matrix has any non-trivial factors, "
                    "the data object is not a DataFrame. "
                    "I can't tell how many rows the design matrix should "
                    "have!"
                    )
        matrices.append(matrix)
    if return_type == "dataframe":
        assert have_pandas
        for i, matrix in enumerate(matrices):
            di = matrix.design_info
            matrices[i] = pandas.DataFrame(matrix,
                                           columns=di.column_names,
                                           index=pandas_index)
github pydata / patsy / patsy / design_info.py View on Github external
di = DesignInfo(["a1", "a2", "a3", "b"])
    mm = DesignMatrix([[12, 14, 16, 18]], di)
    assert mm.design_info.column_names == ["a1", "a2", "a3", "b"]

    bad_di = DesignInfo(["a1"])
    assert_raises(ValueError, DesignMatrix, [[12, 14, 16, 18]], bad_di)

    mm2 = DesignMatrix([[12, 14, 16, 18]])
    assert mm2.design_info.column_names == ["column0", "column1", "column2",
                                            "column3"]

    mm3 = DesignMatrix([12, 14, 16, 18])
    assert mm3.shape == (4, 1)

    # DesignMatrix always has exactly 2 dimensions
    assert_raises(ValueError, DesignMatrix, [[[1]]])

    # DesignMatrix constructor passes through existing DesignMatrixes
    mm4 = DesignMatrix(mm)
    assert mm4 is mm
    # But not if they are really slices:
    mm5 = DesignMatrix(mm.diagonal())
    assert mm5 is not mm

    mm6 = DesignMatrix([[12, 14, 16, 18]], default_column_prefix="x")
    assert mm6.design_info.column_names == ["x0", "x1", "x2", "x3"]

    assert_no_pickling(mm6)

    # Only real-valued matrices can be DesignMatrixs
    assert_raises(ValueError, DesignMatrix, [1, 2, 3j])
    assert_raises(ValueError, DesignMatrix, ["a", "b", "c"])
github pydata / patsy / patsy / design_info.py View on Github external
mm5 = DesignMatrix(mm.diagonal())
    assert mm5 is not mm

    mm6 = DesignMatrix([[12, 14, 16, 18]], default_column_prefix="x")
    assert mm6.design_info.column_names == ["x0", "x1", "x2", "x3"]

    assert_no_pickling(mm6)

    # Only real-valued matrices can be DesignMatrixs
    assert_raises(ValueError, DesignMatrix, [1, 2, 3j])
    assert_raises(ValueError, DesignMatrix, ["a", "b", "c"])
    assert_raises(ValueError, DesignMatrix, [1, 2, object()])

    # Just smoke tests
    repr(mm)
    repr(DesignMatrix(np.arange(100)))
    repr(DesignMatrix(np.arange(100) * 2.0))
    repr(mm[1:, :])
    repr(DesignMatrix(np.arange(100).reshape((1, 100))))
    repr(DesignMatrix([np.nan, np.inf]))
    repr(DesignMatrix([np.nan, 0, 1e20, 20.5]))
    # handling of zero-size matrices
    repr(DesignMatrix(np.zeros((1, 0))))
    repr(DesignMatrix(np.zeros((0, 1))))
    repr(DesignMatrix(np.zeros((0, 0))))