How to use the anndata.Raw class in anndata

To help you get started, we’ve selected a few anndata examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github theislab / diffxpy / diffxpy / testing / utils.py View on Github external
:param data: Input data matrix (observations x features) or (cells x genes).
    :param sample_description: pandas.DataFrame containing sample annotations, can be None.
    :return: Assembled sample annotations.
    """
    if sample_description is None:
        if anndata is not None and isinstance(data, anndata.AnnData):
            sample_description = data.obs
        else:
            raise ValueError(
                "Please specify `sample_description` or provide `data` as anndata.AnnData " +
                "with corresponding sample annotations"
            )

    if sample_description is not None:
        if anndata is not None and isinstance(data, Raw):
            # Raw does not have attribute shape.
            assert data.X.shape[0] == sample_description.shape[0], \
                "data matrix and sample description must contain same number of cells: %i, %i" % \
                (data.X.shape[0], sample_description.shape[0])
        elif isinstance(data, glm.typing.InputDataBase):
            assert data.x.shape[0] == sample_description.shape[0], \
                "data matrix and sample description must contain same number of cells: %i, %i" % \
                (data.x.shape[0], sample_description.shape[0])
        else:
            assert data.shape[0] == sample_description.shape[0], \
                "data matrix and sample description must contain same number of cells: %i, %i" % \
                (data.shape[0], sample_description.shape[0])
    return sample_description
github theislab / diffxpy / diffxpy / testing / tests.py View on Github external
gene_names: Union[np.ndarray, list] = None,
            sample_description: pd.DataFrame = None
    ):
        """
        :param data: Array-like or anndata.AnnData object containing observations.
            Input data matrix (observations x features) or (cells x genes).
        :param parts: str, array

            - column in data.obs/sample_description which contains the split of observations into the two groups.
            - array of length `num_observations` containing group labels
        :param gene_names: optional list/array of gene names which will be used if `data` does not implicitly store these
        :param sample_description: optional pandas.DataFrame containing sample annotations
        """
        if isinstance(data, glm.typing.InputDataBase):
            self.x = data.x
        elif isinstance(data, anndata.AnnData) or isinstance(data, Raw):
            self.x = data.X
        elif isinstance(data, np.ndarray):
            self.x = data
        else:
            raise ValueError("data type %s not recognized" % type(data))
        self.gene_names = parse_gene_names(data, gene_names)
        self.sample_description = parse_sample_description(data, sample_description)
        self.partition = parse_grouping(data, sample_description, parts)
        self.partitions = np.unique(self.partition)
        self.partition_idx = [np.where(self.partition == x)[0] for x in self.partitions]
github theislab / diffxpy / diffxpy / testing / tests.py View on Github external
if lazy:
            de_test = DifferentialExpressionTestZTestLazy(
                model_estim=model,
                grouping=grouping,
                groups=np.unique(grouping),
                correction_type=pval_correction
            )
        else:
            de_test = DifferentialExpressionTestZTest(
                model_estim=model,
                grouping=grouping,
                groups=np.unique(grouping),
                correction_type=pval_correction
            )
    else:
        if isinstance(data, anndata.AnnData) or isinstance(data, anndata.Raw):
            data = data.X
        elif isinstance(data, glm.typing.InputDataBase):
            data = data.x
        groups = np.unique(grouping)
        pvals = np.tile(np.NaN, [len(groups), len(groups), data.shape[1]])
        pvals[np.eye(pvals.shape[0]).astype(bool)] = 0
        logfc = np.tile(np.NaN, [len(groups), len(groups), data.shape[1]])
        logfc[np.eye(logfc.shape[0]).astype(bool)] = 0

        if keep_full_test_objs:
            tests = np.tile([None], [len(groups), len(groups)])
        else:
            tests = None

        for i, g1 in enumerate(groups):
            for j, g2 in enumerate(groups[(i + 1):]):
github theislab / diffxpy / diffxpy / fit / fit.py View on Github external
- array of length `num_observations` containing group labels
        :param gene_names: optional list/array of gene names which will be used if `data` does not implicitly store these
        :param sample_description: optional pandas.DataFrame containing sample annotations.
        :param dmat_loc: Pre-built location model design matrix.
            This over-rides formula_loc and sample description information given in
            data or sample_description.
        :param dmat_scale: Pre-built scale model design matrix.
            This over-rides formula_scale and sample description information given in
            data or sample_description.
        :param size_factors: 1D array of transformed library size factors for each cell in the
            same order as in data or string-type column identifier of size-factor containing
            column in sample description.
        """
        if isinstance(data, glm.typing.InputDataBase):
            self.x = data.x
        elif isinstance(data, anndata.AnnData) or isinstance(data, Raw):
            self.x = data.X
        elif isinstance(data, np.ndarray):
            self.x = data
        else:
            raise ValueError("data type %s not recognized" % type(data))
        self.gene_names = parse_gene_names(data, gene_names)
        self.sample_description = parse_sample_description(data, sample_description)
        self.dmat_loc = dmat_loc
        self.dmat_scale = dmat_scale
        self.size_factors = size_factors
        self.partition = parse_grouping(data, sample_description, parts)
        self.partitions = np.unique(self.partition)
        self.partition_idx = [np.where(self.partition == x)[0] for x in self.partitions]