How to use anndata - 10 common examples

To help you get started, we’ve selected a few anndata examples that reflect popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github theislab / diffxpy / diffxpy / unit_test / test_data_types.py View on Github external
def _test_anndata_raw(self, sparse):
    """Run the test helpers on an AnnData object and on its ``.raw`` attribute.

    Data from ``self.simulate()`` is wrapped in an ``anndata.AnnData``
    whose ``.raw`` is set from the object itself. The Wald and LRT helpers
    receive ``.raw``; the t-test and rank helpers receive the AnnData
    object directly.

    :param sparse: If truthy, convert the simulated matrix to a
        ``scipy.sparse.csr_matrix`` before building the AnnData object.
    """
    counts, sample_description = self.simulate()
    # Gene names are derived from the simulated matrix before any
    # sparse conversion takes place.
    names = ["gene{}".format(idx) for idx in range(counts.shape[1])]
    if sparse:
        counts = scipy.sparse.csr_matrix(counts)

    adata = anndata.AnnData(counts)
    adata.var_names = names
    adata.raw = adata

    # Helpers exercised on the raw attribute.
    self._test_wald(data=adata.raw, sample_description=sample_description)
    self._test_lrt(data=adata.raw, sample_description=sample_description)
    # Helpers exercised on the AnnData object itself.
    self._test_t_test(data=adata, sample_description=sample_description)
    self._test_rank(data=adata, sample_description=sample_description)
github theislab / scgen / tests / test_plotting.py View on Github external
def test_reg_mean_plot():
    """Smoke-test ``scgen.plotting.reg_mean_plot`` with four argument combinations.

    Reads the training data, builds a ``scgen.VAEArith`` network, calls
    ``train`` with ``n_epochs=0``, predicts from the control CD4T cells,
    concatenates the prediction with all CD4T cells, and calls
    ``reg_mean_plot`` with and without ``gene_list`` and with and without
    the extra ``"y1"`` axis key.

    The network's session is closed in a ``finally`` clause so it is
    released even when one of the steps raises.
    """
    train = sc.read("./tests/data/train.h5ad", backup_url="https://goo.gl/33HtVh")
    network = scgen.VAEArith(x_dimension=train.shape[1], model_path="../models/test")
    try:
        network.train(train_data=train, n_epochs=0)

        is_cd4t = train.obs["cell_type"] == "CD4T"
        unperturbed_data = train[is_cd4t & (train.obs["condition"] == "control")]
        condition = {"ctrl": "control", "stim": "stimulated"}
        # The second return value of predict() is not needed here.
        pred, _ = network.predict(adata=train, adata_to_predict=unperturbed_data, conditions=condition,
                                  condition_key="condition", cell_type_key="cell_type")
        pred_adata = anndata.AnnData(pred, obs={"condition": ["pred"] * len(pred)},
                                     var={"var_names": train.var_names})
        CD4T = train[is_cd4t]
        all_adata = CD4T.concatenate(pred_adata)

        two_axes = {"x": "control", "y": "pred"}
        three_axes = {"x": "control", "y": "pred", "y1": "stimulated"}
        genes = ["ISG15", "CD3D"]

        scgen.plotting.reg_mean_plot(all_adata, condition_key="condition", axis_keys=two_axes,
                                     path_to_save="tests/reg_mean1.pdf")
        scgen.plotting.reg_mean_plot(all_adata, condition_key="condition", axis_keys=two_axes,
                                     path_to_save="tests/reg_mean2.pdf", gene_list=genes)
        scgen.plotting.reg_mean_plot(all_adata, condition_key="condition", axis_keys=three_axes,
                                     path_to_save="tests/reg_mean3.pdf")
        # NOTE(review): the other outputs are numbered reg_mean1..3 while this
        # one is un-numbered; file name kept as-is -- confirm it is intended.
        scgen.plotting.reg_mean_plot(all_adata, condition_key="condition", axis_keys=three_axes,
                                     gene_list=genes, path_to_save="tests/reg_mean.pdf")
    finally:
        # Release the session even if training, prediction or plotting failed.
        network.sess.close()
github theislab / trVAE / tests / test_trVAEMulti.py View on Github external
if adata_source.shape[0] == 0:
        adata_source = pred_adatas.copy()[pred_adatas.obs[condition_key] == source_condition]

    if adata_target.shape[0] == 0:
        adata_target = pred_adatas.copy()[pred_adatas.obs[condition_key] == target_condition]

    source_labels = np.zeros(adata_source.shape[0]) + source_label
    target_labels = np.zeros(adata_source.shape[0]) + target_label

    pred_target = network.predict(adata_source,
                                  encoder_labels=source_labels,
                                  decoder_labels=target_labels,
                                  size_factor=adata_source.obs['size_factors'].values
                                  )

    pred_adata = anndata.AnnData(X=pred_target)
    pred_adata.obs[condition_key] = [name] * pred_target.shape[0]
    pred_adata.var_names = adata.var_names

    if sparse.issparse(adata_source.X):
        adata_source.X = adata_source.X.A

    if sparse.issparse(adata_target.X):
        adata_target.X = adata_target.X.A

    if sparse.issparse(pred_adata.X):
        pred_adata.X = pred_adata.X.A

    # adata_to_plot = pred_adata.concatenate(adata_target)

    # trvae.plotting.reg_mean_plot(adata_to_plot,
    #                              top_100_genes=top_100_genes,
github theislab / scgen / tests / test_mmd_cvae.py View on Github external
show=False)

        decoded_latent_with_true_labels = network.predict(data=latent_with_true_labels, encoder_labels=true_labels,
                                                          decoder_labels=true_labels, data_space='latent')

        cell_type_data = train[train.obs[cell_type_key] == cell_type]
        unperturbed_data = train[((train.obs[cell_type_key] == cell_type) & (train.obs[condition_key] == ctrl_key))]
        true_labels = np.zeros((len(unperturbed_data), 1))
        fake_labels = np.ones((len(unperturbed_data), 1))

        sc.tl.rank_genes_groups(cell_type_data, groupby=condition_key, n_genes=100)
        diff_genes = cell_type_data.uns["rank_genes_groups"]["names"][stim_key]
        # cell_type_data = cell_type_data.copy()[:, diff_genes.tolist()]

        pred = network.predict(data=unperturbed_data, encoder_labels=true_labels, decoder_labels=fake_labels)
        pred_adata = anndata.AnnData(pred, obs={condition_key: ["pred"] * len(pred)},
                                     var={"var_names": cell_type_data.var_names})
        all_adata = cell_type_data.concatenate(pred_adata)

        scgen.plotting.reg_mean_plot(all_adata, condition_key=condition_key,
                                     axis_keys={"x": ctrl_key, "y": stim_key, "y1": "pred"},
                                     gene_list=diff_genes,
                                     path_to_save=f"./figures/reg_mean_{z_dim}.pdf")
        scgen.plotting.reg_var_plot(all_adata, condition_key=condition_key,
                                    axis_keys={"x": ctrl_key, "y": stim_key, 'y1': "pred"},
                                    gene_list=diff_genes,
                                    path_to_save=f"./figures/reg_var_{z_dim}.pdf")

        sc.pp.neighbors(all_adata)
        sc.tl.umap(all_adata)
        sc.pl.umap(all_adata, color=condition_key,
                   save="pred")
github gao-lab / Cell_BLAST / test / test_data.py View on Github external
def test_anndata(self):
    """Round-trip ``self.ds`` through an ``.h5ad`` file and compare the result.

    The dataset is converted with ``to_anndata()``, written to
    ``./test.h5ad``, read back with ``anndata.read_h5ad``, rebuilt with
    ``ExprDataSet.from_anndata`` and passed to ``self._compare_datasets``
    together with the original.
    """
    h5ad_path = "./test.h5ad"
    self.ds.to_anndata().write_h5ad(h5ad_path)
    restored_adata = anndata.read_h5ad(h5ad_path)
    reloaded = cb.data.ExprDataSet.from_anndata(restored_adata)
    self._compare_datasets(self.ds, reloaded)
github theislab / diffxpy / diffxpy / testing / utils.py View on Github external
:param data: Input data matrix (observations x features) or (cells x genes).
    :param sample_description: pandas.DataFrame containing sample annotations, can be None.
    :return: Assembled sample annotations.
    """
    if sample_description is None:
        if anndata is not None and isinstance(data, anndata.AnnData):
            sample_description = data.obs
        else:
            raise ValueError(
                "Please specify `sample_description` or provide `data` as anndata.AnnData " +
                "with corresponding sample annotations"
            )

    if sample_description is not None:
        if anndata is not None and isinstance(data, Raw):
            # Raw does not have attribute shape.
            assert data.X.shape[0] == sample_description.shape[0], \
                "data matrix and sample description must contain same number of cells: %i, %i" % \
                (data.X.shape[0], sample_description.shape[0])
        elif isinstance(data, glm.typing.InputDataBase):
            assert data.x.shape[0] == sample_description.shape[0], \
                "data matrix and sample description must contain same number of cells: %i, %i" % \
                (data.x.shape[0], sample_description.shape[0])
        else:
            assert data.shape[0] == sample_description.shape[0], \
                "data matrix and sample description must contain same number of cells: %i, %i" % \
                (data.shape[0], sample_description.shape[0])
    return sample_description
github theislab / diffxpy / diffxpy / testing / tests.py View on Github external
gene_names: Union[np.ndarray, list] = None,
            sample_description: pd.DataFrame = None
    ):
        """
        :param data: Array-like or anndata.Anndata object containing observations.
            Input data matrix (observations x features) or (cells x genes).
        :param parts: str, array

            - column in data.obs/sample_description which contains the split of observations into the two groups.
            - array of length `num_observations` containing group labels
        :param gene_names: optional list/array of gene names which will be used if `data` does not implicitly store these
        :param sample_description: optional pandas.DataFrame containing sample annotations
        """
        if isinstance(data, glm.typing.InputDataBase):
            self.x = data.x
        elif isinstance(data, anndata.AnnData) or isinstance(data, Raw):
            self.x = data.X
        elif isinstance(data, np.ndarray):
            self.x = data
        else:
            raise ValueError("data type %s not recognized" % type(data))
        self.gene_names = parse_gene_names(data, gene_names)
        self.sample_description = parse_sample_description(data, sample_description)
        self.partition = parse_grouping(data, sample_description, parts)
        self.partitions = np.unique(self.partition)
        self.partition_idx = [np.where(self.partition == x)[0] for x in self.partitions]
github theislab / diffxpy / diffxpy / testing / tests.py View on Github external
if lazy:
            de_test = DifferentialExpressionTestZTestLazy(
                model_estim=model,
                grouping=grouping,
                groups=np.unique(grouping),
                correction_type=pval_correction
            )
        else:
            de_test = DifferentialExpressionTestZTest(
                model_estim=model,
                grouping=grouping,
                groups=np.unique(grouping),
                correction_type=pval_correction
            )
    else:
        if isinstance(data, anndata.AnnData) or isinstance(data, anndata.Raw):
            data = data.X
        elif isinstance(data, glm.typing.InputDataBase):
            data = data.x
        groups = np.unique(grouping)
        pvals = np.tile(np.NaN, [len(groups), len(groups), data.shape[1]])
        pvals[np.eye(pvals.shape[0]).astype(bool)] = 0
        logfc = np.tile(np.NaN, [len(groups), len(groups), data.shape[1]])
        logfc[np.eye(logfc.shape[0]).astype(bool)] = 0

        if keep_full_test_objs:
            tests = np.tile([None], [len(groups), len(groups)])
        else:
            tests = None

        for i, g1 in enumerate(groups):
            for j, g2 in enumerate(groups[(i + 1):]):
github KrishnaswamyLab / graphtools / graphtools / base.py View on Github external
self._check_data(data)
        n_pca, rank_threshold = self._parse_n_pca_threshold(data, n_pca, rank_threshold)
        try:
            if isinstance(data, pd.SparseDataFrame):
                data = data.to_coo()
            elif isinstance(data, pd.DataFrame):
                try:
                    data = data.sparse.to_coo()
                except AttributeError:
                    data = np.array(data)
        except NameError:
            # pandas not installed
            pass

        try:
            if isinstance(data, anndata.AnnData):
                data = data.X
        except NameError:
            # anndata not installed
            pass
        self.data = data
        self.n_pca = n_pca
        self.rank_threshold = rank_threshold
        self.random_state = random_state
        self.data_nu = self._reduce_data()
        super().__init__(**kwargs)
github theislab / dca / dca / api.py View on Github external
If `return_info` is true, all estimated distribution parameters are stored in AnnData such as:

    - `.obsm["X_dca_dropout"]` which is the mixture coefficient (pi) of the zero component
    in ZINB, i.e. dropout probability. (Only if ae_type is zinb or zinb-conddisp)

    - `.obsm["X_dca_dispersion"]` which is the dispersion parameter of NB.

    - `.uns["dca_loss_history"]` which stores the loss history of the training.

    Finally, the raw counts are stored as `.raw`.

    If `return_model` is given, trained model is returned. When both `copy` and `return_model`
    are true, a tuple of anndata and model is returned in that order.
    """

    assert isinstance(adata, anndata.AnnData), 'adata must be an AnnData instance'
    assert mode in ('denoise', 'latent'), '%s is not a valid mode.' % mode

    # set seed for reproducibility
    random.seed(random_state)
    np.random.seed(random_state)
    tf.set_random_seed(random_state)
    os.environ['PYTHONHASHSEED'] = '0'

    # this creates adata.raw with raw counts and copies adata if copy==True
    adata = read_dataset(adata,
                         transpose=False,
                         test_split=False,
                         copy=copy)

    # check for zero genes
    nonzero_genes, _ = sc.pp.filter_genes(adata.X, min_counts=1)