How to use the scanpy.AnnData function in scanpy

To help you get started, we’ve selected a few scanpy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github theislab / scgen / tests / test_mmd_cvae.py View on Github external
save=f"_latent_true_labels_{z_dim}",
                   show=False)

        latent_with_fake_labels = network.to_latent(net_train_data.X, fake_labels)
        latent_with_fake_labels = sc.AnnData(X=latent_with_fake_labels,
                                             obs={condition_key: net_train_data.obs[condition_key].tolist(),
                                                  cell_type_key: net_train_data.obs[cell_type_key].tolist()})
        sc.pp.neighbors(latent_with_fake_labels)
        sc.tl.umap(latent_with_fake_labels)
        sc.pl.umap(latent_with_fake_labels, color=[condition_key, cell_type_key],
                   save=f"_latent_fake_labels_{z_dim}",
                   show=False)

        mmd_with_true_labels = network.to_mmd_layer(network.cvae_model, net_train_data.X,
                                                    encoder_labels=true_labels, feed_fake=False)
        mmd_with_true_labels = sc.AnnData(X=mmd_with_true_labels,
                                          obs={condition_key: net_train_data.obs[condition_key].tolist(),
                                               cell_type_key: net_train_data.obs[cell_type_key].tolist()})
        sc.pp.neighbors(mmd_with_true_labels)
        sc.tl.umap(mmd_with_true_labels)
        sc.pl.umap(mmd_with_true_labels, color=[condition_key, cell_type_key],
                   save=f"_mmd_true_labels_{z_dim}",
                   show=False)

        mmd_with_fake_labels = network.to_mmd_layer(network.cvae_model, net_train_data.X,
                                                    encoder_labels=true_labels, feed_fake=True)
        mmd_with_fake_labels = sc.AnnData(X=mmd_with_fake_labels,
                                          obs={condition_key: net_train_data.obs[condition_key].tolist(),
                                               cell_type_key: net_train_data.obs[cell_type_key].tolist()})
        sc.pp.neighbors(mmd_with_fake_labels)
        sc.tl.umap(mmd_with_fake_labels)
        sc.pl.umap(mmd_with_fake_labels, color=[condition_key, cell_type_key],
github theislab / scgen / tests / test_cvae.py View on Github external
# Evaluation script for a scgen CVAE: embeds the training cells, plots UMAPs of
# the latent and MMD representations, then predicts CD4T cells from their
# control counterparts and draws a mean-regression plot.

# Load the data and hold out every cell that is both CD4T and stimulated.
train = sc.read("./data/train.h5ad")
# train = train[train.obs["cell_type"] == "CD4T"]
is_cd4t = train.obs["cell_type"] == "CD4T"
is_stimulated = train.obs["condition"] == "stimulated"
train = train[~(is_cd4t & is_stimulated)]

# restore_model() is used in place of training (the train call is disabled).
z_dim = 20
n_genes = train.X.shape[1]
network = scgen.CVAE(x_dimension=n_genes, z_dimension=z_dim, alpha=0.1)
network.restore_model()
# network.train(train, n_epochs=100)

labels, _ = scgen.label_encoder(train)

# UMAP of the latent representation, annotated with condition and cell type.
latent = network.to_latent(train.X.A, labels=labels)
latent_obs = {
    "condition": train.obs["condition"].tolist(),
    "cell_type": train.obs["cell_type"].tolist(),
}
adata = sc.AnnData(X=latent, obs=latent_obs)
sc.pp.neighbors(adata)
sc.tl.umap(adata)
sc.pl.umap(adata, color=["condition", "cell_type"], save=f"train_{z_dim}")

# Same plot for the MMD-layer representation.
mmd = network.to_mmd_layer(train.X.A, labels=labels)
mmd_obs = {
    "condition": train.obs["condition"].tolist(),
    "cell_type": train.obs["cell_type"].tolist(),
}
adata_mmd = sc.AnnData(X=mmd, obs=mmd_obs)
sc.pp.neighbors(adata_mmd)
sc.tl.umap(adata_mmd)
sc.pl.umap(adata_mmd, color=["condition", "cell_type"], save=f"true_labels_{z_dim}")

# Reload the unfiltered data and predict from the control CD4T cells,
# feeding an all-ones label column to the network.
train = sc.read("./data/train.h5ad")
cd4t_mask = train.obs["cell_type"] == "CD4T"
control_mask = train.obs["condition"] == "control"
CD4T = train[cd4t_mask]
unperturbed_data = train[cd4t_mask & control_mask]
fake_labels = np.ones(shape=(len(unperturbed_data), 1))
predicted_cells = network.predict(unperturbed_data, fake_labels)

# Tag the predictions with condition "pred" and append them to the real CD4T cells.
pred_obs = {"condition": ["pred"] * len(fake_labels)}
adata = sc.AnnData(predicted_cells, obs=pred_obs)
adata.var_names = CD4T.var_names
all_adata = CD4T.concatenate(adata)

# Compare predicted against stimulated cells and save the figure as a PDF.
scgen.plotting.reg_mean_plot(
    all_adata,
    condition_key="condition",
    axis_keys={"x": "pred", "y": "stimulated"},
    gene_list=["ISG15", "CD3D"],
    path_to_save=f"figures/reg_mean_{z_dim}.pdf",
)
github theislab / scgen / tests / test_mmd_ccvae.py View on Github external
os.chdir(f"./results/{data_name}/{cell_type}")
            # net_train_data = train[~((train.obs[cell_type_key] == cell_type) & (train.obs[condition_key] == stim_key))]
            net_train_data = train
            network = scgen.MMDCCVAE(x_dimension=(256, 256, 3,), z_dimension=z_dim, alpha=alpha, beta=beta,
                                     batch_mmd=True, kernel=kernel, train_with_fake_labels=False,
                                     model_path=f"./", arch_style=arch_style)

            # network.restore_model()
            network.train(net_train_data, n_epochs=n_epochs, batch_size=batch_size, verbose=1)
            print(f"network_{cell_type} has been trained!")

            true_labels, _ = scgen.label_encoder(net_train_data)
            fake_labels = np.ones(shape=(net_train_data.shape[0], 1))

            latent_with_true_labels = network.to_latent(net_train_data.X, labels=true_labels)
            latent_with_true_labels = sc.AnnData(X=latent_with_true_labels,
                                                 obs={condition_key: net_train_data.obs[condition_key].tolist(),
                                                      cell_type_key: pd.Categorical(net_train_data.obs[cell_type_key])})
            sc.pp.neighbors(latent_with_true_labels)
            sc.tl.umap(latent_with_true_labels)
            sc.pl.umap(latent_with_true_labels, color=[condition_key, cell_type_key],
                       save=f"_latent_true_labels_{z_dim}",
                       show=False)

            latent_with_fake_labels = network.to_latent(net_train_data.X, fake_labels)
            latent_with_fake_labels = sc.AnnData(X=latent_with_fake_labels,
                                                 obs={condition_key: net_train_data.obs[condition_key].tolist(),
                                                      cell_type_key: pd.Categorical(net_train_data.obs[cell_type_key])})
            sc.pp.neighbors(latent_with_fake_labels)
            sc.tl.umap(latent_with_fake_labels)
            sc.pl.umap(latent_with_fake_labels, color=[condition_key, cell_type_key],
                       save=f"_latent_fake_labels_{z_dim}",
github theislab / trVAE / tests / test_trVAEMulti.py View on Github external
pred_adatas = pred_adata
            else:
                pred_adatas = pred_adatas.concatenate(pred_adata)

        pred_adatas.write_h5ad(filename=f"../data/reconstructed/RCVAEMulti/{data_name}.h5ad")

        import matplotlib as mpl
        mpl.rcParams.update(mpl.rcParamsDefault)

        color = [condition_key, cell_type_key]

        latent_with_true_labels = sc.AnnData(X=latent_with_true_labels)
        latent_with_true_labels.obs[condition_key] = data.obs[condition_key].values
        latent_with_true_labels.obs[cell_type_key] = data.obs[cell_type_key].values

        latent_with_fake_labels = [sc.AnnData(X=latent_with_fake_labels[i]) for i in range(n_conditions)]
        for i in range(n_conditions):
            latent_with_fake_labels[i].obs[condition_key] = data.obs[condition_key].values
            latent_with_fake_labels[i].obs[cell_type_key] = data.obs[cell_type_key].values

            sc.pp.neighbors(latent_with_fake_labels[i])
            sc.tl.umap(latent_with_fake_labels[i])
            sc.pl.umap(latent_with_fake_labels[i], color=color,
                       save=f"_{data_name}_{cell_type}_latent_with_fake_labels_{i}",
                       show=False,
                       wspace=0.15,
                       frameon=False)

        mmd_latent_with_true_labels = sc.AnnData(X=mmd_latent_with_true_labels)
        mmd_latent_with_true_labels.obs[condition_key] = data.obs[condition_key].values
        mmd_latent_with_true_labels.obs[cell_type_key] = data.obs[cell_type_key].values
github theislab / trVAE / tests / test_trVAEATAC.py View on Github external
# Fragment of a plotting routine: resets matplotlib's rcParams to the library
# defaults, wraps four precomputed representations in AnnData objects, and
# draws UMAPs coloured by condition.
import matplotlib as mpl
    mpl.rcParams.update(mpl.rcParamsDefault)

    # Each array is rebound to an AnnData carrying the 'condition' column of
    # `data`. `.values` copies positionally, so this assumes the rows are in
    # the same order as `data` -- TODO confirm.
    latent_with_true_labels = sc.AnnData(X=latent_with_true_labels)
    latent_with_true_labels.obs['condition'] = data.obs['condition'].values
    # latent_with_true_labels.obs[cell_type_key] = data.obs[cell_type_key].values

    latent_with_fake_labels = sc.AnnData(X=latent_with_fake_labels)
    latent_with_fake_labels.obs['condition'] = data.obs['condition'].values
    # latent_with_fake_labels.obs[cell_type_key] = data.obs[cell_type_key].values

    mmd_latent_with_true_labels = sc.AnnData(X=mmd_latent_with_true_labels)
    mmd_latent_with_true_labels.obs['condition'] = data.obs['condition'].values
    # mmd_latent_with_true_labels.obs[cell_type_key] = data.obs[cell_type_key].values

    mmd_latent_with_fake_labels = sc.AnnData(X=mmd_latent_with_fake_labels)
    mmd_latent_with_fake_labels.obs['condition'] = data.obs['condition'].values
    # mmd_latent_with_fake_labels.obs[cell_type_key] = data.obs[cell_type_key].values

    # Only 'condition' is used for colouring; the cell-type columns above are disabled.
    color = ['condition']

    # UMAP of the input data: neighbour graph, embedding, then a plot that is
    # written to file (show=False suppresses the interactive window).
    sc.pp.neighbors(data)
    sc.tl.umap(data)
    sc.pl.umap(data, color=color,
               save=f'_{data_name}_train_data',
               show=False)

    # Same three steps for the latent representation built with the true labels.
    sc.pp.neighbors(latent_with_true_labels)
    sc.tl.umap(latent_with_true_labels)
    sc.pl.umap(latent_with_true_labels, color=color,
               save=f"_{data_name}_latent_with_true_labels",
               show=False)
github theislab / trVAE / tests / test_vae.py View on Github external
# Fragment: builds a trvae.VAE sized to the number of columns in `data`,
# restores it, embeds `data` and plots UMAPs of the input and of the latent space.
network = trvae.VAE(x_dimension=data.shape[1],
                        z_dimension=z_dim,
                        arch_style=arch_style,
                        model_path=f"../models/VAE/{data_name}-{arch_style}/{z_dim}/", )

    # Presumably loads previously saved weights from `model_path` -- verify in trvae.
    network.restore_model()

    # Replace a sparse matrix with its dense form in place before feeding it to the network.
    if sparse.issparse(data.X):
        data.X = data.X.A

    feed_data = data.X

    latent = network.to_latent(feed_data)

    # Rebind `latent` to an AnnData and attach the cell-type column of `data`
    # (copied positionally via `.values`).
    latent = sc.AnnData(X=latent)
    latent.obs[cell_type_key] = data.obs[cell_type_key].values

    color = [cell_type_key]

    # NOTE(review): `train_data` is not defined in the visible code, while the
    # latent above is built from `data` -- confirm this is the intended object.
    sc.pp.neighbors(train_data)
    sc.tl.umap(train_data)
    sc.pl.umap(train_data, color=color,
               save=f'_{data_name}_train_data.pdf',
               show=False)

    # UMAP of the latent representation, written to file rather than shown.
    sc.pp.neighbors(latent)
    sc.tl.umap(latent)
    sc.pl.umap(latent, color=color,
               save=f"_{data_name}_latent.pdf",
               show=False)
github theislab / trVAE / tests / test_cvae.py View on Github external
top_100_genes=top_100_genes,
                                    gene_list=gene_list,
                                    condition_key='condition',
                                    axis_keys={"x": 'predicted', 'y': target_key},
                                    labels={'x': 'pred stim', 'y': 'real stim'},
                                    legend=False,
                                    fontsize=20,
                                    textsize=14,
                                    title=cell_type,
                                    path_to_save=os.path.join(path_to_save,
                                                              f'rcvae_reg_var_{data_name}_{cell_type}.pdf'))

        import matplotlib as mpl
        mpl.rcParams.update(mpl.rcParamsDefault)

        latent_with_true_labels = sc.AnnData(X=latent_with_true_labels)
        latent_with_true_labels.obs['condition'] = data.obs['condition'].values
        latent_with_true_labels.obs[cell_type_key] = data.obs[cell_type_key].values

        latent_with_fake_labels = sc.AnnData(X=latent_with_fake_labels)
        latent_with_fake_labels.obs['condition'] = data.obs['condition'].values
        latent_with_fake_labels.obs[cell_type_key] = data.obs[cell_type_key].values

        mmd_latent_with_true_labels = sc.AnnData(X=mmd_latent_with_true_labels)
        mmd_latent_with_true_labels.obs['condition'] = data.obs['condition'].values
        mmd_latent_with_true_labels.obs[cell_type_key] = data.obs[cell_type_key].values

        mmd_latent_with_fake_labels = sc.AnnData(X=mmd_latent_with_fake_labels)
        mmd_latent_with_fake_labels.obs['condition'] = data.obs['condition'].values
        mmd_latent_with_fake_labels.obs[cell_type_key] = data.obs[cell_type_key].values

        color = ['condition', cell_type_key]
github theislab / scgen / scgen / models / util.py View on Github external
# Violin plot of the first entry of `diff_genes`, grouped by `condition_key`;
# the figure is written to file (suffix is the gene name) and not shown.
sc.pl.violin(all_adata, keys=diff_genes.tolist()[0], groupby=condition_key,
                     save=f"_{diff_genes.tolist()[0]}",
                     show=False)

        # Close all open figures (`plt` is presumably matplotlib.pyplot).
        plt.close("all")

    # Branch taken when `network` is a scgen.CVAE; the preceding `if` is outside this excerpt.
    elif isinstance(network, scgen.CVAE):
        true_labels, _ = scgen.label_encoder(train)


        # Embed the training data, densifying `train.X` first when it is sparse.
        if sparse.issparse(train.X):
            latent = network.to_latent(train.X.A, labels=true_labels)
        else:
            latent = network.to_latent(train.X, labels=true_labels)
        # Wrap the embedding in an AnnData annotated with condition and cell type.
        latent = sc.AnnData(X=latent,
                            obs={condition_key: train.obs[condition_key].tolist(),
                                 cell_type_key: train.obs[cell_type_key].tolist()})
        # Optional UMAP of the latent space, saved to file and not shown.
        if plot_umap:
            sc.pp.neighbors(latent)
            sc.tl.umap(latent)
            sc.pl.umap(latent, color=[condition_key, cell_type_key],
                       save=f"_latent",
                       show=False)

        # Restrict to the requested cell type and feed an all-ones label
        # column (one row per selected cell) to the network.
        cell_type_data = train[train.obs[cell_type_key] == cell_type]
        fake_labels = np.ones(shape=(cell_type_data.shape[0], 1))

        pred = network.predict(data=cell_type_data, labels=fake_labels)

        # Every predicted row is labelled "pred" in the condition column; the
        # source variable names are stored in a var column called "var_names".
        pred_adata = anndata.AnnData(pred, obs={condition_key: ["pred"] * len(pred)},
                                     var={"var_names": cell_type_data.var_names})
github YosefLab / scVI / scvi / dataset / dataset.py View on Github external
"Choose one of the following flavors: 'seurat_v2', 'seurat_v3', 'cell_ranger', 'poisson_zeros'"
            )

        if flavor == "seurat_v3" and n_top_genes is None:
            raise ValueError("n_top_genes must not be None with flavor=='seurat_v3'")

        logger.info("extracting highly variable genes using {} flavor".format(flavor))

        # Creating AnnData structure
        obs = pd.DataFrame(
            data=dict(batch=self.batch_indices.squeeze()),
            index=np.arange(self.nb_cells),
        ).astype("category")

        counts = sp_sparse.csc_matrix(self.X.copy())
        adata = sc.AnnData(X=counts, obs=obs)
        batch_key = "batch" if (batch_correction and self.n_batches >= 2) else None
        if flavor in ["cell_ranger", "seurat_v2"]:
            if flavor == "seurat_v2":
                # name expected by scanpy
                flavor = "seurat"

            # Counts normalization
            sc.pp.normalize_total(adata, target_sum=1e4)
            # logarithmed data
            sc.pp.log1p(adata)

            # Finding top genes
            sc.pp.highly_variable_genes(
                adata=adata,
                n_top_genes=n_top_genes,
                flavor=flavor,
github czbiohub / molecular-cross-validation / examples / sweep.py View on Github external
if issparse(X):
        X = np.array(X.todense())
    if np.allclose(X, X.astype(np.int)):
        X = X.astype(np.int)
    else:
        raise TypeError(
            "Molecular cross-validation requires integer count data.")

    if random_seed:
        np.random.seed(random_seed)

    X1 = np.random.binomial(X, p).astype(np.float)
    X2 = X - X1

    adata1 = sc.AnnData(X=X1)
    adata2 = sc.AnnData(X=X2)

    return adata1, adata2