How to use the scanpy.pp (preprocessing) module in scanpy

To help you get started, we’ve selected a few scanpy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github theislab / trVAE / tests / monitor_beta.py View on Github external
def create_data(data_dict):
    """Load a normalized dataset and derive train/validation sets with one group removed.

    The file ``./data/<name>/<name>_normalized.h5ad`` is read, restricted to
    cells whose condition is one of the source or target conditions, and,
    when it has more than 2000 variables, reduced to the genes flagged
    ``highly_variable`` by ``sc.pp.highly_variable_genes(n_top_genes=2000)``.
    The result is split with ``train_test_split(adata, 0.80)``; from each
    split, cells that are both of the first listed special cell type and in
    a target condition are removed.

    Parameters
    ----------
    data_dict : dict
        Required keys: ``'name'``, ``"source_conditions"``,
        ``"target_conditions"`` and ``"spec_cell_types"`` (only its first
        element is used). Optional keys: ``"cell_type_key"`` and
        ``'condition_key'`` (defaults to ``'condition'``).

    Returns
    -------
    tuple
        ``(adata, net_train_adata, net_valid_adata)``: the filtered dataset
        and the two splits with the held-out group removed.
    """
    data_name = data_dict['name']
    source_keys = data_dict.get("source_conditions")
    target_keys = data_dict.get("target_conditions")
    cell_type_key = data_dict.get("cell_type_key", None)
    condition_key = data_dict.get('condition_key', 'condition')
    held_out_cell_type = data_dict.get("spec_cell_types", None)[0]

    def _without_held_out(split):
        # Copy first, then index the copy with a mask computed on the split.
        snapshot = split.copy()
        is_held_out_type = split.obs[cell_type_key] == held_out_cell_type
        in_target_condition = split.obs[condition_key].isin(target_keys)
        return snapshot[~(is_held_out_type & in_target_condition)]

    dataset = sc.read(f"./data/{data_name}/{data_name}_normalized.h5ad")
    dataset = dataset[dataset.obs[condition_key].isin(source_keys + target_keys)]

    n_vars = dataset.shape[1]
    if n_vars > 2000:
        sc.pp.highly_variable_genes(dataset, n_top_genes=2000)
        dataset = dataset[:, dataset.var['highly_variable']]

    train_split, valid_split = train_test_split(dataset, 0.80)

    return dataset, _without_held_out(train_split), _without_held_out(valid_split)
github theislab / trVAE / tests / test_cvae.py View on Github external
mmd_latent_with_true_labels.obs['condition'] = data.obs['condition'].values
        mmd_latent_with_true_labels.obs[cell_type_key] = data.obs[cell_type_key].values

        mmd_latent_with_fake_labels = sc.AnnData(X=mmd_latent_with_fake_labels)
        mmd_latent_with_fake_labels.obs['condition'] = data.obs['condition'].values
        mmd_latent_with_fake_labels.obs[cell_type_key] = data.obs[cell_type_key].values

        color = ['condition', cell_type_key]

        sc.pp.neighbors(train_data)
        sc.tl.umap(train_data)
        sc.pl.umap(train_data, color=color,
                   save=f'_{data_name}_{cell_type}_train_data',
                   show=False)

        sc.pp.neighbors(latent_with_true_labels)
        sc.tl.umap(latent_with_true_labels)
        sc.pl.umap(latent_with_true_labels, color=color,
                   save=f"_{data_name}_{cell_type}_latent_with_true_labels",
                   show=False)

        sc.pp.neighbors(latent_with_fake_labels)
        sc.tl.umap(latent_with_fake_labels)
        sc.pl.umap(latent_with_fake_labels, color=color,
                   save=f"_{data_name}_{cell_type}_latent_with_fake_labels",
                   show=False)

        sc.pp.neighbors(mmd_latent_with_true_labels)
        sc.tl.umap(mmd_latent_with_true_labels)
        sc.pl.umap(mmd_latent_with_true_labels, color=color,
                   save=f"_{data_name}_{cell_type}_mmd_latent_with_true_labels",
                   show=False)
github theislab / trVAE / tests / test_trVAEMulti.py View on Github external
mmd_latent_with_true_labels.obs[condition_key] = data.obs[condition_key].values
        mmd_latent_with_true_labels.obs[cell_type_key] = data.obs[cell_type_key].values

        latent_with_true_labels = sc.AnnData(X=latent_with_true_labels)
        latent_with_true_labels.obs[condition_key] = data.obs[condition_key].values
        latent_with_true_labels.obs[cell_type_key] = data.obs[cell_type_key].values

        sc.pp.neighbors(train_data)
        sc.tl.umap(train_data)
        sc.pl.umap(train_data, color=color,
                   save=f'_{data_name}_{cell_type}_train_data',
                   show=False,
                   wspace=0.15,
                   frameon=False)

        sc.pp.neighbors(mmd_latent_with_true_labels)
        sc.tl.umap(mmd_latent_with_true_labels)
        sc.pl.umap(mmd_latent_with_true_labels, color=color,
                   save=f"_{data_name}_{cell_type}_mmd_latent_with_true_labels",
                   show=False,
                   wspace=0.15,
                   frameon=False)

        sc.pp.neighbors(latent_with_true_labels)
        sc.tl.umap(latent_with_true_labels)
        sc.pl.umap(latent_with_true_labels, color=color,
                   save=f"_{data_name}_{cell_type}_latent_with_true_labels",
                   show=False,
                   wspace=0.15,
                   frameon=False)

        # mmd_latent_with_true_labels.obs['mmd'] = 'others'
github theislab / trVAE / tests / test_vae.py View on Github external
feed_data = data.X

    latent = network.to_latent(feed_data)

    latent = sc.AnnData(X=latent)
    latent.obs[cell_type_key] = data.obs[cell_type_key].values

    color = [cell_type_key]

    sc.pp.neighbors(train_data)
    sc.tl.umap(train_data)
    sc.pl.umap(train_data, color=color,
               save=f'_{data_name}_train_data.pdf',
               show=False)

    sc.pp.neighbors(latent)
    sc.tl.umap(latent)
    sc.pl.umap(latent, color=color,
               save=f"_{data_name}_latent.pdf",
               show=False)

    plt.close("all")
github colomemaria / epiScanpy / episcanpy / preprocessing / _recipe.py View on Github external
X_tsne : `np.ndarray` (`adata.obs`, dtype `float`)
        tSNE coordinates of data.
    X_umap : `adata.obsm`
        UMAP coordinates of data.
    
    
    '''
    if copy:
        adata = adata.copy() 
    else:
        adata

    if pp_pca:
        sc.pp.pca(adata, n_comps=nb_pcs, svd_solver=svd_solver)
    
    sc.pp.neighbors(adata,  n_neighbors=n_neighbors, n_pcs=nb_pcs, method=method, metric=metric)
    #sc.tl.pca(adata, n_comps=nb_pcs)
    sc.tl.tsne(adata, n_pcs=nb_pcs, perplexity=perplexity)
    sc.tl.umap(adata, min_dist, spread, n_components)
    
    if copy:
        return(adata)
    else:
        None
github czbiohub / molecular-cross-validation / examples / sweep.py View on Github external
def recipe_seurat(adata):
    """Apply a three-step scanpy preprocessing chain to ``adata`` and return it.

    Steps, in order: ``normalize_total`` with ``target_sum=1e4``, ``log1p``,
    then ``scale`` with ``max_value=10`` and ``zero_center=False``. The
    calls are made without ``copy=True``, and the same ``adata`` object that
    was passed in is returned.
    """
    preprocess = sc.pp
    preprocess.normalize_total(adata, target_sum=1e4)
    preprocess.log1p(adata)
    preprocess.scale(adata, zero_center=False, max_value=10)
    return adata
github theislab / scgen / scgen / models / util.py View on Github external
plt.close("all")

    elif isinstance(network, scgen.CVAE):
        true_labels, _ = scgen.label_encoder(train)


        if sparse.issparse(train.X):
            latent = network.to_latent(train.X.A, labels=true_labels)
        else:
            latent = network.to_latent(train.X, labels=true_labels)
        latent = sc.AnnData(X=latent,
                            obs={condition_key: train.obs[condition_key].tolist(),
                                 cell_type_key: train.obs[cell_type_key].tolist()})
        if plot_umap:
            sc.pp.neighbors(latent)
            sc.tl.umap(latent)
            sc.pl.umap(latent, color=[condition_key, cell_type_key],
                       save=f"_latent",
                       show=False)

        cell_type_data = train[train.obs[cell_type_key] == cell_type]
        fake_labels = np.ones(shape=(cell_type_data.shape[0], 1))

        pred = network.predict(data=cell_type_data, labels=fake_labels)

        pred_adata = anndata.AnnData(pred, obs={condition_key: ["pred"] * len(pred)},
                                     var={"var_names": cell_type_data.var_names})

        all_adata = cell_type_data.concatenate(pred_adata)
        sc.tl.rank_genes_groups(cell_type_data, groupby=condition_key, n_genes=100)
        diff_genes = cell_type_data.uns["rank_genes_groups"]["names"][conditions["stim"]]
github normjam / benchmark / normbench / methods / lognorm_runner.py View on Github external
def run(self):
        """Per-cell normalize and log-transform ``self.data``, then dump the result.

        ``self.data`` is rebound to the normalized copy returned by
        ``sc.pp.normalize_per_cell(..., copy=True)``, ``log1p`` is applied to
        it, and ``self.dump_to_h5ad("lognorm_normalized")`` is called.
        """
        # With copy=True scanpy returns the normalized object instead of
        # modifying its argument; the return value must be kept, otherwise
        # the normalization is silently discarded and only log1p is applied.
        self.data = sc.pp.normalize_per_cell(self.data, copy=True)
        sc.pp.log1p(self.data)

        # Normalized matrix
        self.dump_to_h5ad("lognorm_normalized")
github theislab / dca / dca / io.py View on Github external
def normalize(adata, filter_min_counts=True, size_factors=True, normalize_input=True, logtrans_input=True):

    if filter_min_counts:
        sc.pp.filter_genes(adata, min_counts=1)
        sc.pp.filter_cells(adata, min_counts=1)

    if size_factors or normalize_input or logtrans_input:
        adata.raw = adata.copy()
    else:
        adata.raw = adata

    if size_factors:
        sc.pp.normalize_per_cell(adata)
        adata.obs['size_factors'] = adata.obs.n_counts / np.median(adata.obs.n_counts)
    else:
        adata.obs['size_factors'] = 1.0

    if logtrans_input:
        sc.pp.log1p(adata)

    if normalize_input:
github YosefLab / scVI / scvi / dataset / dataset.py View on Github external
data=dict(batch=self.batch_indices.squeeze()),
            index=np.arange(self.nb_cells),
        ).astype("category")

        counts = sp_sparse.csc_matrix(self.X.copy())
        adata = sc.AnnData(X=counts, obs=obs)
        batch_key = "batch" if (batch_correction and self.n_batches >= 2) else None
        if flavor in ["cell_ranger", "seurat_v2"]:
            if flavor == "seurat_v2":
                # name expected by scanpy
                flavor = "seurat"

            # Counts normalization
            sc.pp.normalize_total(adata, target_sum=1e4)
            # logarithmed data
            sc.pp.log1p(adata)

            # Finding top genes
            sc.pp.highly_variable_genes(
                adata=adata,
                n_top_genes=n_top_genes,
                flavor=flavor,
                batch_key=batch_key,
                inplace=True,  # inplace=False looks buggy
                **highly_var_genes_kwargs,
            )

        elif flavor == "seurat_v3":
            seurat_v3_highly_variable_genes(
                adata, n_top_genes=n_top_genes, batch_key=batch_key
            )