How to use scanpy - 10 common examples

To help you get started, we’ve selected a few scanpy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github theislab / trVAE / tests / monitor_beta.py View on Github external
def create_data(data_dict):
    """Load a normalized dataset and derive train/validation sets with a held-out group.

    Reads ``./data/<name>/<name>_normalized.h5ad``, keeps only the
    observations whose condition is one of the source or target conditions,
    and, when there are more than 2000 variables, restricts the data to the
    ones flagged ``highly_variable`` after asking scanpy for the top 2000.
    The result is split with ``train_test_split(adata, 0.80)``; from each
    split, observations that are both of the first ``spec_cell_types`` entry
    and in a target condition are removed.

    Args:
        data_dict: Mapping with a required ``'name'`` entry and optional
            ``"source_conditions"``, ``"target_conditions"``,
            ``"cell_type_key"``, ``'condition_key'`` (default
            ``'condition'``) and ``"spec_cell_types"`` entries.

    Returns:
        A tuple ``(adata, net_train_adata, net_valid_adata)``: the filtered
        dataset and the two splits with the held-out group removed.
    """
    data_name = data_dict['name']
    source_conditions = data_dict.get("source_conditions")
    target_conditions = data_dict.get("target_conditions")
    cell_type_key = data_dict.get("cell_type_key", None)
    condition_key = data_dict.get('condition_key', 'condition')
    # Only the first listed cell type is held out.
    held_out_cell_type = data_dict.get("spec_cell_types", None)[0]

    def _without_held_out(split):
        # Copy first, then index the copy with a mask computed on the split.
        duplicate = split.copy()
        of_held_out_type = split.obs[cell_type_key] == held_out_cell_type
        in_target_condition = split.obs[condition_key].isin(target_conditions)
        return duplicate[~(of_held_out_type & in_target_condition)]

    adata = sc.read(f"./data/{data_name}/{data_name}_normalized.h5ad")
    in_selected_conditions = adata.obs[condition_key].isin(source_conditions + target_conditions)
    adata = adata[in_selected_conditions]

    if adata.shape[1] > 2000:
        sc.pp.highly_variable_genes(adata, n_top_genes=2000)
        highly_variable = adata.var['highly_variable']
        adata = adata[:, highly_variable]

    train_adata, valid_adata = train_test_split(adata, 0.80)

    net_train_adata = _without_held_out(train_adata)
    net_valid_adata = _without_held_out(valid_adata)
    return adata, net_train_adata, net_valid_adata
github theislab / trVAE / tests / monitor_beta.py View on Github external
def create_data(data_dict):
    """Build the filtered dataset plus train/validation subsets for a network.

    Steps, in order:

    1. Read ``./data/<name>/<name>_normalized.h5ad``.
    2. Keep observations whose condition is in the source or target lists.
    3. If there are more than 2000 variables, run scanpy's
       ``highly_variable_genes`` with ``n_top_genes=2000`` and keep the
       flagged variables.
    4. Split with ``train_test_split(adata, 0.80)``.
    5. From each split, remove observations that are of the first entry of
       ``spec_cell_types`` and also in a target condition.

    Args:
        data_dict: Configuration mapping. ``'name'`` is required;
            ``"source_conditions"``, ``"target_conditions"``,
            ``"cell_type_key"``, ``'condition_key'`` (defaulting to
            ``'condition'``) and ``"spec_cell_types"`` are read with ``get``.

    Returns:
        ``(adata, net_train_adata, net_valid_adata)``.
    """
    data_name = data_dict['name']
    sources = data_dict.get("source_conditions")
    targets = data_dict.get("target_conditions")
    cell_type_key = data_dict.get("cell_type_key", None)
    condition_key = data_dict.get('condition_key', 'condition')
    excluded_type = data_dict.get("spec_cell_types", None)[0]

    adata = sc.read(f"./data/{data_name}/{data_name}_normalized.h5ad")
    adata = adata[adata.obs[condition_key].isin(sources + targets)]

    n_variables = adata.shape[1]
    if n_variables > 2000:
        sc.pp.highly_variable_genes(adata, n_top_genes=2000)
        adata = adata[:, adata.var['highly_variable']]

    def _strip_excluded(subset):
        # The copy is taken before the mask is evaluated on the split.
        copied = subset.copy()
        excluded = (subset.obs[cell_type_key] == excluded_type) & \
                   (subset.obs[condition_key].isin(targets))
        return copied[~excluded]

    # The train split is processed before the validation split.
    net_train_adata, net_valid_adata = [
        _strip_excluded(subset) for subset in train_test_split(adata, 0.80)
    ]
    return adata, net_train_adata, net_valid_adata
github theislab / trVAE / tests / mmd_resnet.py View on Github external
calibMMDNet.fit(train_real_ctrl, sourceLabels, nb_epoch=n_epochs, batch_size=batch_size, validation_split=0.1,
                    verbose=2, callbacks=[lrate, EarlyStopping(monitor='val_loss', patience=50, mode='auto')])

    path_to_save = f"../results/MMDResNet/{data_name}/{spec_cell_type}"
    sc.settings.figdir = os.path.abspath(path_to_save)
    sc.settings.writedir = os.path.abspath(path_to_save)

    CD4T = data.copy()[data.obs[cell_type_key] == spec_cell_type]
    ctrl_CD4T = data.copy()[(data.obs[cell_type_key] == spec_cell_type) & (data.obs['condition'] == source_key)]
    stim_CD4T = data.copy()[(data.obs[cell_type_key] == spec_cell_type) & (data.obs['condition'] == target_key)]
    if sparse.issparse(ctrl_CD4T.X):
        ctrl_CD4T.X = ctrl_CD4T.X.A
        stim_CD4T.X = stim_CD4T.X.A

    if data_name == "pbmc":
        sc.tl.rank_genes_groups(CD4T, groupby="condition", n_genes=100, method="wilcoxon")
        top_100_genes = CD4T.uns["rank_genes_groups"]["names"][target_key].tolist()
        gene_list = top_100_genes[:10]
    else:
        sc.tl.rank_genes_groups(CD4T, groupby="condition", n_genes=100, method="wilcoxon")
        top_50_down_genes = CD4T.uns["rank_genes_groups"]["names"][source_key].tolist()
        top_50_up_genes = CD4T.uns["rank_genes_groups"]["names"][target_key].tolist()
        top_100_genes = top_50_up_genes + top_50_down_genes
        gene_list = top_50_down_genes[:5] + top_50_up_genes[:5]

    pred_stim = calibMMDNet.predict(ctrl_CD4T.X)
    all_Data = sc.AnnData(np.concatenate([ctrl_CD4T.X, stim_CD4T.X, pred_stim]))
    all_Data.obs["condition"] = ["ctrl"] * len(ctrl_CD4T.X) + ["real_stim"] * len(stim_CD4T.X) + \
                                ["pred_stim"] * len(pred_stim)
    all_Data.var_names = CD4T.var_names

    trvae.plotting.reg_var_plot(all_Data,
github theislab / trVAE / tests / test_vae.py View on Github external
z_dim=100,
                  subsample=None,
                  alpha=0.001,
                  n_epochs=500,
                  batch_size=512,
                  dropout_rate=0.2,
                  learning_rate=0.001,
                  gpus=1,
                  verbose=2,
                  arch_style=1,
                  ):
    data_name = data_dict['name']
    metadata_path = data_dict['metadata']
    cell_type_key = data_dict['cell_type']

    train_data = sc.read(f"../data/{data_name}/anna/processed_adata_Cusanovich_brain_May29_2019_5000.h5ad")
    train_data.X += abs(train_data.X.min())
    if subsample is not None:
        train_data = train_data[:subsample]

    spec_cell_type = data_dict.get("spec_cell_types", None)
    if spec_cell_type is not []:
        cell_types = spec_cell_type

    train_size = int(train_data.shape[0] * 0.85)
    indices = np.arange(train_data.shape[0])
    np.random.shuffle(indices)
    train_idx = indices[:train_size]
    valid_idx = indices[train_size:]

    net_train_data = train_data.copy()[train_idx, :]
    net_valid_data = train_data.copy()[valid_idx, :]
github theislab / scgen / tests / test_vae_keras.py View on Github external
stim_key = "Hpoly.Day10"
        ctrl_key = "Control"
        cell_type_key = "cell_label"
        train = sc.read("../data/ch10_train_7000.h5ad")
    elif data_name == "salmonella":
        cell_type_to_monitor = None
        stim_key = "Salmonella"
        ctrl_key = "Control"
        cell_type_key = "cell_label"
        train = sc.read("../data/chsal_train_7000.h5ad")
    elif data_name == "species":
        cell_type_to_monitor = "rat"
        stim_key = "LPS6"
        ctrl_key = "unst"
        cell_type_key = "species"
        train = sc.read("../data/train_all_lps6.h5ad")

    for cell_type in train.obs[cell_type_key].unique().tolist():
        os.makedirs(f"./vae_results/{data_name}/{cell_type}/", exist_ok=True)
        os.chdir(f"./vae_results/{data_name}/{cell_type}")
        net_train_data = train[~((train.obs[cell_type_key] == cell_type) & (train.obs[condition_key] == stim_key))]
        network = scgen.VAEArithKeras(x_dimension=net_train_data.X.shape[1],
                                 z_dimension=z_dim,
                                 alpha=alpha,
                                 dropout_rate=dropout_rate,
                                 learning_rate=learning_rate)

        # network.restore_model()
        network.train(net_train_data, train, n_epochs=n_epochs, batch_size=batch_size, verbose=2,
                      conditions={"ctrl": ctrl_key, "stim": stim_key},
                      condition_key=condition_key, cell_type_key=cell_type_key,
                      cell_type=cell_type, path_to_save="./figures/keras/")
github theislab / scgen / tests / test_vae.py View on Github external
def plot_reg_mean_with_genes(data_name="pbmc", gene_list=None):
    """Load the training data and the reconstructed data for a named dataset.

    For a recognised ``data_name`` ("pbmc", "hpoly", "salmonella" or
    "species") the matching condition keys, cell-type column name and
    training file are selected and the training file is read. The
    reconstructed data is always read from
    ``./vae_results/<data_name>/reconstructed.h5ad``.

    Args:
        data_name: Name of the dataset to load.
        gene_list: Not used by the code shown here.
    """
    # Each row: (dataset name, stim_key, ctrl_key, cell_type_key, training file).
    known_datasets = (
        ("pbmc", "stimulated", "control", "cell_type", "../data/train.h5ad"),
        ("hpoly", "Hpoly.Day10", "Control", "cell_label", "../data/ch10_train_7000.h5ad"),
        ("salmonella", "Salmonella", "Control", "cell_label", "../data/chsal_train_7000.h5ad"),
        ("species", "LPS6", "unst", "species", "../data/train_all_lps6.h5ad"),
    )
    for known_name, stim, ctrl, cell_type_column, train_path in known_datasets:
        if data_name == known_name:
            stim_key = stim
            ctrl_key = ctrl
            cell_type_key = cell_type_column
            train = sc.read(train_path)
            break
    recon_data = sc.read(f"./vae_results/{data_name}/reconstructed.h5ad")
github theislab / trVAE / tests / test_trVAE.py View on Github external
def visualize_trained_network_results(data_dict, z_dim=100):
    plt.close("all")
    data_name = data_dict.get('name', None)
    source_key = data_dict.get('source_key', None)
    target_key = data_dict.get('target_key', None)
    cell_type_key = data_dict.get("cell_type", None)

    data = sc.read(f"../data/{data_name}/train_{data_name}.h5ad")
    cell_types = data.obs[cell_type_key].unique().tolist()

    spec_cell_type = data_dict.get("spec_cell_types", None)
    if spec_cell_type:
        cell_types = spec_cell_type

    for cell_type in cell_types:
        path_to_save = f"../results/RCVAE/{data_name}/{cell_type}/{z_dim}/{source_key} to {target_key}/Visualizations/"
        os.makedirs(path_to_save, exist_ok=True)
        sc.settings.figdir = os.path.abspath(path_to_save)

        train_data = data.copy()[~((data.obs['condition'] == target_key) & (data.obs[cell_type_key] == cell_type))]

        cell_type_adata = data[data.obs[cell_type_key] == cell_type]

        network = trvae.trVAE(x_dimension=data.shape[1],
github theislab / trVAE / tests / test_trVAEMulti.py View on Github external
def visualize_batch_correction(data_dict, z_dim=100, mmd_dimension=128):
    """Prepare per-cell-type output directories and data subsets.

    Closes all open matplotlib figures, obtains the dataset (via
    ``merge_data`` when ``need_merge`` is set, otherwise from
    ``../data/<name>/train_<name>.h5ad``) and then, for each cell type,
    creates the visualization directory, points scanpy's ``figdir`` at it,
    and builds two subsets: the data without that cell type's
    target-condition observations, and the data for that cell type only.

    Args:
        data_dict: Configuration mapping. ``'name'`` is required; the other
            entries read here (``"source_conditions"``,
            ``"target_conditions"``, ``"cell_type"``, ``'need_merge'``,
            ``'label_encoder'``, ``'condition'``, ``"spec_cell_types"``)
            are optional.
        z_dim: Used as a component of the output directory path.
        mmd_dimension: Not used by the code shown here.
    """
    plt.close("all")
    data_name = data_dict['name']
    source_keys = data_dict.get("source_conditions")
    target_keys = data_dict.get("target_conditions")
    cell_type_key = data_dict.get("cell_type", None)
    need_merge = data_dict.get('need_merge', False)
    label_encoder = data_dict.get('label_encoder', None)
    condition_key = data_dict.get('condition', 'condition')

    if not need_merge:
        data = sc.read(f"../data/{data_name}/train_{data_name}.h5ad")
    else:
        data, _ = merge_data(data_dict)

    # Default to every cell type present; a non-empty "spec_cell_types"
    # entry overrides that list.
    every_cell_type = data.obs[cell_type_key].unique().tolist()
    spec_cell_type = data_dict.get("spec_cell_types", None)
    cell_types = spec_cell_type if spec_cell_type else every_cell_type

    for cell_type in cell_types:
        path_to_save = f"../results/RCVAEMulti/{data_name}/{cell_type}/{z_dim}/Visualizations/"
        os.makedirs(path_to_save, exist_ok=True)
        sc.settings.figdir = os.path.abspath(path_to_save)

        # Copy first, then drop this cell type's target-condition observations.
        data_copy = data.copy()
        in_target_condition = data.obs[condition_key].isin(target_keys)
        is_this_cell_type = data.obs[cell_type_key] == cell_type
        train_data = data_copy[~(in_target_condition & is_this_cell_type)]

        cell_type_adata = data[data.obs[cell_type_key] == cell_type]
github theislab / scgen / tests / test_mmd_cvae.py View on Github external
pred = network.predict(data=unperturbed_data, encoder_labels=true_labels, decoder_labels=fake_labels)
        pred_adata = anndata.AnnData(pred, obs={condition_key: ["pred"] * len(pred)},
                                     var={"var_names": cell_type_data.var_names})
        all_adata = cell_type_data.concatenate(pred_adata)

        scgen.plotting.reg_mean_plot(all_adata, condition_key=condition_key,
                                     axis_keys={"x": ctrl_key, "y": stim_key, "y1": "pred"},
                                     gene_list=diff_genes,
                                     path_to_save=f"./figures/reg_mean_{z_dim}.pdf")
        scgen.plotting.reg_var_plot(all_adata, condition_key=condition_key,
                                    axis_keys={"x": ctrl_key, "y": stim_key, 'y1': "pred"},
                                    gene_list=diff_genes,
                                    path_to_save=f"./figures/reg_var_{z_dim}.pdf")

        sc.pp.neighbors(all_adata)
        sc.tl.umap(all_adata)
        sc.pl.umap(all_adata, color=condition_key,
                   save="pred")

        sc.pl.violin(all_adata, keys=diff_genes.tolist()[0], groupby=condition_key,
                     save=f"_{z_dim}_{diff_genes.tolist()[0]}")

        os.chdir("../../../")
github theislab / trVAE / tests / test_trVAE.py View on Github external
latent_with_fake_labels = sc.AnnData(X=latent_with_fake_labels)
        latent_with_fake_labels.obs['condition'] = data.obs['condition'].values
        latent_with_fake_labels.obs[cell_type_key] = data.obs[cell_type_key].values

        mmd_latent_with_true_labels = sc.AnnData(X=mmd_latent_with_true_labels)
        mmd_latent_with_true_labels.obs['condition'] = data.obs['condition'].values
        mmd_latent_with_true_labels.obs[cell_type_key] = data.obs[cell_type_key].values

        mmd_latent_with_fake_labels = sc.AnnData(X=mmd_latent_with_fake_labels)
        mmd_latent_with_fake_labels.obs['condition'] = data.obs['condition'].values
        mmd_latent_with_fake_labels.obs[cell_type_key] = data.obs[cell_type_key].values

        color = ['condition', cell_type_key]

        sc.pp.neighbors(train_data)
        sc.tl.umap(train_data)
        sc.pl.umap(train_data, color=color,
                   save=f'_{data_name}_{cell_type}_train_data',
                   show=False)

        sc.pp.neighbors(latent_with_true_labels)
        sc.tl.umap(latent_with_true_labels)
        sc.pl.umap(latent_with_true_labels, color=color,
                   save=f"_{data_name}_{cell_type}_latent_with_true_labels",
                   show=False)

        sc.pp.neighbors(latent_with_fake_labels)
        sc.tl.umap(latent_with_fake_labels)
        sc.pl.umap(latent_with_fake_labels, color=color,
                   save=f"_{data_name}_{cell_type}_latent_with_fake_labels",
                   show=False)