How to use the scanpy.api function in scanpy

To help you get started, we’ve selected a few scanpy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github theislab / trVAE / tests / st_gan.py View on Github external
# Integer flag controlling training; the help text describes the flag itself
# (it previously carried the description of an unrelated batch-size option).
arguments_group.add_argument('-t', '--do_train', type=int, default=1, required=False,
                             help='Whether to run training (integer flag, default 1)')
# Dropout ratio, a float defaulting to 0.5.
arguments_group.add_argument('-r', '--dropout_rate', type=float, default=0.5, required=False,
                             help='Dropout ratio')

# Parse the command line and look up the description of the selected dataset.
args = vars(parser.parse_args())
data_dict = DATASETS[args['data']]
data_name = data_dict.get('name', None)
cell_type_key = data_dict.get("cell_type", None)
source_key = data_dict.get('source_key')
target_key = data_dict.get('target_key')

# Training and validation files live side by side in the dataset directory.
train_path = f"../data/{data_name}/train_{data_name}.h5ad"
valid_path = f"../data/{data_name}/valid_{data_name}.h5ad"

data = sc.read(train_path)
validation = sc.read(valid_path)

# Densify sparse expression matrices. `toarray()` is used instead of the `.A`
# shorthand because newer SciPy releases no longer provide `.A` on sparse
# matrices; both return the same dense ndarray where `.A` exists.
if sparse.issparse(data.X):
    data.X = data.X.toarray()
if sparse.issparse(validation.X):
    validation.X = validation.X.toarray()

# =============================== data gathering ====================================
spec_cell_types = data_dict.get('spec_cell_types', None)
cell_types = data.obs[cell_type_key].unique().tolist()

for spec_cell_type in spec_cell_types:
    train_real = data.copy()[~((data.obs['condition'] == target_key) & (data.obs[cell_type_key] == spec_cell_type))]
    train_real_stim = train_real[train_real.obs["condition"] == target_key]
    train_real_ctrl = train_real[train_real.obs["condition"] == source_key]
    train_real_stim = train_real_stim.X
github maximilianh / cellBrowser / src / cbPyLib / cellbrowser / cellbrowser.py View on Github external
mito_genes = [name for name in adata.var_names if name.split('.')[0] in gencodeMitos]

        if(len(mito_genes)==0): # no single mitochondrial gene in the expression matrix ?
            pipeLog("WARNING - No single mitochondrial gene was found in the expression matrix.")
            pipeLog("Dying cells cannot be removed - please check your expression matrix")
            doMito = False
        else:
            doMito = True

            adata.obs['percent_mito'] = np.sum(adata[:, mito_genes].X, axis=1) / np.sum(adata.X, axis=1)
            adata.obs['UMI_Count'] = np.sum(adata.X, axis=1)

            sc.pl.violin(adata, ['n_genes', 'UMI_Count', 'percent_mito'], jitter=0.4, multi_panel=True)

            fig1=sc.pl.scatter(adata, x='UMI_Count', y='percent_mito', save="_percent_mito")
            fig2=sc.pl.scatter(adata, x='UMI_Count', y='n_genes', save="_gene_count")

            adata = adata[adata.obs['percent_mito'] < thrsh_mito, :]

    if conf.get("doFilterGenes", True):
        up_thrsh_genes=conf.get("filterMaxGenes", 15000)
        low_thrsh_genes=conf.get("filterMinGenes", 10)
        pipeLog("Remove cells with less than %d and more than %d genes" % (low_thrsh_genes, up_thrsh_genes))

        #Filtering out cells according to filter parameters
        pipeLog('Filtering cells')
        adata = adata[adata.obs['n_genes'] < up_thrsh_genes, :]
        adata = adata[adata.obs['n_genes'] > low_thrsh_genes, :]

        pipeLog("After filtering: Data has %d samples/observations and %d genes/variables" % (len(adata.obs), len(adata.var)))

    if conf.get("doNormalize", True):
github theislab / scgen / code / vec_arith.py View on Github external
# =============================== downloading training and validation files ====================================
# the validation data is not used when applying vector arithmetic in gene expression space

train_path = "../data/train_kang.h5ad"
valid_path = "../data/valid_kang.h5ad"

# Download the training file only when it is missing on disk, then load it.
if not os.path.isfile(train_path):
    train_url = "https://drive.google.com/uc?export=download&id=1-RpxbXwXEJLYZDFSHnWYenojZ8TxRZsP"
    t_dl = wget.download(train_url, train_path)
data = sc.read(train_path)

# Same procedure for the validation file.
# NOTE(review): this download reuses the training URL - confirm that is intended.
if not os.path.isfile(valid_path):
    train_url = "https://drive.google.com/uc?export=download&id=1-RpxbXwXEJLYZDFSHnWYenojZ8TxRZsP"
    t_dl = wget.download(train_url, valid_path)
validation = sc.read(valid_path)

# =============================== data gathering ====================================
# training cells
t_in = [
    'CD8T',
    'NK',
    'B',
    'Dendritic',
    'FCGR3A+Mono',
    'CD14+Mono',
]
# held-out cells
t_out = ['CD4T']
dr = data_reader(
    data,
    validation,
    {"ctrl": "control", "stim": "stimulated"},
    t_in,
    t_out,
)

# Split the training AnnData by condition; the control subset is passed
# through the reader's balancer.
train_real_cd = dr.train_real_adata[dr.train_real_adata.obs["condition"] == "control", :]
train_real_cd = dr.balancer(train_real_cd)
train_real_stimulated = dr.train_real_adata[dr.train_real_adata.obs["condition"] == "stimulated", :]
github theislab / scgen / code / vec_arith.py View on Github external
from hf import *
import numpy as np
import scanpy.api as sc
import os
from data_reader import data_reader
import wget


# =============================== downloading training and validation files ====================================
# the validation data is not used when applying vector arithmetic in gene expression space

train_path = "../data/train_kang.h5ad"
valid_path = "../data/valid_kang.h5ad"

# Download the training file only when it is missing on disk, then load it.
if not os.path.isfile(train_path):
    train_url = "https://drive.google.com/uc?export=download&id=1-RpxbXwXEJLYZDFSHnWYenojZ8TxRZsP"
    t_dl = wget.download(train_url, train_path)
data = sc.read(train_path)

# Same procedure for the validation file.
# NOTE(review): this download reuses the training URL - confirm that is intended.
if not os.path.isfile(valid_path):
    train_url = "https://drive.google.com/uc?export=download&id=1-RpxbXwXEJLYZDFSHnWYenojZ8TxRZsP"
    t_dl = wget.download(train_url, valid_path)
validation = sc.read(valid_path)

# =============================== data gathering ====================================
# training cells
t_in = [
    'CD8T',
    'NK',
    'B',
    'Dendritic',
    'FCGR3A+Mono',
    'CD14+Mono',
]
# held-out cells
t_out = ['CD4T']
github theislab / scgen / code / vec_arith_pca.py View on Github external
import numpy as np
import scanpy.api as sc
import os
from data_reader import data_reader
import wget
from sklearn.decomposition import PCA


# =============================== downloading training and validation files ====================================
# the validation data is not used when applying vector arithmetic in gene expression space

train_path = "../data/train_kang.h5ad"
valid_path = "../data/valid_kang.h5ad"

# Download the training file only when it is missing on disk, then load it.
if not os.path.isfile(train_path):
    train_url = "https://drive.google.com/uc?export=download&id=1-RpxbXwXEJLYZDFSHnWYenojZ8TxRZsP"
    t_dl = wget.download(train_url, train_path)
data = sc.read(train_path)

# Same procedure for the validation file.
# NOTE(review): this download reuses the training URL - confirm that is intended.
if not os.path.isfile(valid_path):
    train_url = "https://drive.google.com/uc?export=download&id=1-RpxbXwXEJLYZDFSHnWYenojZ8TxRZsP"
    t_dl = wget.download(train_url, valid_path)
validation = sc.read(valid_path)

# =============================== data gathering ====================================
# training cells
t_in = [
    'CD8T',
    'NK',
    'B',
    'Dendritic',
    'FCGR3A+Mono',
    'CD14+Mono',
]
# held-out cells
t_out = ['CD4T']
github brianhie / geosketch / geosketch / sketch.py View on Github external
def louvain(X, N, resolution=1, seed=None, replace=False):
    from anndata import AnnData
    import scanpy.api as sc

    adata = AnnData(X=X)
    sc.pp.neighbors(adata, use_rep='X')
    sc.tl.louvain(adata, resolution=resolution, key_added='louvain')
    cluster_labels_full = adata.obs['louvain'].tolist()

    louv = {}
    for i, cluster in enumerate(cluster_labels_full):
        if cluster not in louv:
            louv[cluster] = []
        louv[cluster].append(i)

    lv_idx = []
    for n in range(N):
        louv_cells = list(louv.keys())
        louv_cell = louv_cells[np.random.choice(len(louv_cells))]
        samples = list(louv[louv_cell])
        sample = samples[np.random.choice(len(samples))]
        if not replace:
github brianhie / geosketch / bin / experiments.py View on Github external
full_labels = label_approx(X_dimred, X_dimred[samp_idx, :],
                                   spect.labels_, k=5)
                
        bnmi = normalized_mutual_info_score(
            cell_labels, full_labels, dist='balanced'
        )
        nmi = normalized_mutual_info_score(cell_labels, full_labels)
        stats.append(nmi)
        stats.append(bnmi)
        
    if 'louvain_ami' in kwargs and kwargs['louvain_ami']:
        cell_labels = kwargs['cell_labels']

        adata = AnnData(X=X_dimred[samp_idx, :])
        sc.pp.neighbors(adata, use_rep='X')
        
        amis = []
        bamis = []
        
        for r in [ 0.5, 1., 2. ]:
            sc.tl.louvain(adata, resolution=r, key_added='louvain')
            louv_labels = np.array(adata.obs['louvain'].tolist())

            full_labels = label_approx(X_dimred, X_dimred[samp_idx, :],
                                       louv_labels, k=5)

            ami = adjusted_mutual_info_score(cell_labels, full_labels)
            bami = adjusted_mutual_info_score(
                cell_labels, full_labels, dist='balanced'
            )
            amis.append(ami)
github theislab / scgen / code / cvae.py View on Github external
from data_reader import data_reader




# =============================== downloading training and validation files ====================================

train_path = "../data/train_kang.h5ad"
valid_path = "../data/valid_kang.h5ad"

# Download the training file only when it is missing on disk, then load it.
if not os.path.isfile(train_path):
    train_url = "https://drive.google.com/uc?export=download&id=1-RpxbXwXEJLYZDFSHnWYenojZ8TxRZsP"
    t_dl = wget.download(train_url, train_path)
data = sc.read(train_path)

# Same procedure for the validation file.
# NOTE(review): this download reuses the training URL - confirm that is intended.
if not os.path.isfile(valid_path):
    train_url = "https://drive.google.com/uc?export=download&id=1-RpxbXwXEJLYZDFSHnWYenojZ8TxRZsP"
    t_dl = wget.download(train_url, valid_path)
validation = sc.read(valid_path)

# =============================== data gathering ====================================
# training cells
t_in = [
    'CD8T',
    'NK',
    'B',
    'Dendritic',
    'FCGR3A+Mono',
    'CD14+Mono',
]
# held-out cells
t_out = ['CD4T']
dr = data_reader(
    data,
    validation,
    {"ctrl": "control", "stim": "stimulated"},
    t_in,
    t_out,
)
# Keep handles on the reader's training and validation AnnData objects.
train_real = dr.train_real_adata
valid_real = dr.valid_real_adata
github theislab / scgen / code / st_gan.py View on Github external
import tensorflow as tf
import numpy as np
import os
import scanpy.api as sc
import wget
from data_reader import data_reader
from random import shuffle


# =============================== downloading training and validation files ====================================
train_path = "../data/train_kang.h5ad"
valid_path = "../data/valid_kang.h5ad"

# Download the training file only when it is missing on disk, then load it.
if not os.path.isfile(train_path):
    train_url = "https://drive.google.com/uc?export=download&id=1-RpxbXwXEJLYZDFSHnWYenojZ8TxRZsP"
    t_dl = wget.download(train_url, train_path)
data = sc.read(train_path)

# Same procedure for the validation file.
# NOTE(review): this download reuses the training URL - confirm that is intended.
if not os.path.isfile(valid_path):
    train_url = "https://drive.google.com/uc?export=download&id=1-RpxbXwXEJLYZDFSHnWYenojZ8TxRZsP"
    t_dl = wget.download(train_url, valid_path)
validation = sc.read(valid_path)

# =============================== data gathering ====================================
# training cells
t_in = [
    'CD8T',
    'NK',
    'B',
    'Dendritic',
    'FCGR3A+Mono',
    'CD14+Mono',
]
# held-out cells
t_out = ['CD4T']
github maximilianh / cellBrowser / src / cbPyLib / cellbrowser.py View on Github external
def readMatrixAnndata(matrixFname, samplesOnRows=False):
    """Read an expression matrix from disk and return it as an adata object.

    Files whose name ends in ".mtx" are read and transposed; their variable
    and observation names are then taken from "genes.tsv" (second column,
    tab-separated) and "barcodes.tsv" (first column) in the same directory.
    Every other file is handed to scanpy's generic reader with the first
    column used as names, and is transposed unless samplesOnRows is true.

    Per the original note on this function, .mtx, .h5 and .tsv inputs are
    supported, but not .tsv.gz.

    Args:
        matrixFname: path of the matrix file to load.
        samplesOnRows: for non-.mtx input, set to True if the file already has
            samples on rows, which skips the transpose. Ignored for .mtx input,
            which is always transposed.

    Returns:
        The object produced by scanpy's read(), transposed as described above.
    """
    # scanpy.api is the historical entry point; newer scanpy releases removed
    # it and expose the same read() at the package top level. Try the old
    # location first so behavior is unchanged wherever it still exists.
    try:
        import scanpy.api as sc
    except ImportError:
        import scanpy as sc

    if matrixFname.endswith(".mtx"):
        import pandas as pd
        logging.info("Loading expression matrix: mtx format")
        adata = sc.read(matrixFname, cache=False).T

        # The gene and barcode lists are expected next to the .mtx file.
        mtxDir = dirname(matrixFname)
        adata.var_names = pd.read_csv(join(mtxDir, 'genes.tsv'), header=None, sep='\t')[1]
        adata.obs_names = pd.read_csv(join(mtxDir, 'barcodes.tsv'), header=None)[0]

    else:
        logging.info("Loading expression matrix: tab-sep format")
        adata = sc.read(matrixFname, cache=False , first_column_names=True)
        if not samplesOnRows:
            logging.info("Transposing the expression matrix")
            adata = adata.T

    return adata