How to use the anndata.base._normalize_index function in anndata

To help you get started, we’ve selected a few anndata examples, based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: theislab/scanpy, scanpy/tools/_rna_velocity.py (view on GitHub)
"""
    # also see notebook "scanpy_rna_velocity_all"

    # the following only works in theory...
    
    # this is n_genes x n_cells
    ds = loompy.connect('./all_sgete_4GU75.loom')
    row_attrs = dict(ds.row_attrs.items())
    col_attrs = dict(ds.col_attrs.items())
    gene_names = [gene for gene in row_attrs['Gene'] if gene in adata.var_names]
    cell_names = [cell for cell in col_attrs['CellID'] if cell in adata.obs_names]

    # subset the s and u matrices to the genes in adata
    from anndata.base import _normalize_index
    gene_index = _normalize_index(gene_names, adata.var_names)
    cell_index = _normalize_index(cell_names, adata.obs_names)
    if len(cell_index) == 0:
        raise ValueError(
            'Cell names in loom file do not match cell names in AnnData.')
    # subset to cells and genes present in adata
    ad_s = AnnData(ds.layer['spliced'].sparse(gene_index, cell_index).tocsr().T)
    ad_u = AnnData(ds.layer['unspliced'].sparse(gene_index, cell_index).tocsr().T)
    ds.close()

    subset, _ = sc.pp.filter_genes(ad_u.X, min_cells=50)
    print(np.sum(subset))
    ad_s = ad_s[:, subset]
    ad_u = ad_u[:, subset]
    ad_s.var_names = np.array(gene_names)[subset]

    # loop over genes
    from scipy.sparse import dok_matrix
Source: theislab/scanpy, scanpy/tools/_rna_velocity.py (view on GitHub)
vector.
    """
    # also see notebook "scanpy_rna_velocity_all"

    # the following only works in theory...
    
    # this is n_genes x n_cells
    ds = loompy.connect('./all_sgete_4GU75.loom')
    row_attrs = dict(ds.row_attrs.items())
    col_attrs = dict(ds.col_attrs.items())
    gene_names = [gene for gene in row_attrs['Gene'] if gene in adata.var_names]
    cell_names = [cell for cell in col_attrs['CellID'] if cell in adata.obs_names]

    # subset the s and u matrices to the genes in adata
    from anndata.base import _normalize_index
    gene_index = _normalize_index(gene_names, adata.var_names)
    cell_index = _normalize_index(cell_names, adata.obs_names)
    if len(cell_index) == 0:
        raise ValueError(
            'Cell names in loom file do not match cell names in AnnData.')
    # subset to cells and genes present in adata
    ad_s = AnnData(ds.layer['spliced'].sparse(gene_index, cell_index).tocsr().T)
    ad_u = AnnData(ds.layer['unspliced'].sparse(gene_index, cell_index).tocsr().T)
    ds.close()

    subset, _ = sc.pp.filter_genes(ad_u.X, min_cells=50)
    print(np.sum(subset))
    ad_s = ad_s[:, subset]
    ad_u = ad_u[:, subset]
    ad_s.var_names = np.array(gene_names)[subset]

    # loop over genes