How to use the scanpy.logging.info function in scanpy

To help you get started, we’ve selected a few examples of `scanpy.logging.info`, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github theislab / scanpy / scanpy / tools / _score_genes.py View on Github external
use_raw
        Use `raw` attribute of `adata` if present.

        .. versionchanged:: 1.4.5
           Default value changed from `False` to `None`.

    Returns
    -------
    Depending on `copy`, returns or updates `adata` with an additional field
    `score_name`.

    Examples
    --------
    See this `notebook <https://github.com/theislab/scanpy_usage/tree/master/180209_cell_cycle>`__.
    """
    start = logg.info(f'computing score {score_name!r}')
    adata = adata.copy() if copy else adata

    if random_state is not None:
        np.random.seed(random_state)

    gene_list_in_var = []
    var_names = adata.raw.var_names if use_raw else adata.var_names
    genes_to_ignore = []
    for gene in gene_list:
        if gene in var_names:
            gene_list_in_var.append(gene)
        else:
            genes_to_ignore.append(gene)
    if len(genes_to_ignore) > 0:
        logg.warning(f'genes are not in var_names and ignored: {genes_to_ignore}')
    gene_list = set(gene_list_in_var[:])
github theislab / scanpy / scanpy / neighbors / __init__.py View on Github external
evals, evecs = scipy.linalg.eigh(matrix)
        else:
            n_comps = min(matrix.shape[0]-1, n_comps)
            # ncv = max(2 * n_comps + 1, int(np.sqrt(matrix.shape[0])))
            ncv = None
            which = 'LM' if sort == 'decrease' else 'SM'
            # it pays off to increase the stability with a bit more precision
            matrix = matrix.astype(np.float64)
            evals, evecs = scipy.sparse.linalg.eigsh(
                matrix, k=n_comps, which=which, ncv=ncv
            )
            evals, evecs = evals.astype(np.float32), evecs.astype(np.float32)
        if sort == 'decrease':
            evals = evals[::-1]
            evecs = evecs[:, ::-1]
        logg.info(
            '    eigenvalues of transition matrix\n'
            '    {}'.format(str(evals).replace('\n', '\n    '))
        )
        if self._number_connected_components > len(evals)/2:
            logg.warning('Transition matrix has many disconnected components!')
        self._eigen_values = evals
        self._eigen_basis = evecs
github theislab / scanpy / scanpy / tools / _tsne.py View on Github external
'(https://github.com/DmitryUlyanov/Multicore-TSNE). '
                'Even for n_jobs=1 this speeds up the computation considerably '
                'and might yield better converged results.'
            )
    if X_tsne is None:
        from sklearn.manifold import TSNE
        from . import _tsne_fix  # fix by D. DeTomaso for sklearn < 0.19

        # unfortunately, sklearn does not allow setting a minimum number
        # of iterations for Barnes-Hut tSNE
        tsne = TSNE(**params_sklearn)
        logg.info('    using sklearn.manifold.TSNE with a fix by D. DeTomaso')
        X_tsne = tsne.fit_transform(X)
    # update AnnData instance
    adata.obsm['X_tsne'] = X_tsne  # annotate samples with tSNE coordinates
    logg.info(
        '    finished',
        time=start,
        deep="added\n    'X_tsne', tSNE coordinates (adata.obsm)",
    )
    return adata if copy else None
github theislab / scanpy / scanpy / preprocessing / _recipes.py View on Github external
pp.filter_genes(adata, min_counts=1)
    # normalize with total UMI count per cell
    normalize_total(adata, key_added='n_counts_all')
    filter_result = filter_genes_dispersion(
        adata.X, flavor='cell_ranger', n_top_genes=n_top_genes, log=False
    )
    if plot:  # should not import at the top of the file
        from ..plotting import _preprocessing as ppp
        ppp.filter_genes_dispersion(filter_result, log=True)
    # actually filter the genes, the following is the inplace version of
    #     adata = adata[:, filter_result.gene_subset]
    adata._inplace_subset_var(filter_result.gene_subset)  # filter genes
    normalize_total(adata)  # renormalize after filtering
    if log: pp.log1p(adata)  # log transform: X = log(X + 1)
    pp.scale(adata)
    logg.info('    finished', time=start)
    return adata if copy else None
github theislab / scanpy / scanpy / tools / _dpt.py View on Github external
def _diffmap(adata, n_comps=15):
    start = logg.info(f'computing Diffusion Maps using n_comps={n_comps}(=n_dcs)')
    dpt = DPT(adata)
    dpt.compute_transitions()
    dpt.compute_eigen(n_comps=n_comps)
    adata.obsm['X_diffmap'] = dpt.eigen_basis
    adata.uns['diffmap_evals'] = dpt.eigen_values
    logg.info(
        '    finished',
        time=start,
        deep=(
            'added\n'
            '    \'X_diffmap\', diffmap coordinates (adata.obsm)\n'
github theislab / scanpy / scanpy / external / tl / _palantir.py View on Github external
pca_projections, var_r = palantir.utils.run_pca(data_df)
    adata.uns['palantir_pca_results'] = dict(
        pca_projections=pca_projections,
        variance_ratio=var_r,
    )

    logg.info('Diffusion maps in progress ...')
    dm_res = adata.uns['palantir_diff_maps'] = \
        palantir.utils.run_diffusion_maps(pca_projections)
    ms_data = adata.uns['palantir_ms_data'] = \
        palantir.utils.determine_multiscale_space(dm_res)

    logg.info('tSNE in progress ...')
    adata.uns['palantir_tsne'] = palantir.utils.run_tsne(ms_data)

    logg.info('imputation in progress ...')
    adata.uns['palantir_imp_df'] = \
        palantir.utils.run_magic_imputation(data_df, dm_res)

    logg.info('End of processing, start plotting.')
    return None if inplace else adata
github theislab / scanpy / scanpy / readwrite.py View on Github external
def _check_datafile_present_and_download(path, backup_url=None):
    """Make sure a data file exists locally, downloading it when possible.

    Parameters
    ----------
    path
        Location of the data file; converted to a `Path` internally.
    backup_url
        URL handed to `_download` when the file is missing. With `None`,
        no download is attempted.

    Returns
    -------
    `True` if the file already exists, or once `_download` has been called
    for it; `False` if the file is missing and no `backup_url` was given.
    """
    target = Path(path)
    already_there = target.is_file()
    # Nothing to do if the file exists; nothing we *can* do without a URL.
    if already_there or backup_url is None:
        return already_there

    logg.info(
        f'try downloading from url\n{backup_url}\n'
        '... this may take a while but only happens once'
    )

    # The destination folder (including missing ancestors) is created
    # before the download is started.
    folder = target.parent
    if not folder.is_dir():
        logg.info(f'creating directory {folder}/ for saving data')
        folder.mkdir(parents=True)

    _download(backup_url, target)
    return True
github theislab / scanpy / scanpy / preprocessing / _simple.py View on Github external
# chunked calculation is not randomized, anyways
    if svd_solver in {'auto', 'randomized'} and not chunked:
        logg.info(
            'Note that scikit-learn\'s randomized PCA might not be exactly '
            'reproducible across different computational platforms. For exact '
            'reproducibility, choose `svd_solver=\'arpack\'.` This will likely '
            'become the Scanpy default in the future.'
        )

    data_is_AnnData = isinstance(data, AnnData)
    if data_is_AnnData:
        adata = data.copy() if copy else data
    else:
        adata = AnnData(data)

    start = logg.info(f'computing PCA with n_comps = {n_comps}')

    if adata.n_vars < n_comps:
        n_comps = adata.n_vars - 1
        logg.debug(
            f'reducing number of computed PCs to {n_comps} '
            f'as dim of data is only {adata.n_vars}'
        )

    if use_highly_variable is True and 'highly_variable' not in adata.var.keys():
        raise ValueError('Did not find adata.var[\'highly_variable\']. '
                         'Either your data already only consists of highly-variable genes '
                         'or consider running `pp.filter_genes_dispersion` first.')
    if use_highly_variable is None:
        use_highly_variable = True if 'highly_variable' in adata.var.keys() else False
    if use_highly_variable:
        logg.info('computing PCA on highly variable genes')
github theislab / scanpy / scanpy / readwrite.py View on Github external
data[:] = dsets['data']
            matrix = csr_matrix(
                (data, dsets['indices'], dsets['indptr']),
                shape=(N, M),
            )
            # the csc matrix is automatically the transposed csr matrix
            # as scanpy expects it, so, no need for a further transposition
            adata = AnnData(
                matrix,
                dict(obs_names=dsets['barcodes'].astype(str)),
                dict(
                    var_names=dsets['gene_names'].astype(str),
                    gene_ids=dsets['genes'].astype(str),
                ),
            )
            logg.info('', time=start)
            return adata
        except KeyError:
            raise Exception('File is missing one or more required datasets.')
github theislab / scanpy / scanpy / _utils.py View on Github external
Returns
    -------
    asso_names
        List of associated reference names
        (`max_n_names` for each predicted name).
    asso_matrix
        Matrix where rows correspond to the predicted labels and columns to the
        reference labels, entries are proportional to degree of association.
    """
    if normalization not in {'prediction', 'reference'}:
        raise ValueError('`normalization` needs to be either "prediction" or "reference".')
    sanitize_anndata(adata)
    cats = adata.obs[reference].cat.categories
    for cat in cats:
        if cat in settings.categories_to_ignore:
            logg.info(
                f'Ignoring category {cat!r} '
                'as it’s in `settings.categories_to_ignore`.'
            )
    asso_names = []
    asso_matrix = []
    for ipred_group, pred_group in enumerate(
            adata.obs[prediction].cat.categories):
        if '?' in pred_group: pred_group = str(ipred_group)
        # starting from numpy version 1.13, subtractions of boolean arrays are deprecated
        mask_pred = adata.obs[prediction].values == pred_group
        mask_pred_int = mask_pred.astype(np.int8)
        asso_matrix += [[]]
        for ref_group in adata.obs[reference].cat.categories:
            mask_ref = (adata.obs[reference].values == ref_group).astype(np.int8)
            mask_ref_or_pred = mask_ref.copy()
            mask_ref_or_pred[mask_pred] = 1