How to use the threadpoolctl.threadpool_limits function in threadpoolctl

To help you get started, we’ve selected a few threadpoolctl.threadpool_limits examples, drawn from popular ways the function is used in public projects.

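As a quick orientation before the project examples below, here is a minimal sketch of the most common pattern: limiting the BLAS thread pools for the duration of a with block. The array size and variable names are purely illustrative.

import numpy as np
from threadpoolctl import threadpool_limits

a = np.random.randn(2000, 2000)

# Cap every BLAS library loaded in this process at one thread for this block.
with threadpool_limits(limits=1, user_api="blas"):
    b = a @ a
# On exit, the previous thread counts are restored automatically.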

joblib/threadpoolctl (tests/test_threadpoolctl.py)
def test_threadpool_limits_manual_unregister():
    # Check that threadpool_limits can be used as an object which holds the
    # original state of the threadpools and that can be restored thanks to the
    # dedicated unregister method
    original_info = _threadpool_info()

    limits = threadpool_limits(limits=1)
    try:
        for module in _threadpool_info():
            if is_old_openblas(module):
                continue
            assert module.num_threads == 1
    finally:
        # Restore the original limits so that this test does not have any
        # side-effect.
        limits.unregister()

    assert _threadpool_info() == original_info
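The same pattern applies outside of a test suite whenever the limit has to outlive a single with block. A minimal sketch, where run_single_threaded_work is a hypothetical placeholder for your own workload:

from threadpoolctl import threadpool_limits

def run_single_threaded_work():
    pass  # hypothetical placeholder for code that should not spawn BLAS threads

limiter = threadpool_limits(limits=1)  # the limit takes effect immediately
try:
    run_single_threaded_work()
finally:
    limiter.unregister()  # restore the original thread counts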

joblib/threadpoolctl (tests/test_threadpoolctl.py)
def test_shipped_openblas():
    # Check that OpenBLAS actually uses the number of threads requested by
    # the context manager.
    original_info = _threadpool_info()

    openblas_modules = original_info.get_modules("internal_api", "openblas")

    with threadpool_limits(1):
        for module in openblas_modules:
            assert module.get_num_threads() == 1

    assert original_info == _threadpool_info()
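The test above relies on threadpoolctl's introspection of the loaded libraries. The public threadpool_info() function exposes the same information and is handy for checking which BLAS or OpenMP runtimes are active in your process:

from threadpoolctl import threadpool_info

# Each entry describes one native thread pool found in the process.
for module in threadpool_info():
    print(module["user_api"], module["internal_api"], module["num_threads"])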

nansencenter/DAPPER (dapper/tools/multiprocessing.py)
#     "MKL_NUM_THREADS",        # mkl
  #     "VECLIB_MAXIMUM_THREADS", # accelerate
  #     "NUMEXPR_NUM_THREADS"]:   # numexpr
  #     os.environ[envar] = "1"
  # The above may be the safest way to limit thread use on all systems,
  # but requires importing before np. => Instead, use threadpoolctl!
  #
  # >>> import numpy as np
  # >>> from threadpoolctl import threadpool_limits
  # >>> N  = 4*10**3
  # >>> a  = np.random.randn(N, N)
  # >>> # Now start monitoring CPU usage (with e.g. htop).
  # >>> with threadpool_limits(limits=1, user_api='blas'):
  # >>>   a2 = a @ a
  import threadpoolctl
  threadpoolctl.threadpool_limits(1)

  no_MP = False
except ImportError:
  no_MP = True

  def MP_warn():
    if not MP_warn.have_printed: # only print once!
      print(textwrap.dedent("""
        Warning: Multiprocessing (MP) was requested during execution,
        but has not been properly installed.
        Try re-installing DAPPER with `pip install -e [MP]`.
        """))
      MP_warn.have_printed = True
  MP_warn.have_printed = False
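For comparison with the commented-out approach above, the environment-variable route looks roughly like this; it only works when the variables are set before NumPy (and therefore the BLAS runtime) is imported, which is exactly the ordering constraint the DAPPER comment points out:

import os

# Must run before `import numpy`, otherwise the BLAS runtime has already
# read its configuration and spawned its worker threads.
for envar in ["OMP_NUM_THREADS", "OPENBLAS_NUM_THREADS", "MKL_NUM_THREADS",
              "VECLIB_MAXIMUM_THREADS", "NUMEXPR_NUM_THREADS"]:
    os.environ[envar] = "1"

import numpy as np  # the BLAS backend now starts with one thread per pool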

qucontrol/krotov (src/krotov/optimize.py)
ValueError: If any controls are not real-valued, or if any update
            shape is not a real-valued function in the range [0, 1]; if using
            `continue_from` with a :class:`.Result` with differing
            `objectives`; if there are any required keys missing in
            `pulse_options`.
    """
    logger = logging.getLogger('krotov')

    # Initialization
    logger.info("Initializing optimization with Krotov's method")
    thread_pool_limiter = None
    if limit_thread_pool is None:
        limit_thread_pool = USE_THREADPOOL_LIMITS
    if limit_thread_pool:
        logger.debug("Setting threadpoolctrl.threadpool_limits")
        thread_pool_limiter = threadpoolctl.threadpool_limits(limits=1)
    if mu is None:
        mu = derivative_wrt_pulse
    second_order = sigma is not None
    if norm is None:
        norm = lambda state: state.norm()
    if overlap is None:
        overlap = _overlap
    if modify_params_after_iter is not None:
        # From a technical perspective, the `modify_params_after_iter` is
        # really just another info_hook, the only difference is the
        # convention that info_hooks shouldn't modify the parameters.
        if info_hook is None:
            info_hook = modify_params_after_iter
        else:
            info_hook = chain(modify_params_after_iter, info_hook)
    if isinstance(propagator, list):

mpes-kit/mpes (mpes/fprocessing.py)
    # Construct jitter specifications
    jitter_params = {}
    if jittered:
        # Retrieve parameters for histogram jittering, the ordering of the jittering
        # parameters is the same as that for the binning
        jaxes = kwds.pop('jitter_axes', axes)
        jitter_params = {'jitter_axes': jaxes,
                         'jitter_bins': kwds.pop('jitter_bins', nbins),
                         'jitter_amplitude': kwds.pop('jitter_amplitude', 0.5*np.ones(len(jaxes))),
                         'jitter_ranges': kwds.pop('jitter_ranges', ranges),
                         'jitter_type': kwds.pop('jitter_type', 'normal')}

    # limit multithreading in worker threads
    nthreads_per_worker = kwds.pop('nthreads_per_worker', 4)
    threadpool_api = kwds.pop('threadpool_api', 'blas')
    with threadpool_limits(limits=nthreads_per_worker, user_api=threadpool_api):
        # Main loop for binning
        for i in tqdm(range(0, df.npartitions, ncores), disable=not(pbar)):

            coreTasks = [] # Core-level jobs
            for j in range(0, ncores):

                ij = i + j
                if ij >= df.npartitions:
                    break

                dfPartition = df.get_partition(ij) # Obtain dataframe partition
                coreTasks.append(d.delayed(binPartition_numba)(dfPartition, axes, nbins, ranges, jittered, jitter_params))

            if len(coreTasks) > 0:
                coreResults = d.compute(*coreTasks, **kwds)

mpes-kit/mpes (mpes/fprocessing.py)
        # Reset containers of results
        self.results = {}
        self.combinedresult = {}
        self.combinedresult['binned'] = np.zeros(tuple(nbins))
        tqdm = u.tqdmenv(pbenv)

        ncores = self.ncores

        # Execute binning tasks
        binning_kwds = u.dictmerge({'ret':'histogram'}, binning_kwds)
        
        # limit multithreading in worker threads
        nthreads_per_worker = binning_kwds.pop('nthreads_per_worker', 1)
        threadpool_api = binning_kwds.pop('threadpool_api', 'blas')
        with threadpool_limits(limits=nthreads_per_worker, user_api=threadpool_api):        
            # Construct binning tasks
            for i in tqdm(range(0, len(self.files), ncores), disable=not(pbar)):
                coreTasks = [] # Core-level jobs
                for j in range(0, ncores):
                    # Fill up worker threads
                    ij = i + j
                    if ij >= len(self.files):
                        break

                    file = self.files[ij]
                    coreTasks.append(d.delayed(hdf5Processor(file).localBinning)(axes=axes, nbins=nbins, ranges=ranges, **binning_kwds))

                if len(coreTasks) > 0:
                    coreResults = d.compute(*coreTasks, scheduler=scheduler, **compute_kwds)
                    # Combine all core results for a dataframe partition
                    # Fast parallel version with Dask

tomMoral/dicodile (benchmarks/comparison_strategies.py)
def run_one(n_times, n_times_atom, n_atoms, n_channels, noise_level,
            random_state, reg, tol, strategy, dicod_args):

    threadpool_limits(1)

    tag = f"[{strategy} - {n_times} - {reg}]"
    current_time = time.time() - START
    msg = f"\r{tag} started at T={current_time:.0f} sec"
    print(colorify(msg, BLUE))

    X, D_hat, lmbd_max = simulate_data(
        n_times, n_times_atom, n_atoms, n_channels, noise_level,
        random_state=random_state)
    reg_ = reg * lmbd_max

    n_seg = 1
    if strategy == 'lgcd':
        n_seg = 'auto'

    *_, pobj, run_statistics = dicod(X, D_hat, reg_, n_workers=1, tol=tol,

1313e/PRISM (prism/emulator/_emulator.py)
            self._emul_s_to_core = list(emul_s_to_core)

            # Save total number of emulator systems
            self._n_emul_s_tot = sum([len(seq) for seq in emul_s_to_core])

            # Assign the emulator systems to the various MPI ranks
            self._emul_s = self._comm.scatter(emul_s_to_core, 0)

            # Calculate the most optimal number of OpenMP threads if using MPI
            if(MPI.COMM_WORLD.Get_size() > 1):
                omp_num_threads = max(1, self._size//self._n_emul_s_tot)
            else:
                omp_num_threads = None

            # Tell workers to use this number of OpenMP threads
            tpc.threadpool_limits(self._comm.bcast(omp_num_threads, 0), 'blas')

        # Workers get their emulator systems assigned
        else:
            # Receive assigned emulator systems
            self._emul_s = self._comm.scatter(None, 0)

            # Set number of OpenMP threads
            tpc.threadpool_limits(self._comm.bcast(None, 0), 'blas')

        # Temporarily manually swap the CFilter for RFilter
        # Every rank logs what systems were assigned to it
        # TODO: Remove the need to do this manually
        logger.filters = [logger.PRISM_filters['RFilter']]
        logger.info("Received emulator systems %s." % (self._emul_s))
        logger.filters = [logger.PRISM_filters['CFilter']]
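In the calls above, the second positional argument of threadpool_limits is user_api, which restricts the limit to one family of libraries; threadpoolctl accepts "blas", "openmp", or None (all libraries). A minimal sketch that caps only the OpenMP pools:

import threadpoolctl

# Limit OpenMP thread pools to two threads; BLAS pools are left untouched.
threadpoolctl.threadpool_limits(limits=2, user_api="openmp")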

cvxgrp/diffcp (diffcp/cone_program.py)
        # serial
        xs, ys, ss, Ds, DTs = [], [], [], [], []
        for i in range(batch_size):
            x, y, s, D, DT = solve_and_derivative(As[i], bs[i], cs[i],
                    cone_dicts[i], warm_starts[i], mode=mode, **kwargs)
            xs += [x]
            ys += [y]
            ss += [s]
            Ds += [D]
            DTs += [DT]
    else:
        # thread pool
        pool = ThreadPool(processes=n_jobs_forward)
        args = [(A, b, c, cone_dict, warm_start, mode, kwargs) for A, b, c, cone_dict, warm_start in \
                    zip(As, bs, cs, cone_dicts, warm_starts)]
        with threadpool_limits(limits=1):
            results = pool.starmap(solve_and_derivative_wrapper, args)
        pool.close()
        xs = [r[0] for r in results]
        ys = [r[1] for r in results]
        ss = [r[2] for r in results]
        Ds = [r[3] for r in results]
        DTs = [r[4] for r in results]

    if n_jobs_backward == 1:
        def D_batch(dAs, dbs, dcs, **kwargs):
            dxs, dys, dss = [], [], []
            for i in range(batch_size):
                dx, dy, ds = Ds[i](dAs[i], dbs[i], dcs[i], **kwargs)
                dxs += [dx]
                dys += [dy]
                dss += [ds]
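Capping each native thread pool at one thread while parallelising at the Python level, as diffcp does above, is a common way to avoid oversubscribing the CPU. A minimal sketch of that pattern, with do_work standing in as a hypothetical task function:

from multiprocessing.pool import ThreadPool
from threadpoolctl import threadpool_limits

def do_work(x):
    # Hypothetical CPU-bound task that would otherwise use multithreaded BLAS.
    return x * x

with threadpool_limits(limits=1):          # one native thread per task
    with ThreadPool(processes=4) as pool:  # parallelism comes from the pool instead
        results = pool.map(do_work, range(8))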