def test_threadpool_limits_manual_unregister():
    # Check that threadpool_limits can be used as an object which holds the
    # original state of the threadpools and that can be restored thanks to the
    # dedicated unregister method
    original_info = _threadpool_info()
    limits = threadpool_limits(limits=1)
    try:
        for module in _threadpool_info():
            if is_old_openblas(module):
                continue
            assert module.num_threads == 1
    finally:
        # Restore the original limits so that this test does not have any
        # side-effect.
        limits.unregister()
    assert _threadpool_info() == original_info
def test_shipped_openblas():
    # checks that OpenBLAS effectively uses the number of threads requested by
    # the context manager
    original_info = _threadpool_info()
    openblas_modules = original_info.get_modules("internal_api", "openblas")
    with threadpool_limits(1):
        for module in openblas_modules:
            assert module.get_num_threads() == 1
    assert original_info == _threadpool_info()
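# A minimal sketch of the two usage styles exercised by the tests above: the
# context manager and the manual register/unregister object.  It relies only
# on the public threadpoolctl API (threadpool_limits, threadpool_info).
from threadpoolctl import threadpool_info, threadpool_limits

with threadpool_limits(limits=1, user_api="blas"):
    pass  # BLAS libraries are capped at one thread inside this block

limiter = threadpool_limits(limits=1)  # limits take effect immediately
limiter.unregister()                   # original thread counts are restored
print(threadpool_info())               # inspect the libraries that were detected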
# "MKL_NUM_THREADS", # mkl
# "VECLIB_MAXIMUM_THREADS", # accelerate
# "NUMEXPR_NUM_THREADS"]: # numexpr
# os.environ[envar] = "1"
# The above may be the safest way to limit thread use on all systems,
# but requires importing before np. => Instead, use threadpoolctl!
#
# >>> import numpy as np
# >>> from threadpoolctl import threadpool_limits
# >>> N = 4*10**3
# >>> a = np.random.randn(N, N)
# >>> # Now start monitoring CPU usage (with e.g. htop).
# >>> with threadpool_limits(limits=1, user_api='blas'):
# >>> a2 = a @ a
import textwrap

try:
    import threadpoolctl
    threadpoolctl.threadpool_limits(1)
    no_MP = False
except ImportError:
    no_MP = True
def MP_warn():
    if not MP_warn.have_printed:  # only print once!
        print(textwrap.dedent("""
            Warning: Multiprocessing (MP) was requested during execution,
            but has not been properly installed.
            Try re-installing DAPPER with `pip install -e [MP]`.
            """))
        MP_warn.have_printed = True
MP_warn.have_printed = False
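# A runnable version of the monitoring sketch in the comments above (a minimal
# example assuming only numpy and threadpoolctl; N is kept moderate so the
# matrix product finishes quickly while still being visible in e.g. htop).
import numpy as np
from threadpoolctl import threadpool_limits

N = 2 * 10**3
a = np.random.randn(N, N)
with threadpool_limits(limits=1, user_api='blas'):
    a2 = a @ a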
ValueError: If any controls are not real-valued, or if any update
shape is not a real-valued function in the range [0, 1]; if using
`continue_from` with a :class:`.Result` with differing
`objectives`; if there are any required keys missing in
`pulse_options`.
"""
logger = logging.getLogger('krotov')

# Initialization
logger.info("Initializing optimization with Krotov's method")
thread_pool_limiter = None
if limit_thread_pool is None:
    limit_thread_pool = USE_THREADPOOL_LIMITS
if limit_thread_pool:
    logger.debug("Setting threadpoolctl.threadpool_limits")
    thread_pool_limiter = threadpoolctl.threadpool_limits(limits=1)
if mu is None:
    mu = derivative_wrt_pulse
second_order = sigma is not None
if norm is None:
    norm = lambda state: state.norm()
if overlap is None:
    overlap = _overlap
if modify_params_after_iter is not None:
    # From a technical perspective, `modify_params_after_iter` is really just
    # another info_hook; the only difference is the convention that info_hooks
    # shouldn't modify the parameters (see the chain sketch below).
    if info_hook is None:
        info_hook = modify_params_after_iter
    else:
        info_hook = chain(modify_params_after_iter, info_hook)
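# A simplified sketch of the `chain` helper used above to combine
# `modify_params_after_iter` with an existing info_hook.  This is not krotov's
# actual implementation; it only illustrates the idea of calling several hooks
# in order and keeping the last non-None return value.
def chain(*hooks):
    """Combine several hook callables into a single callable."""
    def combined_hook(*args, **kwargs):
        result = None
        for hook in hooks:
            res = hook(*args, **kwargs)
            if res is not None:
                result = res
        return result
    return combined_hook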
if isinstance(propagator, list):
# Construct jitter specifications
jitter_params = {}
if jittered:
    # Retrieve parameters for histogram jittering; the ordering of the
    # jittering parameters is the same as that for the binning
    jaxes = kwds.pop('jitter_axes', axes)
    jitter_params = {'jitter_axes': jaxes,
                     'jitter_bins': kwds.pop('jitter_bins', nbins),
                     'jitter_amplitude': kwds.pop('jitter_amplitude', 0.5*np.ones(len(jaxes))),
                     'jitter_ranges': kwds.pop('jitter_ranges', ranges),
                     'jitter_type': kwds.pop('jitter_type', 'normal')}
# limit multithreading in worker threads
nthreads_per_worker = kwds.pop('nthreads_per_worker', 4)
threadpool_api = kwds.pop('threadpool_api', 'blas')
with threadpool_limits(limits=nthreads_per_worker, user_api=threadpool_api):
    # Main loop for binning
    for i in tqdm(range(0, df.npartitions, ncores), disable=not pbar):
        coreTasks = []  # Core-level jobs
        for j in range(0, ncores):
            ij = i + j
            if ij >= df.npartitions:
                break
            dfPartition = df.get_partition(ij)  # Obtain dataframe partition
            coreTasks.append(d.delayed(binPartition_numba)(
                dfPartition, axes, nbins, ranges, jittered, jitter_params))
        if len(coreTasks) > 0:
            coreResults = d.compute(*coreTasks, **kwds)
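# A self-contained sketch of the pattern above: build dask.delayed tasks and
# compute them while BLAS is capped per worker, so dask's own parallelism is
# not compounded by multithreaded matrix products.  `heavy_numpy_op` is a
# hypothetical stand-in for binPartition_numba; only dask, numpy and
# threadpoolctl are assumed to be installed.
import dask
import numpy as np
from threadpoolctl import threadpool_limits

def heavy_numpy_op(seed):
    rng = np.random.default_rng(seed)
    a = rng.standard_normal((500, 500))
    return a @ a

tasks = [dask.delayed(heavy_numpy_op)(seed) for seed in range(4)]
with threadpool_limits(limits=1, user_api='blas'):
    results = dask.compute(*tasks)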
# Reset containers of results
self.results = {}
self.combinedresult = {}
self.combinedresult['binned'] = np.zeros(tuple(nbins))
tqdm = u.tqdmenv(pbenv)
ncores = self.ncores

# Execute binning tasks
binning_kwds = u.dictmerge({'ret': 'histogram'}, binning_kwds)

# limit multithreading in worker threads
nthreads_per_worker = binning_kwds.pop('nthreads_per_worker', 1)
threadpool_api = binning_kwds.pop('threadpool_api', 'blas')
with threadpool_limits(limits=nthreads_per_worker, user_api=threadpool_api):
    # Construct binning tasks
    for i in tqdm(range(0, len(self.files), ncores), disable=not pbar):
        coreTasks = []  # Core-level jobs
        for j in range(0, ncores):
            # Fill up worker threads
            ij = i + j
            if ij >= len(self.files):
                break
            file = self.files[ij]
            coreTasks.append(d.delayed(hdf5Processor(file).localBinning)(
                axes=axes, nbins=nbins, ranges=ranges, **binning_kwds))
        if len(coreTasks) > 0:
            coreResults = d.compute(*coreTasks, scheduler=scheduler, **compute_kwds)
            # Combine all core results for a dataframe partition
            # Fast parallel version with Dask
def run_one(n_times, n_times_atom, n_atoms, n_channels, noise_level,
            random_state, reg, tol, strategy, dicod_args):
    threadpool_limits(1)

    tag = f"[{strategy} - {n_times} - {reg}]"
    current_time = time.time() - START
    msg = f"\r{tag} started at T={current_time:.0f} sec"
    print(colorify(msg, BLUE))

    X, D_hat, lmbd_max = simulate_data(
        n_times, n_times_atom, n_atoms, n_channels, noise_level,
        random_state=random_state)
    reg_ = reg * lmbd_max

    n_seg = 1
    if strategy == 'lgcd':
        n_seg = 'auto'
    # NOTE: strategy, n_seg and **dicod_args are assumed here for the trailing
    # arguments of this call.
    *_, pobj, run_statistics = dicod(X, D_hat, reg_, n_workers=1, tol=tol,
                                     strategy=strategy, n_seg=n_seg,
                                     **dicod_args)
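# A hedged sketch of the same idea as run_one above: call threadpool_limits(1)
# at the top of a function that executes in a separate worker process, so every
# worker keeps its BLAS calls single-threaded.  `worker_task` is hypothetical.
from multiprocessing import Pool

import numpy as np
from threadpoolctl import threadpool_limits

def worker_task(seed):
    threadpool_limits(1)  # applies only inside this worker process
    rng = np.random.default_rng(seed)
    a = rng.standard_normal((300, 300))
    return float(np.trace(a @ a))

if __name__ == "__main__":
    with Pool(processes=2) as pool:
        print(pool.map(worker_task, range(4)))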
    self._emul_s_to_core = list(emul_s_to_core)

    # Save total number of emulator systems
    self._n_emul_s_tot = sum([len(seq) for seq in emul_s_to_core])

    # Assign the emulator systems to the various MPI ranks
    self._emul_s = self._comm.scatter(emul_s_to_core, 0)

    # Calculate the optimal number of OpenMP threads if using MPI
    if(MPI.COMM_WORLD.Get_size() > 1):
        omp_num_threads = max(1, self._size//self._n_emul_s_tot)
    else:
        omp_num_threads = None

    # Tell workers to use this number of OpenMP threads
    tpc.threadpool_limits(self._comm.bcast(omp_num_threads, 0), 'blas')

# Workers get their emulator systems assigned
else:
    # Receive assigned emulator systems
    self._emul_s = self._comm.scatter(None, 0)

    # Set number of OpenMP threads
    tpc.threadpool_limits(self._comm.bcast(None, 0), 'blas')
# Temporarily manually swap the CFilter for RFilter
# Every rank logs what systems were assigned to it
# TODO: Remove the need to do this manually
logger.filters = [logger.PRISM_filters['RFilter']]
logger.info("Received emulator systems %s." % (self._emul_s))
logger.filters = [logger.PRISM_filters['CFilter']]
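# A minimal sketch (assuming mpi4py is installed) of the broadcast pattern
# above: rank 0 decides on a per-rank thread budget and every rank applies the
# broadcast value through threadpoolctl.  The total budget of 4 threads is a
# hypothetical example value.
from mpi4py import MPI
import threadpoolctl as tpc

comm = MPI.COMM_WORLD
if comm.Get_rank() == 0:
    n_threads = max(1, 4 // comm.Get_size())
else:
    n_threads = None  # non-root ranks receive the value via bcast
tpc.threadpool_limits(comm.bcast(n_threads, root=0), 'blas')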
    # serial
    xs, ys, ss, Ds, DTs = [], [], [], [], []
    for i in range(batch_size):
        x, y, s, D, DT = solve_and_derivative(
            As[i], bs[i], cs[i], cone_dicts[i], warm_starts[i],
            mode=mode, **kwargs)
        xs += [x]
        ys += [y]
        ss += [s]
        Ds += [D]
        DTs += [DT]
else:
    # thread pool
    pool = ThreadPool(processes=n_jobs_forward)
    args = [(A, b, c, cone_dict, warm_start, mode, kwargs) for A, b, c, cone_dict, warm_start in
            zip(As, bs, cs, cone_dicts, warm_starts)]
    with threadpool_limits(limits=1):
        results = pool.starmap(solve_and_derivative_wrapper, args)
    pool.close()
    xs = [r[0] for r in results]
    ys = [r[1] for r in results]
    ss = [r[2] for r in results]
    Ds = [r[3] for r in results]
    DTs = [r[4] for r in results]

if n_jobs_backward == 1:
    def D_batch(dAs, dbs, dcs, **kwargs):
        dxs, dys, dss = [], [], []
        for i in range(batch_size):
            dx, dy, ds = Ds[i](dAs[i], dbs[i], dcs[i], **kwargs)
            dxs += [dx]
            dys += [dy]
            dss += [ds]
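# A minimal sketch of the thread-pool pattern above: run several NumPy-heavy
# calls in Python threads while threadpool_limits(limits=1) keeps each BLAS
# call single-threaded, so the threads do not oversubscribe the CPU.
# `solve_one` is a hypothetical stand-in for solve_and_derivative_wrapper.
from multiprocessing.pool import ThreadPool

import numpy as np
from threadpoolctl import threadpool_limits

def solve_one(a, b):
    return np.linalg.solve(a, b)

rng = np.random.default_rng(0)
args = [(rng.standard_normal((200, 200)), rng.standard_normal(200))
        for _ in range(4)]
pool = ThreadPool(processes=4)
with threadpool_limits(limits=1):
    solutions = pool.starmap(solve_one, args)
pool.close()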