Parameters
----------
X : np.array
    List of datapoints

Returns
-------
n_jobs : int
    Number of jobs
list of int
    List of number of datapoints per job
list of int
    List of start values for every job list
"""
n_datapoints = len(X)
n_jobs = min(effective_n_jobs(self.n_jobs), n_datapoints)
# Distribute the datapoints as evenly as possible: the first
# (n_datapoints % n_jobs) jobs each receive one extra datapoint.
n_datapoints_per_job = np.full(
    n_jobs, n_datapoints // n_jobs, dtype=int)
n_datapoints_per_job[:n_datapoints % n_jobs] += 1
starts = np.cumsum(n_datapoints_per_job)
return n_jobs, n_datapoints_per_job.tolist(), [0] + starts.tolist()
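
# A minimal, hypothetical sketch (not part of any library) of the
# partitioning above as a free function; `partition_evenly` is an assumed
# name. On a machine with at least 4 cores, 10 datapoints across 4 jobs
# split as shown in the final comment.
import numpy as np
from joblib import effective_n_jobs

def partition_evenly(n_datapoints, n_jobs):
    n_jobs = min(effective_n_jobs(n_jobs), n_datapoints)
    per_job = np.full(n_jobs, n_datapoints // n_jobs, dtype=int)
    per_job[:n_datapoints % n_jobs] += 1
    starts = np.cumsum(per_job)
    return n_jobs, per_job.tolist(), [0] + starts.tolist()

# partition_evenly(10, 4) -> (4, [3, 3, 2, 2], [0, 3, 6, 8, 10])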
rfe = RFE(estimator=self.estimator,
          n_features_to_select=self.min_features_to_select,
          step=self.step, verbose=self.verbose)

# Determine the number of subsets of features by fitting across
# the train folds and choosing the "features_to_select" parameter
# that gives the least averaged error across all folds.

# Note that joblib raises a non-picklable error for bound methods
# even if n_jobs is set to 1 with the default multiprocessing
# backend.
# This branching is done to make sure that user code that sets
# n_jobs to 1 and provides bound methods as scorers is not broken
# with the addition of the n_jobs parameter in version 0.18.
if effective_n_jobs(self.n_jobs) == 1:
    parallel, func = list, _rfe_single_fit
else:
    parallel = Parallel(n_jobs=self.n_jobs)
    func = delayed(_rfe_single_fit)

scores = parallel(
    func(rfe, self.estimator, X, y, train, test, scorer)
    for train, test in cv.split(X, y, groups))

scores = np.sum(scores, axis=0)
# Pick the smallest number of features that ties the best score:
# reversing before argmax selects the last occurrence of the maximum.
scores_rev = scores[::-1]
argmax_idx = len(scores) - np.argmax(scores_rev) - 1
n_features_to_select = max(
    n_features - (argmax_idx * step),
    self.min_features_to_select)
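
# A minimal sketch of the dispatch pattern above, with hypothetical names
# (`slow_square` is not a scikit-learn function): when only one job is
# effective, plain `list` consumes the generator sequentially, so the same
# call site serves both the serial and the parallel path.
from joblib import Parallel, delayed, effective_n_jobs

def slow_square(x):
    return x * x

def run(items, n_jobs):
    if effective_n_jobs(n_jobs) == 1:
        parallel, func = list, slow_square
    else:
        parallel = Parallel(n_jobs=n_jobs)
        func = delayed(slow_square)
    return parallel(func(x) for x in items)

# run(range(5), 1) and run(range(5), 2) both return [0, 1, 4, 9, 16]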
if effective_n_jobs(n_jobs) == 1 or algorithm == 'threshold':
    code = _sparse_encode(X,
                          dictionary, gram, cov=cov,
                          algorithm=algorithm,
                          regularization=regularization, copy_cov=copy_cov,
                          init=init,
                          max_iter=max_iter,
                          check_input=False,
                          verbose=verbose,
                          positive=positive)
    return code

# Enter parallel code block: encode even slices of the samples in
# separate jobs and stitch the per-slice codes back together.
code = np.empty((n_samples, n_components))
slices = list(gen_even_slices(n_samples, effective_n_jobs(n_jobs)))

code_views = Parallel(n_jobs=n_jobs, verbose=verbose)(
    delayed(_sparse_encode)(
        X[this_slice], dictionary, gram,
        cov[:, this_slice] if cov is not None else None,
        algorithm,
        regularization=regularization, copy_cov=copy_cov,
        init=init[this_slice] if init is not None else None,
        max_iter=max_iter,
        check_input=False,
        verbose=verbose,
        positive=positive)
    for this_slice in slices)
for this_slice, this_view in zip(slices, code_views):
    code[this_slice] = this_view
return code
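
# A minimal sketch (hypothetical worker, not scikit-learn internals) of the
# scatter/gather pattern above: split rows with gen_even_slices, process
# each slice in a separate job, and write the results back into one array.
import numpy as np
from joblib import Parallel, delayed, effective_n_jobs
from sklearn.utils import gen_even_slices

def double_rows(block):
    return 2 * block

X = np.arange(12, dtype=float).reshape(6, 2)
n_jobs = 2
slices = list(gen_even_slices(X.shape[0], effective_n_jobs(n_jobs)))
out = np.empty_like(X)
views = Parallel(n_jobs=n_jobs)(
    delayed(double_rows)(X[s]) for s in slices)
for s, view in zip(slices, views):
    out[s] = view
# out == 2 * X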
"""Break the pairwise matrix in n_jobs even slices
and compute them in parallel"""
if Y is None:
Y = X
X, Y, dtype = _return_float_dtype(X, Y)
if effective_n_jobs(n_jobs) == 1:
return func(X, Y, **kwds)
# enforce a threading backend to prevent data communication overhead
fd = delayed(_dist_wrapper)
ret = np.empty((X.shape[0], Y.shape[0]), dtype=dtype, order='F')
Parallel(backend="threading", n_jobs=n_jobs)(
fd(func, ret, s, X, Y[s], **kwds)
for s in gen_even_slices(_num_samples(Y), effective_n_jobs(n_jobs)))
if (X is Y or Y is None) and func is euclidean_distances:
# zeroing diagonal for euclidean norm.
# TODO: do it also for other norms.
np.fill_diagonal(ret, 0)
return ret
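
# A minimal sketch (hypothetical functions, not scikit-learn internals) of
# the in-place pattern above: with the threading backend, workers share
# memory, so each can fill its own column slice of a preallocated array.
import numpy as np
from joblib import Parallel, delayed, effective_n_jobs
from sklearn.utils import gen_even_slices

def fill_slice(dst, s, X, Y_block):
    # Squared euclidean distances between all rows of X and a block of Y.
    dst[:, s] = ((X[:, None, :] - Y_block[None, :, :]) ** 2).sum(-1)

X = np.random.rand(8, 3)
Y = np.random.rand(10, 3)
n_jobs = 2
ret = np.empty((X.shape[0], Y.shape[0]), order='F')
Parallel(backend="threading", n_jobs=n_jobs)(
    delayed(fill_slice)(ret, s, X, Y[s])
    for s in gen_even_slices(Y.shape[0], effective_n_jobs(n_jobs)))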
dtype = bool if metric in PAIRWISE_BOOLEAN_FUNCTIONS else None

if (dtype == bool and
        (X.dtype != bool or (Y is not None and Y.dtype != bool))):
    msg = "Data was converted to boolean for metric %s" % metric
    warnings.warn(msg, DataConversionWarning)

X, Y = check_pairwise_arrays(X, Y, dtype=dtype,
                             force_all_finite=force_all_finite)

# Precompute data-derived metric params (e.g. V for "seuclidean").
params = _precompute_metric_params(X, Y, metric=metric, **kwds)
kwds.update(**params)

# With a single job and X is Y, scipy's pdist computes each pair only
# once; squareform expands the result to the full symmetric matrix.
if effective_n_jobs(n_jobs) == 1 and X is Y:
    return distance.squareform(distance.pdist(X, metric=metric,
                                              **kwds))
func = partial(distance.cdist, metric=metric, **kwds)
return _parallel_pairwise(X, Y, func, n_jobs, **kwds)
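
# A short sketch of the scipy shortcut used above: for a symmetric problem,
# pdist + squareform and cdist(X, X) produce the same matrix, but pdist
# computes each pair only once.
import numpy as np
from scipy.spatial import distance

X = np.random.rand(5, 3)
full_from_pdist = distance.squareform(distance.pdist(X, metric='cityblock'))
full_from_cdist = distance.cdist(X, X, metric='cityblock')
assert np.allclose(full_from_pdist, full_from_cdist)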
dissimilarities = check_array(dissimilarities)
random_state = check_random_state(random_state)

if hasattr(init, '__array__'):
    init = np.asarray(init).copy()
    if n_init != 1:
        warnings.warn(
            'Explicit initial positions passed: '
            'performing only one init of the MDS instead of %d'
            % n_init)
        n_init = 1

best_pos, best_stress = None, None
if effective_n_jobs(n_jobs) == 1:
    for it in range(n_init):
        pos, stress, n_iter_ = _smacof_single(
            dissimilarities, metric=metric,
            n_components=n_components, init=init,
            max_iter=max_iter, verbose=verbose,
            eps=eps, random_state=random_state)
        if best_stress is None or stress < best_stress:
            best_stress = stress
            best_pos = pos.copy()
            best_iter = n_iter_
else:
    # Draw one seed per init so each parallel run is reproducible.
    seeds = random_state.randint(np.iinfo(np.int32).max, size=n_init)
    results = Parallel(n_jobs=n_jobs, verbose=max(verbose - 1, 0))(
        delayed(_smacof_single)(
            dissimilarities, metric=metric, n_components=n_components,
            init=init, max_iter=max_iter, verbose=verbose, eps=eps,
            random_state=seed)
        for seed in seeds)
    positions, stress, n_iters = zip(*results)
    best = np.argmin(stress)
    best_stress = stress[best]
    best_pos = positions[best]
    best_iter = n_iters[best]
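
# A minimal sketch (toy objective, not the MDS code) of the best-of-n-inits
# pattern above: draw one int32 seed per restart so parallel runs stay
# reproducible, run the restarts with joblib, and keep the lowest score.
import numpy as np
from joblib import Parallel, delayed
from sklearn.utils import check_random_state

def one_restart(seed):
    rng = np.random.RandomState(seed)
    pos = rng.rand(4, 2)             # stand-in for an embedding
    score = float(np.sum(pos ** 2))  # stand-in for the stress
    return pos, score

random_state = check_random_state(0)
seeds = random_state.randint(np.iinfo(np.int32).max, size=8)
results = Parallel(n_jobs=2)(delayed(one_restart)(s) for s in seeds)
positions, scores = zip(*results)
best = int(np.argmin(scores))
best_pos, best_score = positions[best], scores[best]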