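# assumed imports for this excerpt; the helper functions
# (cost_forecast_meta, balanced_scheduling, _parallel_fit,
# _partition_estimators, _unfold_parallel) come from the package's
# internal modules, whose exact paths may differ from checkout to checkout
import time

import numpy as np
from joblib import Parallel, delayed

# forecast each estimator's training cost with the meta cost predictor
# (clf_train here)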
time_cost_pred = cost_forecast_meta(clf_train, X, base_estimator_names)
# schedule the tasks across workers based on the predicted costs
n_estimators_list, starts, n_jobs = balanced_scheduling(
    time_cost_pred, n_estimators, n_jobs)
print(starts)  # split points: worker i trains base_estimators[starts[i]:starts[i + 1]]
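# e.g. with 10 estimators on 3 workers, starts might come back as
# [0, 2, 5, 10] if the first two estimators are predicted to be the most
# expensive (values are purely illustrative)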
start = time.time()
print('Parallel Training...')
# TODO: code cleanup. There is an existing bug for joblib on Windows:
# https://github.com/joblib/joblib/issues/806
# max_nbytes can be dropped on other operating systems
all_results = Parallel(n_jobs=n_jobs, max_nbytes=None, verbose=True)(
    delayed(_parallel_fit)(
        n_estimators_list[i],
        base_estimators[starts[i]:starts[i + 1]],
        X,
        n_estimators,
        rp_flags[starts[i]:starts[i + 1]],
        objective_dim,
        rp_method=rp_method,
        verbose=True)
    for i in range(n_jobs))
print('Balanced Scheduling Total Train Time:', time.time() - start)
# reformat and unfold the lists. Save the trained estimators and transformers
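# (each worker returns a (estimators, transformers) pair, so the zip(*...)
# below transposes per-worker pairs into per-field lists, e.g.
# [(e0, t0), (e1, t1)] -> [[e0, e1], [t0, t1]])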
all_results = list(map(list, zip(*all_results)))
trained_estimators = _unfold_parallel(all_results[0], n_jobs)
jl_transformers = _unfold_parallel(all_results[1], n_jobs)
##########################################################################
# compare with no projection, no bps, and no approximation
print("******************************************************************")
start = time.time()
n_estimators = len(base_estimators)
n_estimators_list, starts, n_jobs = _partition_estimators(
    n_estimators, n_jobs)
rp_flags = np.zeros([n_estimators, 1])
approx_flags = np.zeros([n_estimators, 1])
objective_dim = None
rp_method = None
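# all-zero rp_flags/approx_flags with objective_dim and rp_method unset
# disable random projection and approximation, making this run the plain
# unaccelerated baseline for the timing comparison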
all_results = Parallel(n_jobs=n_jobs, max_nbytes=None, verbose=True)(
    delayed(_parallel_fit)(
        n_estimators_list[i],
        base_estimators[starts[i]:starts[i + 1]],
        X_train,
        n_estimators,
        rp_flags[starts[i]:starts[i + 1]],
        objective_dim,
        rp_method=rp_method,
        verbose=True)
    for i in range(n_jobs))
print('Orig Fit time:', time.time() - start)
print()
all_results = list(map(list, zip(*all_results)))
trained_estimators = _unfold_parallel(all_results[0], n_jobs)
jl_transformers = _unfold_parallel(all_results[1], n_jobs)
##########################################################################
# compare with no projection, no bps, and no approximation,
# this time with the worker count fixed to 6
print("******************************************************************")
n_estimators = len(base_estimators)
n_jobs = 6
n_estimators_list, starts, n_jobs = _partition_estimators(
    n_estimators, n_jobs)
rp_flags = np.zeros([n_estimators, 1])
approx_flags = np.zeros([n_estimators, 1])
objective_dim = None
rp_method = None
start = time.time()
all_results = Parallel(n_jobs=n_jobs, max_nbytes=None, verbose=True)(
    delayed(_parallel_fit)(
        n_estimators_list[i],
        base_estimators[starts[i]:starts[i + 1]],
        X,
        n_estimators,
        rp_flags[starts[i]:starts[i + 1]],
        objective_dim,
        rp_method=rp_method,
        verbose=True)
    for i in range(n_jobs))
print('Orig Fit time:', time.time() - start)
print()
all_results = list(map(list, zip(*all_results)))
trained_estimators = _unfold_parallel(all_results[0], n_jobs)
jl_transformers = _unfold_parallel(all_results[1], n_jobs)
if self.bps_flag:
    # use balanced scheduling driven by the forecasted training costs
    n_estimators_list, starts, n_jobs = balanced_scheduling(
        time_cost_pred, self.n_estimators, self.n_jobs)
else:
    # use the default sklearn equal split
    n_estimators_list, starts, n_jobs = _partition_estimators(
        self.n_estimators, self.n_jobs)
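# e.g. an equal split of 10 estimators over 3 jobs yields
# n_estimators_list == [4, 3, 3] and starts == [0, 4, 7, 10]
# (illustrative values)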
# fit the base models
print('Parallel Training...')
start = time.time()
# TODO: code cleanup. There is an existing bug for joblib on Windows:
# https://github.com/joblib/joblib/issues/806
# a fix is on the way: https://github.com/joblib/joblib/pull/966
# max_nbytes can be dropped on other operating systems
all_results = Parallel(n_jobs=n_jobs, max_nbytes=None, verbose=True)(
    delayed(_parallel_fit)(
        n_estimators_list[i],
        self.base_estimators[starts[i]:starts[i + 1]],
        X,
        self.n_estimators,
        self.rp_flags[starts[i]:starts[i + 1]],
        self.target_dim_frac_,
        self.jl_method,
        verbose=self.verbose)
    for i in range(n_jobs))
print('Balanced Scheduling Total Train Time:', time.time() - start)
# reformat and unfold the lists. Save the trained estimators and transformers
all_results = list(map(list, zip(*all_results)))
# overwrite estimators and transformers with the fitted results
# (attribute names assumed by analogy with the standalone snippets above)
self.base_estimators = _unfold_parallel(all_results[0], n_jobs)
self.jl_transformers_ = _unfold_parallel(all_results[1], n_jobs)
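# hypothetical end-to-end usage of the fit method above (class location and
# constructor arguments assumed, not confirmed by this excerpt):
#
#   from suod.models.base import SUOD
#   model = SUOD(base_estimators=base_estimators, n_jobs=6, bps_flag=True)
#   model.fit(X)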