Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
n_estimators_list[i],
base_estimators[starts[i]:starts[i + 1]],
X,
n_estimators,
rp_flags[starts[i]:starts[i + 1]],
objective_dim,
rp_method=rp_method,
verbose=True)
for i in range(n_jobs))
# Report the time elapsed since `start` was recorded.
print('Balanced Scheduling Total Train Time:', time.time() - start)

# Transpose the per-job results so that each entry groups one kind of output
# across all jobs, then unfold the first two groups (trained estimators and
# transformers) with `_unfold_parallel`.
all_results = [list(group) for group in zip(*all_results)]
trained_estimators = _unfold_parallel(all_results[0], n_jobs)
jl_transformers = _unfold_parallel(all_results[1], n_jobs)

###############################################################################
# %% Model Approximation
# Name lists and global switch for the approximation stage.
# NOTE(review): presumably `approx_clf_list` holds the models to approximate
# and `approx_ng_clf_list` the ones to leave alone -- confirm against
# `build_codes`.
approx_flag_global = True
approx_ng_clf_list = ['IForest', 'PCA', 'HBOS', 'ABOD']
approx_clf_list = ['LOF', 'KNN']

# Regressor handed to the approximation step; a pre-defined list could be
# supplied to the system directly instead.
approx_clf = RandomForestRegressor(n_estimators=100)
approx_flags, base_estimator_names = build_codes(base_estimators,
approx_clf_list,
approx_ng_clf_list,
delayed(_parallel_fit)(
n_estimators_list[i],
base_estimators[starts[i]:starts[i + 1]],
X,
n_estimators,
rp_flags[starts[i]:starts[i + 1]],
objective_dim,
rp_method=rp_method,
verbose=True)
for i in range(n_jobs))
# Report the elapsed fit time, followed by one empty line.
print('Orig Fit time:', time.time() - start, end='\n\n')

# Transpose the per-job results, then unfold the first two groups (trained
# estimators and transformers) with `_unfold_parallel`.
all_results = [list(group) for group in zip(*all_results)]
trained_estimators = _unfold_parallel(all_results[0], n_jobs)
jl_transformers = _unfold_parallel(all_results[1], n_jobs)

##########################################################################
# Restart the timer for the next stage.
start = time.time()
# model prediction
all_results_pred = Parallel(n_jobs=n_jobs, max_nbytes=None,
verbose=True)(
delayed(_parallel_predict)(
n_estimators_list[i],
trained_estimators[starts[i]:starts[i + 1]],
None,
X,
n_estimators,
rp_flags[starts[i]:starts[i + 1]],
jl_transformers,
approx_flags[starts[i]:starts[i + 1]],
all_approx_results = Parallel(n_jobs=n_jobs, verbose=True)(
delayed(_parallel_approx_estimators)(
n_estimators_list[i],
self.base_estimators[starts[i]:starts[i + 1]],
X, # if it is a PyOD model, we do not need this
self.n_estimators,
self.approx_flags[starts[i]:starts[i + 1]],
self.approx_clf,
self.jl_transformers_[starts[i]:starts[i + 1]],
verbose=True)
for i in range(n_jobs))
# print('Balanced Scheduling Total Test Time:', time.time() - start)
self.approximators = _unfold_parallel(all_approx_results, n_jobs)
return self
X,
self.n_estimators,
self.rp_flags[starts[i]:starts[i + 1]],
self.target_dim_frac_,
self.jl_method,
verbose=self.verbose)
for i in range(n_jobs))
print('Balanced Scheduling Total Train Time:', time.time() - start)
# reformat and unfold the lists. Save the trained estimators and transformers
all_results = list(map(list, zip(*all_results)))
# overwrite estimators
self.base_estimators = _unfold_parallel(all_results[0], n_jobs)
self.jl_transformers_ = _unfold_parallel(all_results[1], n_jobs)
return self
n_estimators_list[i],
base_estimators[starts[i]:starts[i + 1]],
X_train,
n_estimators,
rp_flags[starts[i]:starts[i + 1]],
objective_dim,
rp_method=rp_method,
verbose=True)
for i in range(n_jobs))
# Report the elapsed fit time, followed by one empty line.
print('Orig Fit time:', time.time() - start, end='\n\n')

# Transpose the per-job results, then unfold the first two groups (trained
# estimators and transformers) with `_unfold_parallel`.
all_results = [list(group) for group in zip(*all_results)]
trained_estimators = _unfold_parallel(all_results[0], n_jobs)
jl_transformers = _unfold_parallel(all_results[1], n_jobs)

##########################################################################
# Restart the timer and re-partition the estimators across jobs.
start = time.time()
n_estimators = len(base_estimators)
n_estimators_list, starts, n_jobs = _partition_estimators(n_estimators, n_jobs)
# model prediction
all_results_pred = Parallel(n_jobs=n_jobs, max_nbytes=None,
verbose=True)(
delayed(_parallel_predict)(
n_estimators_list[i],
trained_estimators[starts[i]:starts[i + 1]],
None,
X_test,
n_estimators,
jl_transformers,
n_estimators_list[i],
base_estimators[starts[i]:starts[i + 1]],
X,
n_estimators,
rp_flags[starts[i]:starts[i + 1]],
objective_dim,
rp_method=rp_method,
verbose=True)
for i in range(n_jobs))
# Report the elapsed fit time, followed by one empty line.
print('Orig Fit time:', time.time() - start, end='\n\n')

# Transpose the per-job results, then unfold the first two groups (trained
# estimators and transformers) with `_unfold_parallel`.
all_results = [list(group) for group in zip(*all_results)]
trained_estimators = _unfold_parallel(all_results[0], n_jobs)
jl_transformers = _unfold_parallel(all_results[1], n_jobs)

##########################################################################
# Restart the timer for the next stage.
start = time.time()
# model prediction
all_results_pred = Parallel(n_jobs=n_jobs, max_nbytes=None,
verbose=True)(
delayed(_parallel_predict)(
n_estimators_list[i],
trained_estimators[starts[i]:starts[i + 1]],
None,
X,
n_estimators,
rp_flags[starts[i]:starts[i + 1]],
jl_transformers,
approx_flags[starts[i]:starts[i + 1]],
contamination,
self.base_estimators[starts[i]:starts[i + 1]],
X,
self.n_estimators,
self.rp_flags[starts[i]:starts[i + 1]],
self.target_dim_frac_,
self.jl_method,
verbose=self.verbose)
for i in range(n_jobs))
print('Balanced Scheduling Total Train Time:', time.time() - start)
# reformat and unfold the lists. Save the trained estimators and transformers
all_results = list(map(list, zip(*all_results)))
# overwrite estimators
self.base_estimators = _unfold_parallel(all_results[0], n_jobs)
self.jl_transformers_ = _unfold_parallel(all_results[1], n_jobs)
return self
delayed(_parallel_fit)(
n_estimators_list[i],
base_estimators[starts[i]:starts[i + 1]],
X_train,
n_estimators,
rp_flags[starts[i]:starts[i + 1]],
objective_dim,
rp_method=rp_method,
verbose=True)
for i in range(n_jobs))
# Report the elapsed fit time, followed by one empty line.
print('Orig Fit time:', time.time() - start, end='\n\n')

# Transpose the per-job results, then unfold the first two groups (trained
# estimators and transformers) with `_unfold_parallel`.
all_results = [list(group) for group in zip(*all_results)]
trained_estimators = _unfold_parallel(all_results[0], n_jobs)
jl_transformers = _unfold_parallel(all_results[1], n_jobs)

##########################################################################
# Restart the timer and re-partition the estimators across jobs.
start = time.time()
n_estimators = len(base_estimators)
n_estimators_list, starts, n_jobs = _partition_estimators(n_estimators, n_jobs)
# model prediction
all_results_pred = Parallel(n_jobs=n_jobs, max_nbytes=None,
verbose=True)(
delayed(_parallel_predict)(
n_estimators_list[i],
trained_estimators[starts[i]:starts[i + 1]],
None,
X_test,
n_estimators,
delayed(_parallel_fit)(
n_estimators_list[i],
base_estimators[starts[i]:starts[i + 1]],
X,
n_estimators,
rp_flags[starts[i]:starts[i + 1]],
objective_dim,
rp_method=rp_method,
verbose=True)
for i in range(n_jobs))
# Report the time elapsed since `start` was recorded.
print('Balanced Scheduling Total Train Time:', time.time() - start)

# Transpose the per-job results so that each entry groups one kind of output
# across all jobs, then unfold the first two groups (trained estimators and
# transformers) with `_unfold_parallel`.
all_results = [list(group) for group in zip(*all_results)]
trained_estimators = _unfold_parallel(all_results[0], n_jobs)
jl_transformers = _unfold_parallel(all_results[1], n_jobs)

###############################################################################
# %% Model Approximation
# Name lists and global switch for the approximation stage.
# NOTE(review): presumably `approx_clf_list` holds the models to approximate
# and `approx_ng_clf_list` the ones to leave alone -- confirm against
# `build_codes`.
approx_flag_global = True
approx_ng_clf_list = ['IForest', 'PCA', 'HBOS', 'ABOD']
approx_clf_list = ['LOF', 'KNN']

# Regressor handed to the approximation step; a pre-defined list could be
# supplied to the system directly instead.
approx_clf = RandomForestRegressor(n_estimators=100)
approx_flags, base_estimator_names = build_codes(base_estimators,
approx_clf_list,
# TODO: there is a bug here. For some reason, the approximators do not match
# approx_flags.
# Run `_parallel_approx_estimators` once per job; each task receives the
# slice of trained estimators and approximation flags delimited by
# consecutive entries of `starts`.
all_approx_results = Parallel(
    n_jobs=n_jobs, max_nbytes=None, verbose=True
)(
    delayed(_parallel_approx_estimators)(
        n_estimators_list[job],
        trained_estimators[starts[job]:starts[job + 1]],
        X,  # if it is a PyOD model, we do not need this
        n_estimators,
        approx_flags[starts[job]:starts[job + 1]],
        approx_clf,
        verbose=True,
    )
    for job in range(n_jobs)
)
print('Balanced Scheduling Total Test Time:', time.time() - start)
approximators = _unfold_parallel(all_approx_results, n_jobs)

# %% Second BPS for prediction
###############################################################################
# The rank sum is still built by BPS: load the pre-trained cost predictor and
# use it to forecast the prediction cost of each base estimator.
# NOTE(review): the model path is relative, so it resolves against the
# current working directory -- confirm this is run from the expected folder.
clf_prediction = joblib.load(
    os.path.join('../suod', 'models', 'saved_models', 'bps_prediction.joblib')
)
time_cost_pred = cost_forecast_meta(clf_prediction, X, base_estimator_names)

# TODO: add a second-stage tuner for the prediction stage
n_estimators_list, starts, n_jobs = balanced_scheduling(
    time_cost_pred, n_estimators, n_jobs
)