How to use the suod.models.parallel_processes.balanced_scheduling function in suod

To help you get started, we’ve selected a few suod examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github yzhao062 / SUOD / suod / models / base.py View on Github external
# build flags for random projection
        self.rp_flags_, _ = build_codes(self.base_estimators, self.rp_clf_list,
                                        self.rp_ng_clf_list,
                                        self.rp_flag_global)

        # decide whether bps is needed
        # it is turned off
        if self.bps_flag:
            # load the pre-trained cost predictor to forecast the train cost
            cost_predictor = joblib.load(self.cost_forecast_loc_fit_)

            time_cost_pred = cost_forecast_meta(cost_predictor, X,
                                                self.base_estimator_names)

            # use BPS
            n_estimators_list, starts, n_jobs = balanced_scheduling(
                time_cost_pred, self.n_estimators, self.n_jobs)
        else:
            # use the default sklearn equal split
            n_estimators_list, starts, n_jobs = _partition_estimators(
                self.n_estimators, self.n_jobs)

        # fit the base models
        print('Parallel Training...')
        start = time.time()

        # TODO: code cleanup. There is an existing bug for joblib on Windows:
        # https://github.com/joblib/joblib/issues/806
        # a fix is on the way: https://github.com/joblib/joblib/pull/966
        # max_nbytes can be dropped on other OS
        all_results = Parallel(n_jobs=n_jobs, max_nbytes=None, verbose=True)(
            delayed(_parallel_fit)(
github yzhao062 / SUOD / suod / models / base.py View on Github external
anomaly_scores : numpy array of shape (n_samples,)
            The anomaly score of the input samples.
        """
        X = check_array(X)
        n_samples, n_features = X.shape[0], X.shape[1]

        # decide whether bps is needed
        # it is turned off
        if self.bps_flag:
            # load the pre-trained cost predictor to forecast prediction cost
            cost_predictor = joblib.load(self.cost_forecast_loc_pred_)

            time_cost_pred = cost_forecast_meta(cost_predictor, X,
                                                self.base_estimator_names)

            n_estimators_list, starts, n_jobs = balanced_scheduling(
                time_cost_pred, self.n_estimators, self.n_jobs)
        else:
            # use simple equal split by sklearn
            n_estimators_list, starts, n_jobs = _partition_estimators(
                self.n_estimators, self.n_jobs)

        # predict with the base models in parallel
        if self.verbose:
            print('Parallel score prediction...')
            start = time.time()

        # TODO: code cleanup. There is an existing bug for joblib on Windows:
        # https://github.com/joblib/joblib/issues/806
        # max_nbytes can be dropped on other OS
        all_results_scores = Parallel(n_jobs=n_jobs, max_nbytes=None,
                                      verbose=True)(
github yzhao062 / SUOD / suod / models / base.py View on Github external
it should be considered as an outlier according to the
            fitted model. 0 stands for inliers and 1 for outliers.
        """
        X = check_array(X)
        n_samples, n_features = X.shape[0], X.shape[1]

        # decide whether bps is needed
        # it is turned off
        if self.bps_flag:
            # load the pre-trained cost predictor to forecast the train cost
            cost_predictor = joblib.load(self.cost_forecast_loc_pred_)

            time_cost_pred = cost_forecast_meta(cost_predictor, X,
                                                self.base_estimator_names)

            n_estimators_list, starts, n_jobs = balanced_scheduling(
                time_cost_pred, self.n_estimators, self.n_jobs)
        else:
            # use simple equal split by sklearn
            n_estimators_list, starts, n_jobs = _partition_estimators(
                self.n_estimators, self.n_jobs)

        # predict with the base models in parallel
        if self.verbose:
            print('Parallel label prediction...')
            start = time.time()

        # TODO: code cleanup. There is an existing bug for joblib on Windows:
        # https://github.com/joblib/joblib/issues/806
        # max_nbytes can be dropped on other OS
        all_results_pred = Parallel(n_jobs=n_jobs, max_nbytes=None,
                                    verbose=True)(
github yzhao062 / SUOD / examples / do_not_use_demo_full.py View on Github external
rp_flag_global = True
objective_dim = 6
rp_method = 'discrete'

# build flags for random projection
rp_flags, base_estimator_names = build_codes(base_estimators, rp_clf_list,
                                             rp_ng_clf_list, rp_flag_global)

# load the pre-trained cost predictor to forecast the train cost
clf_train = joblib.load(
    os.path.join('../suod', 'models', 'saved_models', 'bps_train.joblib'))

time_cost_pred = cost_forecast_meta(clf_train, X, base_estimator_names)

# schedule the tasks
n_estimators_list, starts, n_jobs = balanced_scheduling(time_cost_pred,
                                                        n_estimators, n_jobs)

print(starts)  # boundaries used to slice base_estimators for each job
start = time.time()

print('Parallel Training...')

# TODO: code cleanup. There is an existing bug for joblib on Windows:
# https://github.com/joblib/joblib/issues/806
# max_nbytes can be dropped on other OS
all_results = Parallel(n_jobs=n_jobs, max_nbytes=None, verbose=True)(
    delayed(_parallel_fit)(
        n_estimators_list[i],
        base_estimators[starts[i]:starts[i + 1]],
        X,
        n_estimators,
github yzhao062 / SUOD / suod / models / base.py View on Github external
fitted model. Return the outlier probability, ranging
            in [0,1].
        """
        X = check_array(X)
        n_samples, n_features = X.shape[0], X.shape[1]

        # decide whether bps is needed
        # it is turned off
        if self.bps_flag:
            # load the pre-trained cost predictor to forecast the train cost
            cost_predictor = joblib.load(self.cost_forecast_loc_pred_)

            time_cost_pred = cost_forecast_meta(cost_predictor, X,
                                                self.base_estimator_names)

            n_estimators_list, starts, n_jobs = balanced_scheduling(
                time_cost_pred, self.n_estimators, self.n_jobs)
        else:
            # use simple equal split by sklearn
            n_estimators_list, starts, n_jobs = _partition_estimators(
                self.n_estimators, self.n_jobs)

        # predict with the base models in parallel
        if self.verbose:
            print('Parallel score prediction...')
            start = time.time()

        # TODO: code cleanup. There is an existing bug for joblib on Windows:
        # https://github.com/joblib/joblib/issues/806
        # max_nbytes can be dropped on other OS
        all_results_scores = Parallel(n_jobs=n_jobs, max_nbytes=None,
                                      verbose=True)(
github yzhao062 / SUOD / examples / do_not_use_demo_full.py View on Github external
approximators = _unfold_parallel(all_approx_results, n_jobs)


# %% Second BPS for prediction
###############################################################################
# still build the rank sum by BPS
# load the pre-trained cost predictor to forecast the prediction cost
clf_prediction = joblib.load(
    os.path.join('../suod', 'models', 'saved_models', 'bps_prediction.joblib'))

time_cost_pred = cost_forecast_meta(clf_prediction, X, base_estimator_names)

# TODO: add a second-stage tuner for prediction stage

n_estimators_list, starts, n_jobs = balanced_scheduling(time_cost_pred,
                                                        n_estimators, n_jobs)

print('Parallel Label Predicting without Approximators...')

# all_results_pred = Parallel(n_jobs=n_jobs, max_nbytes=None, verbose=True)(
#     delayed(_parallel_predict)(
#         n_estimators_list[i],
#         trained_estimators[starts[i]:starts[i + 1]],
#         approximators[starts[i]:starts[i + 1]],
#         X,
#         n_estimators,
#         rp_flags[starts[i]:starts[i + 1]],
#         jl_transformers[starts[i]:starts[i + 1]],
#         approx_flags[starts[i]:starts[i + 1]],
#         contamination,
#         verbose=True)