How to use the suod.models.parallel_processes._partition_estimators function in suod

To help you get started, we’ve selected a few suod examples based on popular ways it is used in public projects.
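
Before working through the snippets, it is worth seeing what the function returns. Below is a minimal sketch, assuming _partition_estimators behaves like sklearn's estimator partitioner and returns the (n_estimators_list, starts, n_jobs) triple that every example on this page unpacks:

from suod.models.parallel_processes import _partition_estimators

# split 10 estimators across (at most) 4 workers
n_estimators_list, starts, n_jobs = _partition_estimators(10, n_jobs=4)

# n_estimators_list: estimators per worker, e.g. [3, 3, 2, 2]
# starts: cumulative slice offsets, e.g. [0, 3, 6, 8, 10];
# worker i handles estimators[starts[i]:starts[i + 1]]
print(n_estimators_list, starts, n_jobs)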


github yzhao062 / SUOD / examples / temp_do_not_use.py
            contamination,
            verbose=True)
        for i in range(n_jobs))

    print('Orig Predict time:', time.time() - start)
    print()

    # unfold and generate the label matrix
    predicted_labels_orig = np.zeros([X_test.shape[0], n_estimators])
    for i in range(n_jobs):
        predicted_labels_orig[:, starts[i]:starts[i + 1]] = np.asarray(
            all_results_pred[i]).T

    start = time.time()
    n_estimators = len(base_estimators)
    n_estimators_list, starts, n_jobs = _partition_estimators(n_estimators,
                                                              n_jobs)
    # parallel score prediction
    all_results_scores = Parallel(n_jobs=n_jobs, max_nbytes=None,
                                  verbose=True)(
        delayed(_parallel_decision_function)(
            n_estimators_list[i],
            trained_estimators[starts[i]:starts[i + 1]],
            None,
            X_test,
            n_estimators,
            # rp_flags[starts[i]:starts[i + 1]],
            jl_transformers,
            approx_flags[starts[i]:starts[i + 1]],
            verbose=True)
        for i in range(n_jobs))
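
The snippet cuts off right after the parallel scoring call. The scores are typically unfolded the same way as the labels above; here is a short sketch of that follow-up step, assuming each entry of all_results_scores is one worker's list of per-estimator score vectors:

import numpy as np

# stitch each worker's scores back into an (n_samples, n_estimators)
# matrix, reusing the same starts offsets that defined the slices
predicted_scores_orig = np.zeros([X_test.shape[0], n_estimators])
for i in range(n_jobs):
    predicted_scores_orig[:, starts[i]:starts[i + 1]] = np.asarray(
        all_results_scores[i]).T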
github yzhao062 / SUOD / examples / temp_do_not_use.py
    start = time.time()
    predicted_labels = model.predict(X_test)  # predict labels
    print('Predict time:', time.time() - start)
    print()

    start = time.time()
    predicted_scores = model.decision_function(X_test)  # predict scores
    print('Decision Function time:', time.time() - start)
    print()

    ##########################################################################
    # compare with no projection, no bps, and no approximation
    print("******************************************************************")
    start = time.time()
    n_estimators = len(base_estimators)
    n_estimators_list, starts, n_jobs = _partition_estimators(n_estimators,
                                                              n_jobs)

    rp_flags = np.zeros([n_estimators, 1])
    approx_flags = np.zeros([n_estimators, 1])
    objective_dim = None
    rp_method = None

    all_results = Parallel(n_jobs=n_jobs, max_nbytes=None, verbose=True)(
        delayed(_parallel_fit)(
            n_estimators_list[i],
            base_estimators[starts[i]:starts[i + 1]],
            X_train,
            n_estimators,
            rp_flags[starts[i]:starts[i + 1]],
            objective_dim,
            rp_method=rp_method,
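
The fit call above is truncated, but the usual next step in these examples is to collect the per-worker output into a single list of fitted models. A hedged sketch, assuming each entry of all_results is the list of estimators fitted by one worker:

# flatten the per-worker results back into one flat list of fitted models
trained_estimators = []
for i in range(n_jobs):
    trained_estimators.extend(all_results[i])

# every estimator should be accounted for exactly once
assert len(trained_estimators) == n_estimators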
github yzhao062 / SUOD / suod / models / base.py
        n_samples, n_features = X.shape[0], X.shape[1]

        # decide whether balanced parallel scheduling (bps) is needed;
        # fall back to the simple equal split below when it is turned off
        if self.bps_flag:
            # load the pre-trained cost predictor to forecast the train cost
            cost_predictor = joblib.load(self.cost_forecast_loc_pred_)

            time_cost_pred = cost_forecast_meta(cost_predictor, X,
                                                self.base_estimator_names)

            n_estimators_list, starts, n_jobs = balanced_scheduling(
                time_cost_pred, self.n_estimators, self.n_jobs)
        else:
            # use sklearn's simple equal split
            n_estimators_list, starts, n_jobs = _partition_estimators(
                self.n_estimators, self.n_jobs)

        # predict labels with the base models
        if self.verbose:
            print('Parallel label prediction...')
            start = time.time()

        # TODO: code cleanup. There is an existing bug for joblib on Windows:
        # https://github.com/joblib/joblib/issues/806
        # max_nbytes can be dropped on other operating systems
        all_results_pred = Parallel(n_jobs=n_jobs, max_nbytes=None,
                                    verbose=True)(
            delayed(_parallel_predict)(
                n_estimators_list[i],
                self.base_estimators[starts[i]:starts[i + 1]],
                self.approximators[starts[i]:starts[i + 1]],
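
Note the pattern here: _partition_estimators is the fallback when the cost-aware balanced_scheduling is disabled, and both return the same (n_estimators_list, starts, n_jobs) triple, so the downstream Parallel loop is unchanged. A sketch of the same decision outside SUOD's classes, assuming balanced_scheduling is importable from the same module as base.py suggests (the use_cost_forecast flag is hypothetical):

from suod.models.parallel_processes import (_partition_estimators,
                                            balanced_scheduling)

if use_cost_forecast:  # hypothetical flag for this sketch
    # weight the split by the forecast per-estimator cost
    n_estimators_list, starts, n_jobs = balanced_scheduling(
        time_cost_pred, n_estimators, n_jobs)
else:
    # plain equal split
    n_estimators_list, starts, n_jobs = _partition_estimators(
        n_estimators, n_jobs)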
github yzhao062 / SUOD / examples / temp_do_not_use_work_w_minist.py
    start = time.time()
    predicted_labels = model.predict(X)  # predict labels
    print('Predict time:', time.time() - start)
    print()

    start = time.time()
    predicted_scores = model.decision_function(X)  # predict scores
    print('Decision Function time:', time.time() - start)
    print()

    ##########################################################################
    # compare with no projection, no bps, and no approximation
    print("******************************************************************")
    n_estimators = len(base_estimators)
    n_jobs = 6
    n_estimators_list, starts, n_jobs = _partition_estimators(n_estimators,
                                                              n_jobs)

    rp_flags = np.zeros([n_estimators, 1])
    approx_flags = np.zeros([n_estimators, 1])
    objective_dim = None
    rp_method = None

    start = time.time()
    all_results = Parallel(n_jobs=n_jobs, max_nbytes=None, verbose=True)(
        delayed(_parallel_fit)(
            n_estimators_list[i],
            base_estimators[starts[i]:starts[i + 1]],
            X,
            n_estimators,
            rp_flags[starts[i]:starts[i + 1]],
            objective_dim,
github yzhao062 / SUOD / examples / do_not_use_demo_full.py
approx_clf_list = ['LOF', 'KNN']
approx_ng_clf_list = ['IForest', 'PCA', 'HBOS', 'ABOD']
approx_flag_global = True

# build approx code
# this can be a pre-defined list supplied to the system directly

approx_clf = RandomForestRegressor(n_estimators=100)

approx_flags, base_estimator_names = build_codes(base_estimators,
                                                 approx_clf_list,
                                                 approx_ng_clf_list,
                                                 approx_flag_global)

n_estimators_list, starts, n_jobs = _partition_estimators(n_estimators,
                                                          n_jobs=n_jobs)
print(starts)  # the cumulative offsets of each split
start = time.time()

# TODO: there is a bug here. For some reason, approximators do not match approx_flags
all_approx_results = Parallel(n_jobs=n_jobs, max_nbytes=None, verbose=True)(
    delayed(_parallel_approx_estimators)(
        n_estimators_list[i],
        trained_estimators[starts[i]:starts[i + 1]],
        X,  # if it is a PyOD model, we do not need this
        n_estimators,
        approx_flags[starts[i]:starts[i + 1]],
        approx_clf,
        verbose=True)
    for i in range(n_jobs))
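
Given the TODO above about approximators not matching approx_flags, a quick sanity check can confirm that the flag slices line up with the estimator slices produced by the same starts offsets:

# each worker's estimator slice and flag slice must be the same length
for i in range(n_jobs):
    chunk_models = trained_estimators[starts[i]:starts[i + 1]]
    chunk_flags = approx_flags[starts[i]:starts[i + 1]]
    assert len(chunk_models) == len(chunk_flags) == n_estimators_list[i]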
github yzhao062 / SUOD / suod / models / base.py
        n_samples, n_features = X.shape[0], X.shape[1]

        # decide whether balanced parallel scheduling (bps) is needed;
        # fall back to the simple equal split below when it is turned off
        if self.bps_flag:
            # load the pre-trained cost predictor to forecast the train cost
            cost_predictor = joblib.load(self.cost_forecast_loc_pred_)

            time_cost_pred = cost_forecast_meta(cost_predictor, X,
                                                self.base_estimator_names)

            n_estimators_list, starts, n_jobs = balanced_scheduling(
                time_cost_pred, self.n_estimators, self.n_jobs)
        else:
            # use sklearn's simple equal split
            n_estimators_list, starts, n_jobs = _partition_estimators(
                self.n_estimators, self.n_jobs)

        # compute scores with the base models
        if self.verbose:
            print('Parallel score prediction...')
            start = time.time()

        # TODO: code cleanup. There is an existing bug for joblib on Windows:
        # https://github.com/joblib/joblib/issues/806
        # max_nbytes can be dropped on other operating systems
        all_results_scores = Parallel(n_jobs=n_jobs, max_nbytes=None,
                                      verbose=True)(
            delayed(_parallel_decision_function)(
                n_estimators_list[i],
                self.base_estimators[starts[i]:starts[i + 1]],
                self.approximators[starts[i]:starts[i + 1]],
github yzhao062 / SUOD / examples / demo_full.py
            contamination,
            verbose=True)
        for i in range(n_jobs))

    print('Orig Predict time:', time.time() - start)
    print()

    # unfold and generate the label matrix
    predicted_labels_orig = np.zeros([X_test.shape[0], n_estimators])
    for i in range(n_jobs):
        predicted_labels_orig[:, starts[i]:starts[i + 1]] = np.asarray(
            all_results_pred[i]).T

    start = time.time()
    n_estimators = len(base_estimators)
    n_estimators_list, starts, n_jobs = _partition_estimators(n_estimators,
                                                              n_jobs)
    # parallel score prediction
    all_results_scores = Parallel(n_jobs=n_jobs, max_nbytes=None,
                                  verbose=True)(
        delayed(_parallel_decision_function)(
            n_estimators_list[i],
            trained_estimators[starts[i]:starts[i + 1]],
            None,
            X_test,
            n_estimators,
            jl_transformers,
            approx_flags[starts[i]:starts[i + 1]],
            verbose=True)
        for i in range(n_jobs))

    print('Orig decision_function time:', time.time() - start)
github yzhao062 / SUOD / examples / demo_full.py
    start = time.time()
    predicted_labels = model.predict(X_test)  # predict labels
    print('Predict time:', time.time() - start)
    print()

    start = time.time()
    predicted_scores = model.decision_function(X_test)  # predict scores
    print('Decision Function time:', time.time() - start)
    print()

    ##########################################################################
    # compare with no projection, no bps, and no approximation
    print("******************************************************************")
    start = time.time()
    n_estimators = len(base_estimators)
    n_estimators_list, starts, n_jobs = _partition_estimators(n_estimators,
                                                              n_jobs)

    rp_flags = np.zeros([n_estimators, 1])
    approx_flags = np.zeros([n_estimators, 1])
    objective_dim = None
    rp_method = None

    all_results = Parallel(n_jobs=n_jobs, max_nbytes=None, verbose=True)(
        delayed(_parallel_fit)(
            n_estimators_list[i],
            base_estimators[starts[i]:starts[i + 1]],
            X_train,
            n_estimators,
            rp_flags[starts[i]:starts[i + 1]],
            objective_dim,
            rp_method=rp_method,
github yzhao062 / SUOD / suod / models / base.py
        n_samples, n_features = X.shape[0], X.shape[1]

        # decide whether balanced parallel scheduling (bps) is needed;
        # fall back to the simple equal split below when it is turned off
        if self.bps_flag:
            # load the pre-trained cost predictor to forecast the train cost
            cost_predictor = joblib.load(self.cost_forecast_loc_pred_)

            time_cost_pred = cost_forecast_meta(cost_predictor, X,
                                                self.base_estimator_names)

            n_estimators_list, starts, n_jobs = balanced_scheduling(
                time_cost_pred, self.n_estimators, self.n_jobs)
        else:
            # use sklearn's simple equal split
            n_estimators_list, starts, n_jobs = _partition_estimators(
                self.n_estimators, self.n_jobs)

        # compute scores with the base models
        if self.verbose:
            print('Parallel score prediction...')
            start = time.time()

        # TODO: code cleanup. There is an existing bug for joblib on Windows:
        # https://github.com/joblib/joblib/issues/806
        # max_nbytes can be dropped on other operating systems
        all_results_scores = Parallel(n_jobs=n_jobs, max_nbytes=None,
                                      verbose=True)(
            delayed(_parallel_predict_proba)(
                n_estimators_list[i],
                self.base_estimators[starts[i]:starts[i + 1]],
                self.approximators[starts[i]:starts[i + 1]],
github yzhao062 / SUOD / suod / models / base.py
        Returns
        -------
        self : object
            The estimator after approximation.
        """

        # todo: X may be optional
        # todo: allow using a list of scores for approximation, instead of
        # todo: decision_scores

        self.approx_flags, _ = build_codes(self.base_estimators,
                                           self.approx_clf_list,
                                           self.approx_ng_clf_list,
                                           self.approx_flag_global)

        n_estimators_list, starts, n_jobs = _partition_estimators(
            self.n_estimators, n_jobs=self.n_jobs)

        all_approx_results = Parallel(n_jobs=n_jobs, verbose=True)(
            delayed(_parallel_approx_estimators)(
                n_estimators_list[i],
                self.base_estimators[starts[i]:starts[i + 1]],
                X,  # if it is a PyOD model, we do not need this
                self.n_estimators,
                self.approx_flags[starts[i]:starts[i + 1]],
                self.approx_clf,
                self.jl_transformers_[starts[i]:starts[i + 1]],
                verbose=True)
            for i in range(n_jobs))

        # print('Balanced Scheduling Total Test Time:', time.time() - start)
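
In day-to-day use you rarely call _partition_estimators directly: the public SUOD class wires the same partition, fit, approximate, and predict steps together, as the base.py snippets above show. A sketch based on the project's documented usage, assuming X_train and X_test are numpy arrays (constructor arguments may differ slightly across versions):

from pyod.models.iforest import IForest
from pyod.models.lof import LOF
from suod.models.base import SUOD

base_estimators = [LOF(), IForest(n_estimators=50)]
model = SUOD(base_estimators=base_estimators, n_jobs=2,
             bps_flag=True, approx_flag_global=True, verbose=True)

model.fit(X_train)          # parallel fit, scheduled by the partitioner
model.approximate(X_train)  # fit approximators where the codes flag them
predicted_labels = model.predict(X_test)            # binary labels
predicted_scores = model.decision_function(X_test)  # raw outlier scores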