How to use the suod.utils.utility.build_codes function in suod

To help you get started, we’ve selected a few suod examples, based on popular ways it is used in public projects.

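For orientation, here is a minimal, self-contained sketch of calling build_codes directly. The call signature and the two return values (a flag array plus the list of estimator names) follow the snippets below; the specific PyOD detectors and their parameters are illustrative assumptions, not taken from the original examples.

# a minimal sketch (not from the SUOD repository): build random-projection
# flags for a small, illustrative set of PyOD detectors
from pyod.models.lof import LOF
from pyod.models.knn import KNN
from pyod.models.hbos import HBOS
from pyod.models.iforest import IForest

from suod.utils.utility import build_codes

# illustrative base estimators; any PyOD-style detectors work here
base_estimators = [LOF(n_neighbors=20), KNN(n_neighbors=25),
                   HBOS(), IForest(n_estimators=100)]

# algorithms that should / should not use random projection
rp_clf_list = ['LOF', 'KNN', 'ABOD']
rp_ng_clf_list = ['IForest', 'PCA', 'HBOS']
rp_flag_global = True  # master switch for random projection

# rp_flags marks, per estimator, whether random projection is applied;
# base_estimator_names lists the detectors' names in the same order
rp_flags, base_estimator_names = build_codes(base_estimators, rp_clf_list,
                                             rp_ng_clf_list, rp_flag_global)
print(base_estimator_names)
print(rp_flags)

The same pattern appears throughout the snippets below, both inside SUOD's base.py and in the standalone demo script.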

github yzhao062 / SUOD / suod / models / base.py View on GitHub
        Returns
        -------
        self : object
            Fitted estimator.
        """
        X = check_array(X)
        n_samples, n_features = X.shape[0], X.shape[1]

        # Validate target_dim_frac for random projection
        if isinstance(self.target_dim_frac, (numbers.Integral, np.integer)):
            self.target_dim_frac_ = self.target_dim_frac
        else:  # float
            self.target_dim_frac_ = int(self.target_dim_frac * n_features)

        # build flags for random projection
        self.rp_flags_, _ = build_codes(self.base_estimators, self.rp_clf_list,
                                        self.rp_ng_clf_list,
                                        self.rp_flag_global)

        # decide whether balanced parallel scheduling (BPS) is needed;
        # if not, it is turned off and an equal split is used instead
        if self.bps_flag:
            # load the pre-trained cost predictor to forecast the train cost
            cost_predictor = joblib.load(self.cost_forecast_loc_fit_)

            time_cost_pred = cost_forecast_meta(cost_predictor, X,
                                                self.base_estimator_names)

            # use BPS
            n_estimators_list, starts, n_jobs = balanced_scheduling(
                time_cost_pred, self.n_estimators, self.n_jobs)
        else:
            # BPS disabled: fall back to an equal split of the estimators,
            # mirroring the _partition_estimators call shown below
            n_estimators_list, starts, n_jobs = _partition_estimators(
                self.n_estimators, n_jobs=self.n_jobs)
github yzhao062 / SUOD / suod / models / base.py View on GitHub
        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples. The same feature space as the unsupervised
            outlier detector will be used.

        Returns
        -------
        self : object
            The estimator with approximation.
        """

        # todo: X may be optional
        # todo: allow using a list of scores for approximation, instead of
        # todo: decision_scores

        self.approx_flags, _ = build_codes(self.base_estimators,
                                           self.approx_clf_list,
                                           self.approx_ng_clf_list,
                                           self.approx_flag_global)

        n_estimators_list, starts, n_jobs = _partition_estimators(
            self.n_estimators, n_jobs=self.n_jobs)

        all_approx_results = Parallel(n_jobs=n_jobs, verbose=True)(
            delayed(_parallel_approx_estimators)(
                n_estimators_list[i],
                self.base_estimators[starts[i]:starts[i + 1]],
                X,  # if it is a PyOD model, we do not need this
                self.n_estimators,
                self.approx_flags[starts[i]:starts[i + 1]],
                self.approx_clf,
                self.jl_transformers_[starts[i]:starts[i + 1]],
github yzhao062 / SUOD / examples / do_not_use_demo_full.py View on GitHub
# number of the parallel jobs
n_jobs = 6
n_estimators = len(base_estimators)

# the algorithms that should use random projection
rp_clf_list = ['LOF', 'KNN', 'ABOD']
# the algorithms that should NOT use random projection
rp_ng_clf_list = ['IForest', 'PCA', 'HBOS']
# global flag for random projection
rp_flag_global = True
objective_dim = 6
rp_method = 'discrete'

# build flags for random projection
rp_flags, base_estimator_names = build_codes(base_estimators, rp_clf_list,
                                             rp_ng_clf_list, rp_flag_global)

# load the pre-trained cost predictor to forecast the train cost
clf_train = joblib.load(
    os.path.join('../suod', 'models', 'saved_models', 'bps_train.joblib'))

time_cost_pred = cost_forecast_meta(clf_train, X, base_estimator_names)

# schedule the tasks
n_estimators_list, starts, n_jobs = balanced_scheduling(time_cost_pred,
                                                        n_estimators, n_jobs)

print(starts)  # the start index of each parallel split
start = time.time()

print('Parallel Training...')
github yzhao062 / SUOD / suod / models / base.py View on GitHub
        self.rp_flag_global = rp_flag_global
        self.target_dim_frac = target_dim_frac
        self.jl_method = jl_method
        self.bps_flag = bps_flag
        self.verbose = verbose
        self.approx_flag_global = approx_flag_global
        self.contamination = contamination

        self._parameter_validation(contamination, n_jobs, rp_clf_list,
                                   rp_ng_clf_list, approx_clf_list,
                                   approx_ng_clf_list, approx_clf,
                                   cost_forecast_loc_fit,
                                   cost_forecast_loc_pred)

        # build flags for random projection
        self.rp_flags, self.base_estimator_names = build_codes(
            self.base_estimators, self.rp_clf_list, self.rp_ng_clf_list,
            self.rp_flag_global)
github yzhao062 / SUOD / examples / do_not_use_demo_full.py View on GitHub
trained_estimators = _unfold_parallel(all_results[0], n_jobs)
jl_transformers = _unfold_parallel(all_results[1], n_jobs)

###############################################################################
# %% Model Approximation

approx_clf_list = ['LOF', 'KNN']
approx_ng_clf_list = ['IForest', 'PCA', 'HBOS', 'ABOD']
approx_flag_global = True

# build the approximation codes (flags)
# this can be a pre-defined list supplied directly to the system

approx_clf = RandomForestRegressor(n_estimators=100)

approx_flags, base_estimator_names = build_codes(base_estimators,
                                                 approx_clf_list,
                                                 approx_ng_clf_list,
                                                 approx_flag_global)

n_estimators_list, starts, n_jobs = _partition_estimators(n_estimators,
                                                          n_jobs=n_jobs)
print(starts)  # the start index of each parallel split
start = time.time()

# TODO: there is a bug here; for some reason, the approximators do not match approx_flags
all_approx_results = Parallel(n_jobs=n_jobs, max_nbytes=None, verbose=True)(
    delayed(_parallel_approx_estimators)(
        n_estimators_list[i],
        trained_estimators[starts[i]:starts[i + 1]],
        X,  # if it is a PyOD model, we do not need this
        n_estimators,