How to use the suod.utils.utility._unfold_parallel function in suod

To help you get started, we’ve selected a few suod examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github yzhao062 / SUOD / examples / do_not_use_demo_full.py View on Github external
n_estimators_list[i],
        base_estimators[starts[i]:starts[i + 1]],
        X,
        n_estimators,
        rp_flags[starts[i]:starts[i + 1]],
        objective_dim,
        rp_method=rp_method,
        verbose=True)
    for i in range(n_jobs))

print('Balanced Scheduling Total Train Time:', time.time() - start)

# reformat and unfold the lists. Save the trained estimators and transformers
# zip(*all_results) transposes the per-job result tuples so that index 0
# collects every job's estimators and index 1 every job's transformers;
# _unfold_parallel then flattens each nested per-job list into one flat list.
all_results = list(map(list, zip(*all_results)))
trained_estimators = _unfold_parallel(all_results[0], n_jobs)
jl_transformers = _unfold_parallel(all_results[1], n_jobs)

###############################################################################
# %% Model Approximation

# model families to approximate with a regressor vs. those to leave as-is
# (NOTE(review): exact semantics of the "ng" list live in build_codes — confirm)
approx_clf_list = ['LOF', 'KNN']
approx_ng_clf_list = ['IForest', 'PCA', 'HBOS', 'ABOD']
approx_flag_global = True

# build approx code
# this can be a pre-defined list and directly supply to the system

# regressor used as the supervised approximator of each selected model
approx_clf = RandomForestRegressor(n_estimators=100)

approx_flags, base_estimator_names = build_codes(base_estimators,
                                                 approx_clf_list,
                                                 approx_ng_clf_list,
github yzhao062 / SUOD / examples / temp_do_not_use_work_w_minist.py View on Github external
delayed(_parallel_fit)(
            n_estimators_list[i],
            base_estimators[starts[i]:starts[i + 1]],
            X,
            n_estimators,
            rp_flags[starts[i]:starts[i + 1]],
            objective_dim,
            rp_method=rp_method,
            verbose=True)
        for i in range(n_jobs))

    print('Orig Fit time:', time.time() - start)
    print()

    # transpose per-job results: index 0 -> trained estimators,
    # index 1 -> JL (random-projection) transformers; then flatten each.
    all_results = list(map(list, zip(*all_results)))
    trained_estimators = _unfold_parallel(all_results[0], n_jobs)
    jl_transformers = _unfold_parallel(all_results[1], n_jobs)

    ##########################################################################
    # restart the timer for the prediction stage below
    start = time.time()
    # model prediction
    all_results_pred = Parallel(n_jobs=n_jobs, max_nbytes=None,
                                verbose=True)(
        delayed(_parallel_predict)(
            n_estimators_list[i],
            trained_estimators[starts[i]:starts[i + 1]],
            None,
            X,
            n_estimators,
            rp_flags[starts[i]:starts[i + 1]],
            jl_transformers,
            approx_flags[starts[i]:starts[i + 1]],
github yzhao062 / SUOD / suod / models / base.py View on Github external
# Fit one approximation model per selected base estimator, with the work
# split across n_jobs workers (chunk i covers starts[i]:starts[i+1]).
all_approx_results = Parallel(n_jobs=n_jobs, verbose=True)(
            delayed(_parallel_approx_estimators)(
                n_estimators_list[i],
                self.base_estimators[starts[i]:starts[i + 1]],
                X,  # if it is a PyOD model, we do not need this
                self.n_estimators,
                self.approx_flags[starts[i]:starts[i + 1]],
                self.approx_clf,
                self.jl_transformers_[starts[i]:starts[i + 1]],
                verbose=True)
            for i in range(n_jobs))

        # print('Balanced Scheduling Total Test Time:', time.time() - start)

        # flatten the nested per-job lists into a single list of approximators
        self.approximators = _unfold_parallel(all_approx_results, n_jobs)
        return self
github yzhao062 / SUOD / suod / models / base.py View on Github external
X,
                self.n_estimators,
                self.rp_flags[starts[i]:starts[i + 1]],
                self.target_dim_frac_,
                self.jl_method,
                verbose=self.verbose)
            for i in range(n_jobs))

        print('Balanced Scheduling Total Train Time:', time.time() - start)

        # reformat and unfold the lists. Save the trained estimators and transformers
        # transpose per-job tuples: all_results[0] holds fitted estimators,
        # all_results[1] holds the JL transformers from every job
        all_results = list(map(list, zip(*all_results)))

        # overwrite estimators with their fitted counterparts
        self.base_estimators = _unfold_parallel(all_results[0], n_jobs)
        self.jl_transformers_ = _unfold_parallel(all_results[1], n_jobs)

        return self
github yzhao062 / SUOD / examples / demo_full.py View on Github external
n_estimators_list[i],
            base_estimators[starts[i]:starts[i + 1]],
            X_train,
            n_estimators,
            rp_flags[starts[i]:starts[i + 1]],
            objective_dim,
            rp_method=rp_method,
            verbose=True)
        for i in range(n_jobs))

    print('Orig Fit time:', time.time() - start)
    print()

    # transpose per-job results: index 0 -> trained estimators,
    # index 1 -> JL (random-projection) transformers; then flatten each.
    all_results = list(map(list, zip(*all_results)))
    trained_estimators = _unfold_parallel(all_results[0], n_jobs)
    jl_transformers = _unfold_parallel(all_results[1], n_jobs)

    ##########################################################################
    # restart the timer and re-partition the estimators for prediction
    start = time.time()
    n_estimators = len(base_estimators)
    n_estimators_list, starts, n_jobs = _partition_estimators(n_estimators,
                                                              n_jobs)
    # model prediction
    # model prediction
    all_results_pred = Parallel(n_jobs=n_jobs, max_nbytes=None,
                                verbose=True)(
        delayed(_parallel_predict)(
            n_estimators_list[i],
            trained_estimators[starts[i]:starts[i + 1]],
            None,
            X_test,
            n_estimators,
            jl_transformers,
github yzhao062 / SUOD / examples / temp_do_not_use_work_w_minist.py View on Github external
n_estimators_list[i],
            base_estimators[starts[i]:starts[i + 1]],
            X,
            n_estimators,
            rp_flags[starts[i]:starts[i + 1]],
            objective_dim,
            rp_method=rp_method,
            verbose=True)
        for i in range(n_jobs))

    print('Orig Fit time:', time.time() - start)
    print()

    # transpose per-job results: index 0 -> trained estimators,
    # index 1 -> JL (random-projection) transformers; then flatten each.
    all_results = list(map(list, zip(*all_results)))
    trained_estimators = _unfold_parallel(all_results[0], n_jobs)
    jl_transformers = _unfold_parallel(all_results[1], n_jobs)

    ##########################################################################
    # restart the timer for the prediction stage below
    start = time.time()
    # model prediction
    # model prediction
    all_results_pred = Parallel(n_jobs=n_jobs, max_nbytes=None,
                                verbose=True)(
        delayed(_parallel_predict)(
            n_estimators_list[i],
            trained_estimators[starts[i]:starts[i + 1]],
            None,
            X,
            n_estimators,
            rp_flags[starts[i]:starts[i + 1]],
            jl_transformers,
            approx_flags[starts[i]:starts[i + 1]],
            contamination,
github yzhao062 / SUOD / suod / models / base.py View on Github external
self.base_estimators[starts[i]:starts[i + 1]],
                X,
                self.n_estimators,
                self.rp_flags[starts[i]:starts[i + 1]],
                self.target_dim_frac_,
                self.jl_method,
                verbose=self.verbose)
            for i in range(n_jobs))

        print('Balanced Scheduling Total Train Time:', time.time() - start)

        # reformat and unfold the lists. Save the trained estimators and transformers
        # transpose per-job tuples: all_results[0] holds fitted estimators,
        # all_results[1] holds the JL transformers from every job
        all_results = list(map(list, zip(*all_results)))

        # overwrite estimators with their fitted counterparts
        self.base_estimators = _unfold_parallel(all_results[0], n_jobs)
        self.jl_transformers_ = _unfold_parallel(all_results[1], n_jobs)

        return self
github yzhao062 / SUOD / examples / demo_full.py View on Github external
delayed(_parallel_fit)(
            n_estimators_list[i],
            base_estimators[starts[i]:starts[i + 1]],
            X_train,
            n_estimators,
            rp_flags[starts[i]:starts[i + 1]],
            objective_dim,
            rp_method=rp_method,
            verbose=True)
        for i in range(n_jobs))

    print('Orig Fit time:', time.time() - start)
    print()

    # transpose per-job results: index 0 -> trained estimators,
    # index 1 -> JL (random-projection) transformers; then flatten each.
    all_results = list(map(list, zip(*all_results)))
    trained_estimators = _unfold_parallel(all_results[0], n_jobs)
    jl_transformers = _unfold_parallel(all_results[1], n_jobs)

    ##########################################################################
    # restart the timer and re-partition the estimators for prediction
    start = time.time()
    n_estimators = len(base_estimators)
    n_estimators_list, starts, n_jobs = _partition_estimators(n_estimators,
                                                              n_jobs)
    # model prediction
    # model prediction
    all_results_pred = Parallel(n_jobs=n_jobs, max_nbytes=None,
                                verbose=True)(
        delayed(_parallel_predict)(
            n_estimators_list[i],
            trained_estimators[starts[i]:starts[i + 1]],
            None,
            X_test,
            n_estimators,
github yzhao062 / SUOD / examples / do_not_use_demo_full.py View on Github external
delayed(_parallel_fit)(
        n_estimators_list[i],
        base_estimators[starts[i]:starts[i + 1]],
        X,
        n_estimators,
        rp_flags[starts[i]:starts[i + 1]],
        objective_dim,
        rp_method=rp_method,
        verbose=True)
    for i in range(n_jobs))

print('Balanced Scheduling Total Train Time:', time.time() - start)

# reformat and unfold the lists. Save the trained estimators and transformers
# zip(*all_results) transposes the per-job result tuples so that index 0
# collects every job's estimators and index 1 every job's transformers;
# _unfold_parallel then flattens each nested per-job list into one flat list.
all_results = list(map(list, zip(*all_results)))
trained_estimators = _unfold_parallel(all_results[0], n_jobs)
jl_transformers = _unfold_parallel(all_results[1], n_jobs)

###############################################################################
# %% Model Approximation

# model families to approximate with a regressor vs. those to leave as-is
# (NOTE(review): exact semantics of the "ng" list live in build_codes — confirm)
approx_clf_list = ['LOF', 'KNN']
approx_ng_clf_list = ['IForest', 'PCA', 'HBOS', 'ABOD']
approx_flag_global = True

# build approx code
# this can be a pre-defined list and directly supply to the system

# regressor used as the supervised approximator of each selected model
approx_clf = RandomForestRegressor(n_estimators=100)

approx_flags, base_estimator_names = build_codes(base_estimators,
                                                 approx_clf_list,
github yzhao062 / SUOD / examples / do_not_use_demo_full.py View on Github external
# TODO: here has a bug. For some reason, approximators do not match approx_flags
# Fit one approximation model per trained estimator, with the work split
# across n_jobs workers (chunk i covers starts[i]:starts[i+1]).
all_approx_results = Parallel(n_jobs=n_jobs, max_nbytes=None, verbose=True)(
    delayed(_parallel_approx_estimators)(
        n_estimators_list[i],
        trained_estimators[starts[i]:starts[i + 1]],
        X,  # if it is a PyOD model, we do not need this
        n_estimators,
        approx_flags[starts[i]:starts[i + 1]],
        approx_clf,
        verbose=True)
    for i in range(n_jobs))

print('Balanced Scheduling Total Test Time:', time.time() - start)

# flatten the nested per-job lists into a single list of approximators
approximators = _unfold_parallel(all_approx_results, n_jobs)


# %% Second BPS for prediction
###############################################################################
# still build the rank sum by BPS
# load the pre-trained cost predictor to forecast the prediction cost
# NOTE(review): relative path assumes the script runs from examples/ — confirm
clf_prediction = joblib.load(
    os.path.join('../suod', 'models', 'saved_models', 'bps_prediction.joblib'))

# forecast per-estimator prediction cost from X and the estimator names
time_cost_pred = cost_forecast_meta(clf_prediction, X, base_estimator_names)

# TODO: add a second-stage tuner for prediction stage

# assign estimators to workers so the forecast prediction costs are balanced
n_estimators_list, starts, n_jobs = balanced_scheduling(time_cost_pred,
                                                        n_estimators, n_jobs)