How to use SUOD - 10 common examples

To help you get started, we’ve selected a few suod examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github yzhao062 / SUOD / examples / module_examples / M1_RP / demo_random_projection.py View on Github external
clf = LOF()  # change this to other detection algorithms
        clf.fit(X)
        y_train_scores = clf.decision_scores_
        original_time.append(time.time() - start)
        original_roc.append(roc_auc_score(y, y_train_scores))
        original_prn.append(precision_n_scores(y, y_train_scores))

        X_transformed, _ = jl_fit_transform(X, dim_new, "basic")
        start = time.time()
        clf.fit(X_transformed)
        y_train_scores = clf.decision_scores_
        basic_time.append(time.time() - start)
        basic_roc.append(roc_auc_score(y, y_train_scores))
        basic_prn.append(precision_n_scores(y, y_train_scores))

        X_transformed, _ = jl_fit_transform(X, dim_new, "discrete")
        start = time.time()
        clf.fit(X_transformed)
        y_train_scores = clf.decision_scores_
        discrete_time.append(time.time() - start)
        discrete_roc.append(roc_auc_score(y, y_train_scores))
        discrete_prn.append(precision_n_scores(y, y_train_scores))

        X_transformed, _ = jl_fit_transform(X, dim_new, "circulant")
        start = time.time()
        clf.fit(X_transformed)
        y_train_scores = clf.decision_scores_
        circulant_time.append(time.time() - start)
        circulant_roc.append(roc_auc_score(y, y_train_scores))
        circulant_prn.append(precision_n_scores(y, y_train_scores))

        X_transformed, _ = jl_fit_transform(X, dim_new, "toeplitz")
github yzhao062 / SUOD / examples / module_examples / M1_RP / demo_random_projection.py View on Github external
start = time.time()
        clf.fit(X_transformed)
        y_train_scores = clf.decision_scores_
        discrete_time.append(time.time() - start)
        discrete_roc.append(roc_auc_score(y, y_train_scores))
        discrete_prn.append(precision_n_scores(y, y_train_scores))

        X_transformed, _ = jl_fit_transform(X, dim_new, "circulant")
        start = time.time()
        clf.fit(X_transformed)
        y_train_scores = clf.decision_scores_
        circulant_time.append(time.time() - start)
        circulant_roc.append(roc_auc_score(y, y_train_scores))
        circulant_prn.append(precision_n_scores(y, y_train_scores))

        X_transformed, _ = jl_fit_transform(X, dim_new, "toeplitz")
        start = time.time()
        clf.fit(X_transformed)
        y_train_scores = clf.decision_scores_
        toeplitz_time.append(time.time() - start)
        toeplitz_roc.append(roc_auc_score(y, y_train_scores))
        toeplitz_prn.append(precision_n_scores(y, y_train_scores))

        X_transformed = PCA_sklearn(n_components=dim_new).fit_transform(X)
        start = time.time()
        clf.fit(X_transformed)
        y_train_scores = clf.decision_scores_
        pca_time.append(time.time() - start)
        pca_roc.append(roc_auc_score(y, y_train_scores))
        pca_prn.append(precision_n_scores(y, y_train_scores))

        selected_features = generate_bagging_indices(random_state=j,
github yzhao062 / SUOD / examples / module_examples / M1_RP / demo_random_projection.py View on Github external
pca_time = []

    rp_roc = []
    rp_prn = []
    rp_time = []

    for j in range(n_iter):
        start = time.time()
        clf = LOF()  # change this to other detection algorithms
        clf.fit(X)
        y_train_scores = clf.decision_scores_
        original_time.append(time.time() - start)
        original_roc.append(roc_auc_score(y, y_train_scores))
        original_prn.append(precision_n_scores(y, y_train_scores))

        X_transformed, _ = jl_fit_transform(X, dim_new, "basic")
        start = time.time()
        clf.fit(X_transformed)
        y_train_scores = clf.decision_scores_
        basic_time.append(time.time() - start)
        basic_roc.append(roc_auc_score(y, y_train_scores))
        basic_prn.append(precision_n_scores(y, y_train_scores))

        X_transformed, _ = jl_fit_transform(X, dim_new, "discrete")
        start = time.time()
        clf.fit(X_transformed)
        y_train_scores = clf.decision_scores_
        discrete_time.append(time.time() - start)
        discrete_roc.append(roc_auc_score(y, y_train_scores))
        discrete_prn.append(precision_n_scores(y, y_train_scores))

        X_transformed, _ = jl_fit_transform(X, dim_new, "circulant")
github yzhao062 / SUOD / suod / models / parallel_processes.py View on Github external
def _parallel_fit(n_estimators, clfs, X, total_n_estimators,
                  rp_flags, objective_dim, rp_method, verbose):
    """Fit a batch of detectors in one parallel worker, applying
    Johnson-Lindenstrauss random projection where flagged.

    Parameters
    ----------
    n_estimators : int
        Number of estimators handled by this worker.
    clfs : list
        Estimator prototypes; each is cloned before fitting.
    X : numpy array of shape (n_samples, n_features)
        The input samples.
    total_n_estimators : int
        Total number of estimators across all workers (logging only).
    rp_flags : array-like of int
        ``1`` -> fit estimator i on the randomly projected data;
        otherwise fit on the original data.
    objective_dim : int
        Target dimensionality of the random projection.
    rp_method : str
        Projection scheme forwarded to ``jl_fit_transform``
        (e.g. "basic", "discrete", "circulant", "toeplitz").
    verbose : int
        Print per-estimator progress when > 1.

    Returns
    -------
    estimators : list
        The fitted estimators, in input order.
    rp_transformers : list
        One transformer matrix per estimator; an identity matrix is
        stored when no projection was used, so a downstream
        ``jl_transform(X, transformer)`` call leaves X unchanged.
    """
    X = check_array(X)

    estimators = []
    rp_transformers = []
    for i in range(n_estimators):
        estimator = clone(clfs[i])
        if verbose > 1:
            print("Building estimator %d of %d for this parallel run "
                  "(total %d)..." % (i + 1, n_estimators, total_n_estimators))

        if rp_flags[i] == 1:
            X_scaled, jlt_transformer = jl_fit_transform(X, objective_dim,
                                                         rp_method)
            rp_transformers.append(jlt_transformer)
            estimator.fit(X_scaled)
        else:
            # BUG FIX: the original stored np.ones([d, d]) while claiming it
            # was an "identity matrix". An all-ones matrix applied through
            # jl_transform collapses every feature into a row sum; np.eye
            # genuinely preserves X when the transformer is applied later.
            rp_transformers.append(np.eye(X.shape[1]))
            estimator.fit(X)
        estimators.append(estimator)

    return estimators, rp_transformers
github yzhao062 / SUOD / examples / module_examples / M1_RP / demo_random_projection.py View on Github external
start = time.time()
        clf.fit(X_transformed)
        y_train_scores = clf.decision_scores_
        basic_time.append(time.time() - start)
        basic_roc.append(roc_auc_score(y, y_train_scores))
        basic_prn.append(precision_n_scores(y, y_train_scores))

        X_transformed, _ = jl_fit_transform(X, dim_new, "discrete")
        start = time.time()
        clf.fit(X_transformed)
        y_train_scores = clf.decision_scores_
        discrete_time.append(time.time() - start)
        discrete_roc.append(roc_auc_score(y, y_train_scores))
        discrete_prn.append(precision_n_scores(y, y_train_scores))

        X_transformed, _ = jl_fit_transform(X, dim_new, "circulant")
        start = time.time()
        clf.fit(X_transformed)
        y_train_scores = clf.decision_scores_
        circulant_time.append(time.time() - start)
        circulant_roc.append(roc_auc_score(y, y_train_scores))
        circulant_prn.append(precision_n_scores(y, y_train_scores))

        X_transformed, _ = jl_fit_transform(X, dim_new, "toeplitz")
        start = time.time()
        clf.fit(X_transformed)
        y_train_scores = clf.decision_scores_
        toeplitz_time.append(time.time() - start)
        toeplitz_roc.append(roc_auc_score(y, y_train_scores))
        toeplitz_prn.append(precision_n_scores(y, y_train_scores))

        X_transformed = PCA_sklearn(n_components=dim_new).fit_transform(X)
github yzhao062 / SUOD / examples / demo_full.py View on Github external
y = mat['y']

    # split dataset into train and test
    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.4, random_state=42)

    # standardize data to be digestible for most algorithms
    X_train, X_test = standardizer(X_train, X_test)

    contamination = y.sum() / len(y)

    # get estimators for training and prediction
    base_estimators = get_estimators(contamination=contamination)

    ##########################################################################
    model = SUOD(base_estimators=base_estimators, rp_flag_global=True,
                 approx_clf=approx_clf,
                 n_jobs=n_jobs, bps_flag=True, contamination=contamination,
                 approx_flag_global=True)

    start = time.time()
    model.fit(X_train)  # fit all models with X
    print('Fit time:', time.time() - start)
    print()

    start = time.time()
    model.approximate(X_train)  # conduct model approximation if it is enabled
    print('Approximation time:', time.time() - start)
    print()

    start = time.time()
    predicted_labels = model.predict(X_test)  # predict labels
github yzhao062 / SUOD / examples / demo_base.py View on Github external
LOF(n_neighbors=45, contamination=contamination),
        HBOS(contamination=contamination),
        PCA(contamination=contamination),
        OCSVM(contamination=contamination),
        KNN(n_neighbors=5, contamination=contamination),
        KNN(n_neighbors=15, contamination=contamination),
        KNN(n_neighbors=25, contamination=contamination),
        KNN(n_neighbors=35, contamination=contamination),
        KNN(n_neighbors=45, contamination=contamination),
        IForest(n_estimators=50, contamination=contamination),
        IForest(n_estimators=100, contamination=contamination),
        LSCP(detector_list=[LOF(contamination=contamination),
                            LOF(contamination=contamination)])
    ]

    model = SUOD(base_estimators=base_estimators, n_jobs=6, bps_flag=True,
                 contamination=contamination, approx_flag_global=True)

    model.fit(X_train)  # fit all models with X
    model.approximate(X_train)  # conduct model approximation if it is enabled
    predicted_labels = model.predict(X_test)  # predict labels
    predicted_scores = model.decision_function(X_test)  # predict scores
    predicted_probs = model.predict_proba(X_test)  # predict scores

    ###########################################################################
    # compared with other approaches
    evaluate_print('majority vote', y_test, majority_vote(predicted_labels))
    evaluate_print('average', y_test, average(predicted_scores))
    evaluate_print('maximization', y_test, maximization(predicted_scores))

    clf = LOF()
    clf.fit(X_train)
github yzhao062 / SUOD / examples / temp_do_not_use_work_w_minist.py View on Github external
LSCP(detector_list=[LOF(contamination=contamination),
                            LOF(contamination=contamination)]),
        LSCP(detector_list=[LOF(contamination=contamination),
                            LOF(contamination=contamination)]),
        LSCP(detector_list=[LOF(contamination=contamination),
                            LOF(contamination=contamination)]),
        LSCP(detector_list=[LOF(contamination=contamination),
                            LOF(contamination=contamination)]),
        LSCP(detector_list=[LOF(contamination=contamination),
                            LOF(contamination=contamination)]),
    ]

    # model = SUOD(base_estimators=base_estimators, rp_flag_global=True,
    #              n_jobs=6, bps_flag=False, contamination=contamination,
    #              approx_flag_global=True)
    model = SUOD(base_estimators=base_estimators, rp_flag_global=True,
                 n_jobs=6, bps_flag=True, contamination=contamination,
                 approx_flag_global=True)

    start = time.time()
    model.fit(X)  # fit all models with X
    print('Fit time:', time.time() - start)
    print()

    start = time.time()
    model.approximate(X)  # conduct model approximation if it is enabled
    print('Approximation time:', time.time() - start)
    print()

    start = time.time()
    predicted_labels = model.predict(X)  # predict labels
    print('Predict time:', time.time() - start)
github yzhao062 / SUOD / examples / temp_do_not_use.py View on Github external
X = mat['X']
    y = mat['y']


    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.4, random_state=42)
        
    # standardize data to be digestible for most algorithms
    X_train, X_test = standardizer(X_train, X_test)

    contamination = y.sum() / len(y)

    base_estimators = deepcopy(get_estimators(contamination=contamination))

    ##########################################################################
    model = SUOD(base_estimators=base_estimators, rp_flag_global=True, 
                 approx_clf=RandomForestRegressor(),
                 n_jobs=n_jobs, bps_flag=True, contamination=contamination,
                 approx_flag_global=True)

    start = time.time()
    model.fit(X_train)  # fit all models with X
    print('Fit time:', time.time() - start)
    print()

    start = time.time()
    model.approximate(X_train)  # conduct model approximation if it is enabled
    print('Approximation time:', time.time() - start)
    print()

    start = time.time()
    predicted_labels = model.predict(X_test)  # predict labels
github yzhao062 / SUOD / suod / models / parallel_processes.py View on Github external
approx_flags
    approximator
    verbose

    Returns
    -------

    """
    X = check_array(X)
    # Build estimators
    approximators = []

    # TODO: approximators can be different
    for i in range(n_estimators):
        # project matrix
        X_scaled = jl_transform(X, rp_transformers[i])

        estimator = clfs[i]

        check_is_fitted(estimator, ['decision_scores_'])
        if verbose > 1:
            print("Building estimator %d of %d for this parallel run "
                  "(total %d)..." % (i + 1, n_estimators, total_n_estimators))

        if approx_flags[i] == 1:
            # operate on the reduce space
            pseudo_scores = estimator.decision_scores_
            # pseudo_scores = estimator.decision_function(X)
            # use the same type of approximator for all models
            base_approximater = clone(approximator)
            base_approximater.fit(X_scaled, pseudo_scores)