How to use the bonsai.ensemble.paloforest.PaloForest class in bonsai

To help you get started, we've selected a few bonsai examples based on popular ways PaloForest is used in public projects.

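Before the excerpts, here is a minimal, self-contained sketch of the typical workflow. Treat it as an illustration rather than the library's documented API: it assumes PaloForest can be imported from bonsai.ensemble.paloforest (the module path in the title) and that the gaussian model exposes a predict() method, as the "predict_time" and "rmse" columns in the first test below imply; the constructor arguments mirror the ones used in the examples.

import numpy as np
from sklearn.datasets import make_friedman1
from sklearn.model_selection import train_test_split
from bonsai.ensemble.paloforest import PaloForest  # assumed import path, per the title above

# Synthetic regression problem, half held out for testing (as in the tests below).
X, y = make_friedman1(n_samples=10000, noise=5)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

# "gaussian" is the regression loss; "bernoulli" (used later) is for binary
# classification. subsample0=0.5 mirrors its use in the dump/load test below.
model = PaloForest(distribution="gaussian",
                   n_estimators=100,
                   learning_rate=1.0,
                   max_depth=4,
                   subsample0=0.5)

model.fit(X_train, y_train)
y_hat = model.predict(X_test)  # predict() assumed for the gaussian model
rmse = np.sqrt(np.mean((y_test - y_hat) ** 2))
print("RMSE: {0:.5f} (baseline std: {1:.5f})".format(rmse, np.std(y_test)))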

github yubin-park/bonsai-dt · tests/paloboost.py (view on GitHub)
def test_dumpload():
    X, y = make_friedman1(n_samples=10000, noise=5) 
    n, m = X.shape
    X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                            test_size=0.5)

    model = PaloForest(distribution="gaussian",
                            n_estimators=100,
                            learning_rate=1.0,
                            max_depth=4,
                            subsample0=0.5)

    print("\n")
    print("# Test Dump/Load")
    print("-----------------------------------------------------")
    print(" model_name     train_time     predict_time   rmse   ")
    print("-----------------------------------------------------")
    print(" {0:12}   {1:12}   {2:12}   {3:.5f}".format(
            "baseline", "-", "-", np.std(y_test)))

    # Fit
    start = time.time()
    model.fit(X_train, y_train)
github yubin-park/bonsai-dt · tests/test_paloforest.py (view on GitHub)
        n_samples = 10000
        test_size = 0.2
        n_est = 10
        max_depth = 5
        lr = 0.1

        X, y = make_hastie_11_2(n_samples)
        X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                test_size=test_size)

        model_org = PaloForest(distribution="bernoulli",
                                n_estimators=n_est, 
                                learning_rate=lr,
                                max_depth=max_depth,
                                calibrate=False)
        model_clb = PaloForest(distribution="bernoulli",
                                n_estimators=n_est, 
                                learning_rate=lr,
                                max_depth=max_depth,
                                calibrate=True)

        model_org.fit(X_train, y_train)
        y_hat = model_org.predict_proba(X_test)[:,1]
        auc_org = roc_auc_score(y_test, y_hat)
        brier_org = brier_score_loss(y_test, y_hat)

        model_clb.fit(X_train, y_train)
        y_hat = model_clb.predict_proba(X_test)[:,1]
        auc_clb = roc_auc_score(y_test, y_hat)
        brier_clb = brier_score_loss(y_test, y_hat)

        self.assertTrue(auc_org > 0.5)
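The excerpt above contrasts an uncalibrated forest (calibrate=False) with a calibrated one (calibrate=True), scoring both with ROC AUC and the Brier score. The following self-contained sketch shows the same comparison with the imports filled in; it substitutes scikit-learn's make_hastie_10_2 for the make_hastie_11_2 helper defined in the test module, so the numbers will differ slightly from the original test.

import numpy as np
from sklearn.datasets import make_hastie_10_2
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, brier_score_loss
from bonsai.ensemble.paloforest import PaloForest  # assumed import path, per the title above

np.random.seed(1)
X, y = make_hastie_10_2(n_samples=10000)
y[y < 0] = 0                                  # map labels {-1, 1} -> {0, 1}, as in the tests
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

for calibrate in (False, True):
    model = PaloForest(distribution="bernoulli",
                       n_estimators=10,
                       learning_rate=0.1,
                       max_depth=5,
                       calibrate=calibrate)
    model.fit(X_train, y_train)
    y_hat = model.predict_proba(X_test)[:, 1]  # probability of the positive class
    print("calibrate={0}: auc={1:.4f}, brier={2:.4f}".format(
        calibrate,
        roc_auc_score(y_test, y_hat),
        brier_score_loss(y_test, y_hat)))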
github yubin-park/bonsai-dt · tests/paloforest.py (view on GitHub)
def test_dumpload():

    X, y = make_hastie_10_2(n_samples=10000) 
    y[y<0] = 0
    n, m = X.shape
    X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                            test_size=0.5)

    model = PaloForest(distribution="bernoulli",
                            n_estimators=10, 
                            learning_rate=1.0,
                            max_depth=4,
                            subsample0=0.5,
                            calibrate=True)

    print("\n")
    print("# Test Dump/Load")
    print("-----------------------------------------------------")
    print(" model_name     train_time     predict_time   auc    ")
    print("-----------------------------------------------------")
    print(" {0:12}   {1:12}   {2:12}   {3:.5f}".format(
            "baseline", "-", "-", 0.5))

    # Fit
    start = time.time()
github yubin-park/bonsai-dt · tests/paloforest.py (view on GitHub)
def test_classification():

    X, y = make_hastie_10_2(n_samples=10000) 
    y[y<0] = 0
    n, m = X.shape
    X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                            test_size=0.5)

    models = {"palofrst_org": PaloForest(distribution="bernoulli",
                            n_estimators=10, 
                            learning_rate=1.0,
                            max_depth=5,
                            subsample0=0.5,
                            calibrate=False),
            "palofrst_clb": PaloForest(distribution="bernoulli",
                            n_estimators=10, 
                            learning_rate=1.0,
                            max_depth=5,
                            subsample0=0.5,
                            calibrate=True)}

    print("\n")
    print("# Test Classification")
    print("-----------------------------------------------------")
    print(" model_name     train_time     auc       brier       ")
github yubin-park/bonsai-dt · tests/test_paloforest.py (view on GitHub)
def test_cls(self):

        np.random.seed(1)
        n_samples = 10000
        test_size = 0.2
        n_est = 10
        max_depth = 5
        lr = 0.1

        X, y = make_hastie_11_2(n_samples)
        X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                test_size=test_size)

        model_org = PaloForest(distribution="bernoulli",
                                n_estimators=n_est, 
                                learning_rate=lr,
                                max_depth=max_depth,
                                calibrate=False)
        model_clb = PaloForest(distribution="bernoulli",
                                n_estimators=n_est, 
                                learning_rate=lr,
                                max_depth=max_depth,
                                calibrate=True)

        model_org.fit(X_train, y_train)
        y_hat = model_org.predict_proba(X_test)[:,1]
        auc_org = roc_auc_score(y_test, y_hat)
        brier_org = brier_score_loss(y_test, y_hat)

        model_clb.fit(X_train, y_train)
github yubin-park/bonsai-dt · tests/paloboost.py (view on GitHub)
def test_classification():

    X, y = make_hastie_10_2(n_samples=10000) 
    y[y<0] = 0
    n, m = X.shape
    X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                            test_size=0.5)

    models = {"palobst": PaloBoost(distribution="bernoulli",
                            n_estimators=10, 
                            learning_rate=1.0,
                            max_depth=4,
                            subsample=0.5),
            "palofrst": PaloForest(distribution="bernoulli",
                            n_estimators=10, 
                            learning_rate=1.0,
                            max_depth=4,
                            subsample0=0.5),
            "gbm": GBM(distribution="bernoulli",
                            n_estimators=10, 
                            learning_rate=1.0,
                            max_depth=4,
                            subsample=0.5),
            "sklearn": GradientBoostingClassifier(
                        n_estimators=10, 
                        learning_rate=1.0,
                        max_depth=4, 
                        subsample=0.5)}

    print("\n")
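The last excerpt is cut off just before the loop that drives the comparison. Below is a hedged sketch of that kind of driving loop under the same setup: fit each model, time the training, and report ROC AUC. Only PaloForest and scikit-learn's GradientBoostingClassifier are included here; the original test also benchmarks PaloBoost and GBM from the bonsai package.

import time
import numpy as np
from sklearn.datasets import make_hastie_10_2
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from bonsai.ensemble.paloforest import PaloForest  # assumed import path, per the title above

X, y = make_hastie_10_2(n_samples=10000)
y[y < 0] = 0
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

# Same constructor arguments as the excerpt above; the PaloBoost and GBM
# entries from the original test are omitted here.
models = {"palofrst": PaloForest(distribution="bernoulli",
                                 n_estimators=10,
                                 learning_rate=1.0,
                                 max_depth=4,
                                 subsample0=0.5),
          "sklearn": GradientBoostingClassifier(n_estimators=10,
                                                learning_rate=1.0,
                                                max_depth=4,
                                                subsample=0.5)}

print(" model_name     train_time     auc     ")
for name, model in models.items():
    start = time.time()
    model.fit(X_train, y_train)
    train_time = time.time() - start
    y_hat = model.predict_proba(X_test)[:, 1]
    print(" {0:12}   {1:12.5f}   {2:.5f}".format(
        name, train_time, roc_auc_score(y_test, y_hat)))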