How to use the bonsai.ensemble.paloboost.PaloBoost class in bonsai

To help you get started, we've selected a few bonsai examples that show how PaloBoost is used in public projects.

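For orientation, here is a minimal end-to-end sketch distilled from the snippets below. It is illustrative rather than canonical: the parameter values are arbitrary, and scikit-learn is assumed for the data utilities.

import numpy as np
from sklearn.datasets import make_friedman1
from sklearn.model_selection import train_test_split
from bonsai.ensemble.paloboost import PaloBoost

# Synthetic regression task with a held-out test split.
X, y = make_friedman1(n_samples=10000, noise=5)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# "gaussian" selects a squared-error (regression) loss;
# "bernoulli" selects log-loss for binary classification.
model = PaloBoost(distribution="gaussian",
                  n_estimators=100,
                  learning_rate=0.1,
                  max_depth=4,
                  subsample=0.7)
model.fit(X_train, y_train)
y_hat = model.predict(X_test)
print("RMSE:", np.sqrt(np.mean((y_test - y_hat) ** 2)))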

Example from yubin-park/bonsai-dt, tests/test_paloboost.py (view on GitHub):
def test_rgs(self):
        # Regression benchmark: PaloBoost vs. scikit-learn's
        # GradientBoostingRegressor on the polynomial Friedman #1 data.
        np.random.seed(1)
        n_samples = 10000
        test_size = 0.2
        n_est = 100
        max_depth = 7
        lr = 0.1

        X, y = make_friedman1_poly(n_samples=n_samples)
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                test_size=test_size)

        model_palo = PaloBoost(distribution="gaussian",
                                n_estimators=n_est,
                                learning_rate=lr,
                                max_depth=max_depth)
        model_sklr = GradientBoostingRegressor(
                            n_estimators=n_est,
                            learning_rate=lr,
                            max_depth=max_depth)

        # Fit both models and compare held-out RMSE.
        model_palo.fit(X_train, y_train)
        y_hat = model_palo.predict(X_test)
        rmse_palo = np.sqrt(np.mean((y_test - y_hat)**2))

        model_sklr.fit(X_train, y_train)
        y_hat = model_sklr.predict(X_test)
        rmse_sklr = np.sqrt(np.mean((y_test - y_hat)**2))

Example from yubin-park/bonsai-dt, tests/paloboost.py (view on GitHub):
def test_classification():

    # Compare bonsai's boosting models (PaloBoost, PaloForest, GBM)
    # against scikit-learn on a binary classification task.
    X, y = make_hastie_10_2(n_samples=10000)
    y[y < 0] = 0  # map labels from {-1, +1} to {0, 1}
    n, m = X.shape
    X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                            test_size=0.5)

    models = {"palobst": PaloBoost(distribution="bernoulli",
                            n_estimators=10, 
                            learning_rate=1.0,
                            max_depth=4,
                            subsample=0.5),
            "palofrst": PaloForest(distribution="bernoulli",
                            n_estimators=10, 
                            learning_rate=1.0,
                            max_depth=4,
                            subsample0=0.5),
            "gbm": GBM(distribution="bernoulli",
                            n_estimators=10, 
                            learning_rate=1.0,
                            max_depth=4,
                            subsample=0.5),
            "sklearn": GradientBoostingClassifier(
                        n_estimators=10,

Example from yubin-park/bonsai-dt, tests/paloboost.py (view on GitHub):
def test_regression():

    # Compare bonsai's boosting models against scikit-learn
    # on the noisy Friedman #1 regression task.
    X, y = make_friedman1(n_samples=10000, noise=5)
    n, m = X.shape
    X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                            test_size=0.5)

    models = {"palobst": PaloBoost(distribution="gaussian",
                            n_estimators=100,
                            learning_rate=1.0,
                            max_depth=4,
                            subsample=0.5),
            "palofrst": PaloForest(distribution="gaussian",
                            n_estimators=100,
                            learning_rate=1.0,
                            max_depth=4,
                            subsample0=0.5),
            "gbm": GBM(distribution="gaussian",
                        n_estimators=100, 
                        learning_rate=1.0,
                        max_depth=4,
                        subsample=0.5),
            "sklearn": GradientBoostingRegressor(
                        n_estimators=100,

Example from yubin-park/bonsai-dt, tests/test_paloboost.py (view on GitHub):
def test_cls(self):
        # Classification benchmark: PaloBoost vs. scikit-learn's
        # GradientBoostingClassifier, compared on test AUC.
        np.random.seed(1)
        n_samples = 10000
        test_size = 0.2
        n_est = 100
        max_depth = 7
        lr = 0.1

        X, y = make_hastie_10_2(n_samples=n_samples)
        y[y < 0] = 0  # bernoulli models expect {0, 1} labels
        X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                test_size=test_size)

        model_palo = PaloBoost(distribution="bernoulli",
                                n_estimators=n_est, 
                                learning_rate=lr,
                                max_depth=max_depth)
        model_sklr = GradientBoostingClassifier(
                                n_estimators=n_est, 
                                learning_rate=lr,
                                max_depth=max_depth)


        model_palo.fit(X_train, y_train)
        y_hat = model_palo.predict_proba(X_test)[:,1]
        auc_palo = roc_auc_score(y_test, y_hat)

        model_sklr.fit(X_train, y_train)
        y_hat = model_sklr.predict_proba(X_test)[:,1]
        auc_sklr = roc_auc_score(y_test, y_hat)

Example from yubin-park/bonsai-dt, research/run_experiments.py (view on GitHub):
def run_aux(learning_rate, max_depth, n_estimators=200):

    # Fit PaloBoost and export its pruning and learning-rate
    # statistics for later analysis.
    X, y = get_friedman()
    model = PaloBoost(distribution="gaussian",
                        n_estimators=n_estimators,
                        learning_rate=learning_rate,
                        max_depth=max_depth, 
                        subsample=0.7)
    model.fit(X, y)
    prune_df = pd.DataFrame(model.get_prune_stats())
    prune_df.columns = ["iteration", "nodes_pre", "nodes_post"]
    lr_df = pd.DataFrame(model.get_lr_stats())
    lr_df.columns = ["iteration", "lr"] 

    prune_df.to_csv("results/prune_{}_{}.csv".format(learning_rate, max_depth),
                    index=False)
    lr_df.to_csv("results/lr_{}_{}.csv".format(learning_rate, max_depth),
                    index=False)
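
The run_aux snippet above exercises two PaloBoost-specific diagnostics: get_prune_stats() reports per-iteration node counts before and after in-training pruning, and get_lr_stats() reports the per-iteration learning rates. A quick inspection sketch (column names follow the snippet; model is a fitted PaloBoost):

import pandas as pd

prune_df = pd.DataFrame(model.get_prune_stats(),
                        columns=["iteration", "nodes_pre", "nodes_post"])
# Fraction of nodes removed by pruning at each boosting iteration.
prune_df["pruned_frac"] = 1.0 - prune_df["nodes_post"] / prune_df["nodes_pre"]

lr_df = pd.DataFrame(model.get_lr_stats(), columns=["iteration", "lr"])

print(prune_df.head())
print(lr_df.head())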

Example from yubin-park/bonsai-dt, bonsai/ensemble/paloforest.py (view on GitHub):
    # Excerpt from PaloForest.fit(): body of the loop over ensemble
    # members (loop index i). Each member trains on a bagged sample
    # drawn either block-wise or row-wise.
    if self.block_size is not None:
        n_block = int(n / self.block_size) + 1
        mask_block = (np.random.rand(n_block) < self.subsample0)
        mask = np.repeat(mask_block, self.block_size)[:n]
    else:
        mask = (np.random.rand(n) < self.subsample0)

    X_i, y_i = X[mask, :], y[mask]      # in-bag rows: training data
    X_j, y_j = X[~mask, :], y[~mask]    # out-of-bag rows: calibration data

    # Skip this member if either split ended up single-class.
    if (self.distribution == "bernoulli" and
            (np.unique(y_i).shape[0] == 1 or
             np.unique(y_j).shape[0] == 1)):
        continue

    est = PaloBoost(distribution=self.distribution,
                    learning_rate=self.learning_rate,
                    max_depth=self.max_depth,
                    n_estimators=self.n_estimators,
                    subsample=self.subsample1,
                    subsample_splts=self.subsample2,
                    random_state=i * self.n_estimators)
    est.fit(X_i, y_i)
    self.estimators.append(est)

    # Accumulate feature importances across members.
    if self.feature_importances_ is None:
        self.feature_importances_ = est.feature_importances_
    else:
        self.feature_importances_ += est.feature_importances_

    # For calibrated classifiers, score the out-of-bag rows
    # (the snippet is truncated here at the source).
    if (self.distribution == "bernoulli" and
            self.calibrate):
        z_j = est.predict_proba(X_j)[:, 1]
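
The block-wise bagging mask above can be tried in isolation. The following standalone sketch (block_mask is a hypothetical helper, not part of bonsai) reproduces the mask construction from the excerpt:

import numpy as np

def block_mask(n, block_size, subsample):
    # Keep or drop contiguous blocks of rows together; block_size=None
    # falls back to plain row-wise subsampling.
    if block_size is not None:
        n_block = int(n / block_size) + 1
        keep = np.random.rand(n_block) < subsample
        return np.repeat(keep, block_size)[:n]
    return np.random.rand(n) < subsample

mask = block_mask(n=10, block_size=4, subsample=0.5)
# e.g. [ True  True  True  True False False False False  True  True]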

Example from yubin-park/bonsai-dt, research/run_experiments.py (view on GitHub):
def clstask(X, y, n_estimators, learning_rate, max_depth, n_btstrp, 
            has_missing, test_size, add_noise):
    models = {"0. PaloBoost": PaloBoost(distribution="bernoulli",
                        n_estimators=n_estimators,
                        learning_rate=learning_rate,
                        max_depth=max_depth, 
                        subsample=0.7),
        "1. SGTB-Bonsai": GBM(distribution="bernoulli",
                        n_estimators=n_estimators,
                        learning_rate=learning_rate,
                        max_depth=max_depth, 
                        subsample=0.7),
         "2. XGBoost": XGBClassifier(
                    n_estimators=n_estimators, 
                    learning_rate=learning_rate,
                    max_depth=max_depth, 
                    subsample=0.7)}
    if not has_missing:
        models["3. Scikit-Learn"] = GradientBoostingClassifier(
                    n_estimators=n_estimators,
                    # the remaining arguments are inferred from the
                    # models above; the snippet is truncated here
                    learning_rate=learning_rate,
                    max_depth=max_depth,
                    subsample=0.7)

Example from yubin-park/bonsai-dt, research/run_experiments.py (view on GitHub):
def regtask(X, y, n_estimators, learning_rate, max_depth, n_btstrp, 
        has_missing, test_size):
    models = {"0. PaloBoost": PaloBoost(distribution="gaussian",
                        n_estimators=n_estimators,
                        learning_rate=learning_rate,
                        max_depth=max_depth, 
                        subsample=0.7),
        "1. SGTB-Bonsai": GBM(distribution="gaussian",
                        n_estimators=n_estimators,
                        learning_rate=learning_rate,
                        max_depth=max_depth, 
                        subsample=0.7),
        "2. XGBoost": XGBRegressor(
                    n_estimators=n_estimators, 
                    learning_rate=learning_rate,
                    max_depth=max_depth, 
                    subsample=0.7)}
    if not has_missing:
        models["3. Scikit-Learn"] = GradientBoostingRegressor(
                    n_estimators=n_estimators,
                    # the remaining arguments are inferred from the
                    # models above; the snippet is truncated here
                    learning_rate=learning_rate,
                    max_depth=max_depth,
                    subsample=0.7)

Example from yubin-park/bonsai-dt, bonsai/ensemble/paloforest.py (view on GitHub):
def load(self, model):
        # NOTE: not yet
        self.calibrators = model["clb"]
        self.calibrate = model["calibrate"]
        self.distribution = model["distribution"]
        self.estimators = []
        for d in model["est"]:
            est = PaloBoost()
            est.load(d)
            self.estimators.append(est)
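
Since load() rebuilds the ensemble from a plain dict, persisting a model amounts to storing that dict. A round-trip sketch, assuming dump() is the serializing counterpart of load() and that PaloForest can be constructed with default arguments (both are assumptions, not confirmed by the snippet above):

model_dict = forest.dump()   # fitted PaloForest -> plain dict (assumed API)
restored = PaloForest()      # fresh instance with default arguments (assumed)
restored.load(model_dict)    # rebuild calibrators, distribution, and estimators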