How to use the ngboost.ngboost.NGBoost class in ngboost

To help you get started, we’ve selected a few ngboost examples based on popular ways it is used in public projects.
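
Before the project excerpts, here is a minimal, hedged sketch of the basic pattern they all follow: construct an NGBoost object with a base learner, a distribution, and a scoring rule, fit it, and query the predictive distribution. The synthetic data is illustrative, and the argument and score names assume the same ngboost version used in the excerpts below.

import numpy as np

from ngboost.ngboost import NGBoost
from ngboost.distns import Normal
from ngboost.learners import default_tree_learner
from ngboost.scores import MLE  # newer releases call this LogScore

# Toy regression data (purely illustrative).
X = np.random.randn(500, 5)
y = X @ np.random.randn(5) + 0.3 * np.random.randn(500)

ngb = NGBoost(
    Base=default_tree_learner,  # any sklearn regressor can serve as the base learner
    Dist=Normal,                # distributional assumption on Y|X=x
    Score=MLE,                  # scoring rule used to fit the distribution
    n_estimators=200,
    learning_rate=0.03,
    natural_gradient=True,
)
ngb.fit(X, y)

preds = ngb.pred_dist(X)  # a distribution object, not point predictions
print(preds.loc[:5])      # fitted means; the Normal also exposes .scale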


Example from stanfordmlgroup/ngboost: figures/toy_single.py (view on GitHub)
# Imports reconstructed for this excerpt (they are not part of the original
# snippet, and exact module paths may differ across ngboost versions);
# gen_data is a helper defined earlier in figures/toy_single.py.
from argparse import ArgumentParser

from sklearn.preprocessing import PolynomialFeatures

from ngboost.ngboost import NGBoost
from ngboost.distns import Normal
from ngboost.learners import default_tree_learner
from ngboost.scores import MLE

if __name__ == "__main__":

    argparser = ArgumentParser()
    argparser.add_argument("--n-estimators", type=int, default=301)
    argparser.add_argument("--lr", type=float, default=0.03)
    argparser.add_argument("--minibatch-frac", type=float, default=0.1)
    argparser.add_argument("--natural", action="store_true")
    args = argparser.parse_args()

    x_tr, y_tr, _ = gen_data(n=50)

    poly_transform = PolynomialFeatures(1)
    x_tr = poly_transform.fit_transform(x_tr)

    ngb = NGBoost(
        Base=default_tree_learner,
        Dist=Normal,
        Score=MLE,
        n_estimators=args.n_estimators,
        learning_rate=args.lr,
        natural_gradient=args.natural,
        minibatch_frac=args.minibatch_frac,
        verbose=True,
    )

    ngb.fit(x_tr, y_tr)

    x_te, y_te, _ = gen_data(n=1000, bound=1.3)
    x_te = poly_transform.transform(x_te)
    preds = ngb.pred_dist(x_te)

Example from stanfordmlgroup/ngboost: examples/visualizations/compare_distns.py (view on GitHub)
# Imports and the __main__ guard reconstructed for this excerpt (not part of the
# original snippet; module paths may differ across ngboost versions). The
# calibration helpers (calibration_regression, plot_calibration_curve) are
# imported from the repo's evaluation utilities in the full script.
from argparse import ArgumentParser

import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import scipy.stats  # makes sp.stats available
from sklearn.metrics import r2_score

from ngboost.ngboost import NGBoost
from ngboost.distns import Normal  # --dist is resolved by name via eval() below
from ngboost.learners import default_linear_learner
from ngboost.scores import MLE

if __name__ == "__main__":
    argparser = ArgumentParser()
    argparser.add_argument("--dist", type=str, default="Normal")
    argparser.add_argument("--noise-dist", type=str, default="Normal")
    args = argparser.parse_args()

    m, n = 1000, 50
    if args.noise_dist == "Normal":
        noise = np.random.randn(*(m, 1))
    elif args.noise_dist == "Laplace":
        noise = sp.stats.laplace.rvs(size=(m, 1))
    beta = np.random.randn(n, 1)
    X = np.random.randn(m, n) / np.sqrt(n)
    Y = X @ beta + 0.5 * noise + 20
    print(X.shape, Y.shape)

    ngb = NGBoost(n_estimators=100, learning_rate=1.,
                  Dist=eval(args.dist),
                  Base=default_linear_learner,
                  natural_gradient=True,
                  minibatch_frac=1.0,
                  Score=MLE())
    ngb.fit(X, Y)

    preds = ngb.pred_dist(X)
    print(f"R2: {r2_score(Y, preds.loc):.4f}")

    pctles, observed, slope, intercept = calibration_regression(preds, Y)
    print(observed)
    plt.figure(figsize = (8, 3))
    plt.subplot(1, 2, 1)
    plot_calibration_curve(pctles, observed)
    plt.subplot(1, 2, 2)

Example from stanfordmlgroup/ngboost: experiments/regression.py (view on GitHub)
    # This excerpt sits inside the main block of experiments/regression.py:
    # dataset_name_to_loader, base_name_to_learner, score_name_to_score,
    # RegressionLogger and args are all defined earlier in that script, and
    # train_test_split is sklearn.model_selection.train_test_split.

    # load dataset -- use last column as label
    data = dataset_name_to_loader[args.dataset]()
    X, y = data.iloc[:,:-1].values, data.iloc[:,-1].values

    # set default minibatch fraction based on dataset size
    if not args.minibatch_frac:
        args.minibatch_frac = min(0.8, 5000 / len(X))

    logger = RegressionLogger(args)

    for rep in range(args.n_reps):

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

        # Note: second_order, quadrant_search, nu_penalty and the normalize_*
        # flags belong to the research version of NGBoost used for these
        # experiments and may not be available in released versions.
        ngb = NGBoost(Base=base_name_to_learner[args.base],
                      Dist=Normal,
                      Score=score_name_to_score[args.score],
                      n_estimators=args.n_est,
                      learning_rate=args.lr,
                      natural_gradient=True,
                      second_order=True,
                      quadrant_search=True,
                      minibatch_frac=args.minibatch_frac,
                      nu_penalty=1e-5,
                      normalize_inputs=True,
                      normalize_outputs=True,
                      verbose=args.verbose)

        ngb.fit(X_train, y_train)
        forecast = ngb.pred_dist(X_test)
        logger.tick(forecast, y_test)

Example from stanfordmlgroup/ngboost: ngboost/sklearn_api.py (view on GitHub)
import numpy as np
from sklearn.base import ClassifierMixin, RegressorMixin

from ngboost.ngboost import NGBoost
from ngboost.distns import Bernoulli, Normal


class NGBRegressor(NGBoost, RegressorMixin):
    """NGBoost for regression with Sklean API."""
    def __init__(self, *args, **kwargs):
        super(NGBRegressor, self).__init__(Dist=Normal, *args, **kwargs)


class NGBClassifier(NGBoost, ClassifierMixin):
    """NGBoost for classification with Sklean API.

    Warning:
        Dist need to be Bernoulli.
        You can use this model for only binary classification.
    """
    def __init__(self, *args, **kwargs):
        super(NGBClassifier, self).__init__(Dist=Bernoulli, *args, **kwargs)

    def predict(self, X):
        dist = self.pred_dist(X)
        return np.round(dist.prob)
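
For context, these wrappers are used like any other scikit-learn estimator. Below is a hedged sketch of the classifier, assuming the ngboost.sklearn_api module shown in the excerpt above; the data is synthetic and the constructor arguments are the usual NGBoost keywords.

import numpy as np

from ngboost.sklearn_api import NGBClassifier  # module path as in the excerpt above

# Binary labels only, per the warning in the docstring.
X = np.random.randn(200, 4)
y = (X[:, 0] + 0.5 * np.random.randn(200) > 0).astype(int)

clf = NGBClassifier(n_estimators=100, learning_rate=0.1)
clf.fit(X, y)

probs = clf.pred_dist(X).prob  # P(Y=1|X=x), the quantity predict() rounds
labels = clf.predict(X)        # 0/1 labels via np.round(dist.prob)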

Example from stanfordmlgroup/ngboost: ngboost/api.py (view on GitHub)
"""
        Probability prediction of Y at the points X=x at multiple boosting iterations

        Parameters:
            X        : numpy array of predictors (n x p)
            max_iter : largest number of boosting iterations to get the prediction for
            
        Output:
            A list of the estimates of P(Y=k|X=x), each of shape (n, K), one per boosting stage up to max_iter
        """
        return [
            dist.class_probs() for dist in self.staged_pred_dist(X, max_iter=max_iter)
        ]


class NGBSurvival(NGBoost, BaseEstimator):
    """
    Constructor for NGBoost survival models.

    NGBSurvival is a wrapper for the generic NGBoost class that facilitates survival analysis. Use this class if you want to predict an outcome that could take an infinite number of (ordered) values, but where right-censoring is present in the observed data.

    Parameters:
        Dist              : assumed distributional form of Y|X=x. A distribution from ngboost.distns, e.g. LogNormal
        Score             : rule to compare probabilistic predictions P̂ to the observed data y. A score from ngboost.scores, e.g. LogScore
        Base              : base learner to use in the boosting algorithm. Any instantiated sklearn regressor, e.g. DecisionTreeRegressor()
        natural_gradient  : logical flag indicating whether the natural gradient should be used
        n_estimators      : the number of boosting iterations to fit
        learning_rate     : the learning rate
        minibatch_frac    : the percent subsample of rows to use in each boosting iteration
        col_sample        : the percent subsample of columns to use in each boosting iteration
        verbose           : flag indicating whether output should be printed during fitting
        verbose_eval      : increment (in boosting iterations) at which output should be printed
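
The excerpt above runs from the staged class-probability docstring into the NGBSurvival parameter list; class probabilities are illustrated after the NGBClassifier excerpt further down. For the survival case, here is a hedged sketch. It assumes the fit(X, T, E) signature used by recent ngboost releases, where T holds the observed times and E flags whether the event was observed (0 meaning right-censored); the data is synthetic.

import numpy as np

from ngboost import NGBSurvival
from ngboost.distns import LogNormal

# Synthetic right-censored survival data (illustrative only).
X = np.random.randn(300, 5)
T = np.exp(X[:, 0] + 0.3 * np.random.randn(300))  # observed times, strictly positive
E = np.random.binomial(1, 0.7, size=300)          # 1 = event observed, 0 = censored

ngb = NGBSurvival(Dist=LogNormal, n_estimators=200)
ngb.fit(X, T, E)

surv = ngb.pred_dist(X)  # predictive LogNormal over survival time for each row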

Example from stanfordmlgroup/ngboost: ngboost/api.py (view on GitHub)
    def __getstate__(self):
        state = super().__getstate__()
        # Remove the unpicklable entries.
        if self.Dist.__name__ == "DistWithUncensoredScore":
            state["Dist"] = self.Dist.__base__
            state["uncensor"] = True
        return state

    def __setstate__(self, state_dict):
        if "uncensor" in state_dict.keys():
            state_dict["Dist"] = state_dict["Dist"].uncensor(state_dict["Score"])
        super().__setstate__(state_dict)


class NGBClassifier(NGBoost, BaseEstimator):
    """
    Constructor for NGBoost classification models.

    NGBClassifier is a wrapper for the generic NGBoost class that facilitates classification. Use this class if you want to predict an outcome that could take a discrete number of (unordered) values.

    Parameters:
        Dist              : assumed distributional form of Y|X=x. A distribution from ngboost.distns, e.g. Bernoulli
        Score             : rule to compare probabilistic predictions P̂ to the observed data y. A score from ngboost.scores, e.g. LogScore
        Base              : base learner to use in the boosting algorithm. Any instantiated sklearn regressor, e.g. DecisionTreeRegressor()
        natural_gradient  : logical flag indicating whether the natural gradient should be used
        n_estimators      : the number of boosting iterations to fit
        learning_rate     : the learning rate
        minibatch_frac    : the percent subsample of rows to use in each boosting iteration
        col_sample        : the percent subsample of columns to use in each boosting iteration
        verbose           : flag indicating whether output should be printed during fitting
        verbose_eval      : increment (in boosting iterations) at which output should be printed
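
A hedged usage sketch for this classifier follows, tying in the staged class-probability method documented in the first api.py excerpt above. The Bernoulli distribution and the method names come from the excerpts; the data and the max_iter value are illustrative.

import numpy as np

from ngboost import NGBClassifier
from ngboost.distns import Bernoulli

X = np.random.randn(400, 6)
y = (X[:, 0] - X[:, 1] > 0).astype(int)

clf = NGBClassifier(Dist=Bernoulli, n_estimators=150)
clf.fit(X, y)

probs = clf.predict_proba(X)                       # (n, K) array of P(Y=k|X=x)
staged = clf.staged_predict_proba(X, max_iter=50)  # one (n, K) array per boosting stage
print(probs.shape, len(staged))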

Example from stanfordmlgroup/ngboost: ngboost/api.py (view on GitHub)
from ngboost.distns import (
    Bernoulli,
    ClassificationDistn,
    LogNormal,
    Normal,
    RegressionDistn,
)
from ngboost.helpers import Y_from_censored
from ngboost.learners import default_tree_learner
from ngboost.ngboost import NGBoost
from ngboost.scores import LogScore
from sklearn.base import BaseEstimator
from sklearn.utils import check_array


class NGBRegressor(NGBoost, BaseEstimator):
    """
    Constructor for NGBoost regression models.

    NGBRegressor is a wrapper for the generic NGBoost class that facilitates regression. Use this class if you want to predict an outcome that could take an infinite number of (ordered) values.

    Parameters:
        Dist              : assumed distributional form of Y|X=x. A distribution from ngboost.distns, e.g. Normal
        Score             : rule to compare probabilistic predictions P̂ to the observed data y. A score from ngboost.scores, e.g. LogScore
        Base              : base learner to use in the boosting algorithm. Any instantiated sklearn regressor, e.g. DecisionTreeRegressor()
        natural_gradient  : logical flag indicating whether the natural gradient should be used
        n_estimators      : the number of boosting iterations to fit
        learning_rate     : the learning rate
        minibatch_frac    : the percent subsample of rows to use in each boosting iteration
        col_sample        : the percent subsample of columns to use in each boosting iteration
        verbose           : flag indicating whether output should be printed during fitting
        verbose_eval      : increment (in boosting iterations) at which output should be printed
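
Finally, a hedged sketch of the regression wrapper. The Normal distribution and LogScore come from the parameter list above; the data is synthetic.

import numpy as np

from ngboost import NGBRegressor
from ngboost.distns import Normal
from ngboost.scores import LogScore

X = np.random.randn(500, 8)
y = X @ np.random.randn(8) + 0.5 * np.random.randn(500)

reg = NGBRegressor(Dist=Normal, Score=LogScore, n_estimators=300, learning_rate=0.01)
reg.fit(X, y)

point = reg.predict(X)   # point predictions (the mean of the fitted Normal)
dist = reg.pred_dist(X)  # full predictive distribution; for Normal, dist.scale is the std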