How to use ngboost: common examples

To help you get started, we’ve selected a few ngboost examples based on popular ways the library is used in public projects.


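Before diving into the repository excerpts, here is a minimal end-to-end sketch of the core workflow they all share, assuming a current ngboost release and scikit-learn's California housing data (the Boston dataset used in the older excerpts was removed from scikit-learn 1.2):

from sklearn.datasets import fetch_california_housing
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

from ngboost import NGBRegressor

X, y = fetch_california_housing(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

ngb = NGBRegressor(verbose=False)   # Normal distribution, tree base learner by default
ngb.fit(X_train, y_train)

point = ngb.predict(X_test)         # point predictions (distribution means)
dist = ngb.pred_dist(X_test)        # full predictive distributions
print(mean_squared_error(y_test, point))
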
From stanfordmlgroup/ngboost, tests/test_basic.py:
# Assumed imports from the enclosing test module (note that load_boston
# was removed entirely in scikit-learn 1.2):
#   from sklearn.datasets import load_boston
#   from sklearn.metrics import mean_squared_error
#   from sklearn.model_selection import train_test_split
#   from ngboost import NGBoost
#   from ngboost.distns import Normal
#   from ngboost.learners import default_tree_learner
#   from ngboost.scores import MLE
def test_regression(self):
    data, target = load_boston(return_X_y=True)
    x_train, x_test, y_train, y_test = train_test_split(data, target,
                                                        test_size=0.2,
                                                        random_state=42)
    ngb = NGBoost(Base=default_tree_learner, Dist=Normal, Score=MLE,
                  natural_gradient=True, verbose=False)
    ngb.fit(x_train, y_train)
    preds = ngb.predict(x_test)
    score = mean_squared_error(y_test, preds)
    assert score <= 8.0
From stanfordmlgroup/ngboost, tests/test_basic.py:
# Assumed imports from the enclosing test module:
#   from sklearn.datasets import load_breast_cancer
#   from sklearn.metrics import roc_auc_score
#   from sklearn.model_selection import train_test_split
#   from ngboost import NGBoost
#   from ngboost.distns import Bernoulli
#   from ngboost.learners import default_tree_learner
#   from ngboost.scores import MLE
def test_classification(self):
    data, target = load_breast_cancer(return_X_y=True)
    x_train, x_test, y_train, y_test = train_test_split(data, target,
                                                        test_size=0.2,
                                                        random_state=42)
    ngb = NGBoost(Base=default_tree_learner, Dist=Bernoulli, Score=MLE,
                  verbose=False)
    ngb.fit(x_train, y_train)
    preds = ngb.pred_dist(x_test)              # Bernoulli predictive distribution
    score = roc_auc_score(y_test, preds.prob)  # .prob is P(y = 1)
    assert score >= 0.95
From stanfordmlgroup/ngboost, tests/test_with_sklearn.py:
def test_classification():
    from sklearn.datasets import load_breast_cancer
    from sklearn.metrics import roc_auc_score
    from sklearn.model_selection import train_test_split
    # NGBClassifier and Bernoulli are assumed imported at module level:
    #   from ngboost import NGBClassifier
    #   from ngboost.distns import Bernoulli
    data, target = load_breast_cancer(return_X_y=True)
    x_train, x_test, y_train, y_test = train_test_split(data, target,
                                                        test_size=0.2,
                                                        random_state=42)
    ngb = NGBClassifier(Dist=Bernoulli, verbose=False)
    ngb.fit(x_train, y_train)
    preds = ngb.predict(x_test)   # hard 0/1 labels; see the note below
    score = roc_auc_score(y_test, preds)
    assert score >= 0.95
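A caveat on this test: predict returns hard class labels, so the ROC AUC here is computed on 0/1 predictions rather than scores. For a probability-based AUC on a current ngboost release, predict_proba is the natural choice (a minimal sketch, reusing the fitted ngb from above):

proba = ngb.predict_proba(x_test)[:, 1]   # P(y = 1) for each test row
score = roc_auc_score(y_test, proba)
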
From stanfordmlgroup/ngboost, tests/test_with_sklearn.py:
def test_regression():
    from sklearn.datasets import load_boston
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import train_test_split
    # NGBRegressor is assumed imported at module level:
    #   from ngboost import NGBRegressor
    data, target = load_boston(return_X_y=True)
    x_train, x_test, y_train, y_test = train_test_split(data, target,
                                                        test_size=0.2,
                                                        random_state=42)
    ngb = NGBRegressor(verbose=False)
    ngb.fit(x_train, y_train)
    preds = ngb.predict(x_test)
    score = mean_squared_error(y_test, preds)
    assert score <= 8.0
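Beyond point predictions, the same fitted regressor exposes the full predictive distribution. A short sketch, assuming current ngboost behavior where pred_dist returns a distribution object with a params dict:

dist = ngb.pred_dist(x_test)       # per-row Normal predictive distributions
print(dist.params["loc"][:5])      # predicted means
print(dist.params["scale"][:5])    # predicted standard deviations
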
From stanfordmlgroup/ngboost, figures/toy_single.py:
# gen_data is defined earlier in this script; calibration_regression comes
# from ngboost.evaluation (imports assumed from the repo layout).
if __name__ == "__main__":
    argparser = ArgumentParser()
    argparser.add_argument("--n-estimators", type=int, default=301)
    argparser.add_argument("--lr", type=float, default=0.03)
    argparser.add_argument("--minibatch-frac", type=float, default=0.1)
    argparser.add_argument("--natural", action="store_true")
    args = argparser.parse_args()

    x_tr, y_tr, _ = gen_data(n=50)

    # degree-1 polynomial features, i.e. a bias column plus the raw input
    poly_transform = PolynomialFeatures(1)
    x_tr = poly_transform.fit_transform(x_tr)

    ngb = NGBoost(
        Base=default_tree_learner,
        Dist=Normal,
        Score=MLE,
        n_estimators=args.n_estimators,
        learning_rate=args.lr,
        natural_gradient=args.natural,
        minibatch_frac=args.minibatch_frac,
        verbose=True,
    )

    ngb.fit(x_tr, y_tr)

    # evaluate on a wider test range to probe extrapolation
    x_te, y_te, _ = gen_data(n=1000, bound=1.3)
    x_te = poly_transform.transform(x_te)
    preds = ngb.pred_dist(x_te)

    pctles, obs, _, _ = calibration_regression(preds, y_te)
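The calibration_regression output pairs predicted percentiles with observed frequencies, and the repo's plot_calibration_curve helper (used again in the compare_distns example below) turns them into a reliability diagram. A sketch, assuming both helpers live in ngboost.evaluation:

import matplotlib.pyplot as plt
from ngboost.evaluation import calibration_regression, plot_calibration_curve

pctles, obs, slope, intercept = calibration_regression(preds, y_te)
plot_calibration_curve(pctles, obs)
plt.show()
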
From stanfordmlgroup/ngboost, figures/toy.py:
# As in toy_single.py, gen_data and calibration_regression come from elsewhere
# in the script; BLK is a constant defined earlier (assumptions from context,
# including the __main__ guard implied by the excerpt's indentation).
if __name__ == "__main__":
    argparser = ArgumentParser()
    argparser.add_argument("--n-estimators", type=int, default=(1 + BLK * 100))
    argparser.add_argument("--lr", type=float, default=0.03)
    argparser.add_argument("--minibatch-frac", type=float, default=0.1)
    argparser.add_argument("--natural", action="store_true")
    args = argparser.parse_args()

    x_tr, y_tr, _ = gen_data(n=100)

    poly_transform = PolynomialFeatures(1)
    x_tr = poly_transform.fit_transform(x_tr)

    ngb = NGBoost(
        Base=default_tree_learner,
        Dist=Normal,
        Score=MLE,
        n_estimators=args.n_estimators,
        learning_rate=args.lr,
        natural_gradient=args.natural,
        minibatch_frac=args.minibatch_frac,
        verbose=True,
    )

    blk = int(args.n_estimators / 100)   # block size used elsewhere in the script
    ngb.fit(x_tr, y_tr)

    x_te, y_te, _ = gen_data(n=1000, bound=1.3)
    x_te = poly_transform.transform(x_te)
    preds = ngb.pred_dist(x_te)

    pctles, obs, _, _ = calibration_regression(preds, y_te)
From stanfordmlgroup/ngboost, examples/visualizations/compare_distns.py:
# Assumed context: numpy as np, scipy as sp, matplotlib.pyplot as plt,
# sklearn.metrics.r2_score, and the ngboost imports used in the earlier
# examples, plus the calibration helpers from ngboost.evaluation.
if __name__ == "__main__":
    argparser = ArgumentParser()
    argparser.add_argument("--dist", type=str, default="Normal")
    argparser.add_argument("--noise-dist", type=str, default="Normal")
    args = argparser.parse_args()

    # simulate a linear regression problem with Normal or Laplace noise
    m, n = 1000, 50
    if args.noise_dist == "Normal":
        noise = np.random.randn(m, 1)
    elif args.noise_dist == "Laplace":
        noise = sp.stats.laplace.rvs(size=(m, 1))
    beta = np.random.randn(n, 1)
    X = np.random.randn(m, n) / np.sqrt(n)
    Y = X @ beta + 0.5 * noise + 20
    print(X.shape, Y.shape)

    # eval() maps the --dist string onto a distribution class, e.g. Normal
    ngb = NGBoost(n_estimators=100, learning_rate=1.,
                  Dist=eval(args.dist),
                  Base=default_linear_learner,
                  natural_gradient=True,
                  minibatch_frac=1.0,
                  Score=MLE())
    ngb.fit(X, Y)

    preds = ngb.pred_dist(X)
    print(f"R2: {r2_score(Y, preds.loc):.4f}")

    pctles, observed, slope, intercept = calibration_regression(preds, Y)
    print(observed)
    plt.figure(figsize=(8, 3))
    plt.subplot(1, 2, 1)
    plot_calibration_curve(pctles, observed)
    plt.subplot(1, 2, 2)
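One design note on the snippet above: Dist=eval(args.dist) turns the --dist string into a class, which is concise but executes arbitrary command-line input. A dictionary lookup is a safer equivalent (a sketch; which distributions are importable depends on your ngboost version):

from ngboost.distns import Laplace, LogNormal, Normal

dist_lookup = {"Normal": Normal, "Laplace": Laplace, "LogNormal": LogNormal}
ngb = NGBoost(Dist=dist_lookup[args.dist], Base=default_linear_learner,
              Score=MLE(), natural_gradient=True, minibatch_frac=1.0)
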
From stanfordmlgroup/ngboost, experiments/regression.py:
# Assumed context: args and the dataset_name_to_loader, base_name_to_learner,
# and score_name_to_score lookups, plus RegressionLogger, are defined earlier
# in this script.
# load dataset -- use last column as label
data = dataset_name_to_loader[args.dataset]()
X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

# set default minibatch fraction based on dataset size
if not args.minibatch_frac:
    args.minibatch_frac = min(0.8, 5000 / len(X))

logger = RegressionLogger(args)

for rep in range(args.n_reps):

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

    # second_order, quadrant_search, nu_penalty, and the normalize_* flags
    # belong to an experimental NGBoost variant, not the released API
    ngb = NGBoost(Base=base_name_to_learner[args.base],
                  Dist=Normal,
                  Score=score_name_to_score[args.score],
                  n_estimators=args.n_est,
                  learning_rate=args.lr,
                  natural_gradient=True,
                  second_order=True,
                  quadrant_search=True,
                  minibatch_frac=args.minibatch_frac,
                  nu_penalty=1e-5,
                  normalize_inputs=True,
                  normalize_outputs=True,
                  verbose=args.verbose)

    ngb.fit(X_train, y_train)
    forecast = ngb.pred_dist(X_test)
    logger.tick(forecast, y_test)
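As noted in the comment above, arguments like second_order, quadrant_search, nu_penalty, normalize_inputs, and normalize_outputs come from an experimental research branch and are not in the released NGBoost constructor. On a current release, the closest public-API equivalent is roughly this sketch:

from ngboost import NGBRegressor
from ngboost.distns import Normal

ngb = NGBRegressor(Dist=Normal,
                   n_estimators=args.n_est,
                   learning_rate=args.lr,
                   natural_gradient=True,
                   minibatch_frac=args.minibatch_frac,
                   verbose=args.verbose)
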
From stanfordmlgroup/ngboost, experiments/sim_experiment.py:
# Assumed context: pandas as pd, numpy as np,
# sklearn.tree.DecisionTreeRegressor, and SurvNGBoost, LogNormal, CRPS_surv,
# and calculate_concordance_naive from the repo's experiments code.
def run_experiments(df_train_filename, df_test_filename, natural_gradient=False,
                    second_order=False, quadrant_search=False):
    df_train = pd.read_csv(df_train_filename)
    df_test = pd.read_csv(df_test_filename)
    Y = np.array(df_train['Y'])   # observed survival times
    C = np.array(df_train['C'])   # censoring indicator (assumed)
    X = np.array(df_train.drop(['Y', 'C'], axis=1))
    sb = SurvNGBoost(Base=lambda: DecisionTreeRegressor(criterion='mse'),
                     Dist=LogNormal,
                     Score=CRPS_surv,
                     n_estimators=1000,
                     learning_rate=0.1,
                     natural_gradient=natural_gradient,
                     second_order=second_order,
                     quadrant_search=quadrant_search,
                     nu_penalty=1e-5)
    loss_train = sb.fit(X, Y, C)

    preds_train = sb.pred_mean(X)
    preds_test = sb.pred_mean(df_test.drop(["Y", "C"], axis=1))
    conc_test = calculate_concordance_naive(preds_test, df_test["Y"], df_test["C"])
    test_true_mean = np.mean(df_test["Y"])
    test_pred_mean = np.mean(preds_test)
    return loss_train, conc_test, test_true_mean, test_pred_mean
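SurvNGBoost and CRPS_surv live in the repository's experiments code rather than the installable package. Released ngboost handles right-censored data through NGBSurvival, whose fit takes the observed times and an event indicator; a sketch, assuming C above is 1 for censored rows:

from ngboost import NGBSurvival
from ngboost.distns import LogNormal

sb = NGBSurvival(Dist=LogNormal, n_estimators=1000, learning_rate=0.1)
sb.fit(X, Y, 1 - C)   # fit(X, T, E): T = observed time, E = 1 if the event occurred
preds_test = sb.pred_dist(df_test.drop(["Y", "C"], axis=1)).mean()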