How to use the ngboost.learners.default_linear_learner function in ngboost

To help you get started, we've selected a few ngboost examples that show popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: stanfordmlgroup/ngboost — examples/visualizations/compare_distns.py (view on GitHub)
# NOTE(review): excerpt from examples/visualizations/compare_distns.py — the
# `if __name__ == "__main__":` guard and the earlier argparser setup are above
# this clip, which is why the first line has lost its indentation.
argparser.add_argument("--noise-dist", type=str, default="Normal")
    args = argparser.parse_args()

    # Simulated linear regression problem: m samples, n features.
    m, n = 1000, 50
    if args.noise_dist == "Normal":
        noise = np.random.randn(*(m, 1))
    elif args.noise_dist == "Laplace":
        noise = sp.stats.laplace.rvs(size=(m, 1))
    # NOTE(review): any other --noise-dist value leaves `noise` undefined and
    # the `Y = ...` line below would raise NameError.
    beta = np.random.randn(n, 1)
    X = np.random.randn(m, n) / np.sqrt(n)
    Y = X @ beta + 0.5 * noise + 20
    print(X.shape, Y.shape)

    # Fit NGBoost with a linear base learner. `eval(args.dist)` turns the CLI
    # string into a distribution class — acceptable only for trusted CLI input.
    ngb = NGBoost(n_estimators=100, learning_rate=1.,
                  Dist=eval(args.dist),
                  Base=default_linear_learner,
                  natural_gradient=True,
                  minibatch_frac=1.0,
                  Score=MLE())
    ngb.fit(X, Y)

    # In-sample predictive distributions; `preds.loc` is presumably the
    # location parameter (mean for Normal) — confirm against the Dist class.
    preds = ngb.pred_dist(X)
    print(f"R2: {r2_score(Y, preds.loc):.4f}")

    # Calibration diagnostics: predicted percentiles vs. observed coverage,
    # shown as a calibration curve and a PIT histogram side by side.
    pctles, observed, slope, intercept = calibration_regression(preds, Y)
    print(observed)
    plt.figure(figsize = (8, 3))
    plt.subplot(1, 2, 1)
    plot_calibration_curve(pctles, observed)
    plt.subplot(1, 2, 2)
    plot_pit_histogram(pctles, observed)
    plt.tight_layout()
Source: stanfordmlgroup/ngboost — examples/simulations/survival_sim.py (view on GitHub)
# NOTE(review): excerpt from examples/simulations/survival_sim.py — the
# argparser and the X / Y setup precede this clip, so `X`, `Y`, `m`, `n`
# and `args` are defined above.
T = X @ np.ones((n,)) + 0.5 * np.random.randn(*(m,)) + args.eps
    # Event indicator: 1 where T exceeds Y. Which of T/Y plays the censoring
    # role depends on the setup above this clip — verify against the full file.
    E = (T > Y).astype(int)

    print(X.shape, Y.shape, E.shape)
    print(f"Event rate: {np.mean(E):.2f}")

    # 80/20 split, keeping X, Y, T and E row-aligned.
    X_tr, X_te, Y_tr, Y_te, T_tr, T_te, E_tr, E_te = train_test_split(
        X, Y, T, E, test_size=0.2
    )

    # Survival NGBoost with an Exponential outcome distribution and a linear
    # base learner.
    ngb = NGBSurvival(
        Dist=Exponential,
        n_estimators=args.n_estimators,
        learning_rate=args.lr,
        natural_gradient=True,
        Base=default_linear_learner,
        Score=MLE,
        verbose=True,
        verbose_eval=True,
    )
    # Fit on exp(min(Y, T)): the earlier of the two times, mapped off the log
    # scale, with E_tr flagging which observations had the event.
    train_losses = ngb.fit(X_tr, np.exp(np.minimum(Y_tr, T_tr)), E_tr)

    # Held-out predictive distributions; compare log of predicted means
    # against Y_te (which lives on the log scale, per the exp() above).
    preds = ngb.pred_dist(X_te)
    print(f"R2: {r2_score(Y_te, np.log(preds.mean()))}")

    # Overlay histograms of predicted means vs. true exponentiated outcomes.
    plt.hist(preds.mean(), range=(0, 10), bins=30, alpha=0.5, label="Pred")
    plt.hist(np.exp(Y_te), range=(0, 10), bins=30, alpha=0.5, label="True")
    plt.legend()
    plt.show()

    # since we simulated the data we fully observe all outcomes
    # calibration assuming complete observations
Source: stanfordmlgroup/ngboost — examples/simulations/regression_sim.py (view on GitHub)
# NOTE(review): excerpt from examples/simulations/regression_sim.py — the
# argparser setup is above this clip.
m, n = 1200, 50
    # Simulated linear-Gaussian data: Y = X @ beta1 + noise_lvl * noise.
    noise = np.random.randn(*(m, 1))
    beta1 = np.random.randn(n, 1)
    X = np.random.randn(m, n) / np.sqrt(n)
    Y = (X @ beta1 + args.noise_lvl * noise).squeeze()
    print(X.shape, Y.shape)

    # First 1000 rows train, remaining 200 test.
    X_train, X_test = X[:1000, :], X[1000:,]
    Y_train, Y_test = Y[:1000], Y[1000:]

    # NGBoost with a Normal outcome distribution and linear base learner;
    # `eval(args.score)` turns the CLI string into a score class (trusted
    # CLI input only).
    ngb = NGBoost(
        n_estimators=400,
        learning_rate=args.lr,
        Dist=Normal,
        Base=default_linear_learner,
        natural_gradient=args.natural,
        minibatch_frac=1.0,
        Score=eval(args.score)(),
        verbose=True,
        verbose_eval=100,
    )

    losses = ngb.fit(X_train, Y_train)
    # `forecast.loc` is presumably the Normal location (mean) parameter —
    # confirm against ngboost's Normal distribution class.
    forecast = ngb.pred_dist(X_test)
    print("R2:", r2_score(Y_test, forecast.loc))
Source: stanfordmlgroup/ngboost — examples/experiments/survival_exp.py (view on GitHub)
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import PolynomialFeatures
from dfply import *
from ngboost.distns import LogNormal, Exponential, MultivariateNormal
from ngboost.api import NGBSurvival
from ngboost.scores import MLE, CRPS
from ngboost.learners import default_tree_learner, default_linear_learner
from ngboost.evaluation import *
from sksurv.ensemble import GradientBoostingSurvivalAnalysis as GBSA
from sksurv.metrics import concordance_index_censored

# Fix the RNG seed so experiment runs are reproducible.
np.random.seed(1)

# Map the --base CLI choice to the corresponding ngboost base learner.
base_name_to_learner = {
    "tree": default_tree_learner,
    "linear": default_linear_learner,
}


def Y_join(T, E):
    """Pack times and event indicators into a structured survival array.

    This is the ``y`` format expected by scikit-survival estimators such as
    ``GradientBoostingSurvivalAnalysis``: a structured NumPy array with a
    boolean event field and a float time field.

    Parameters
    ----------
    T : pandas.Series
        Observed times (read via ``.values``).
    E : pandas.Series
        Event indicators; truthy = event observed, falsy = censored.

    Returns
    -------
    numpy.ndarray
        Structured array of shape ``(len(T),)`` with fields
        ``("Event", bool)`` and ``("Time", float64)``.
    """
    col_event = "Event"
    col_time = "Time"
    # Use the builtin `bool` here: the `np.bool` alias was deprecated in
    # NumPy 1.20 and removed in 1.24, so the original raised AttributeError
    # on modern NumPy. The resulting dtype is identical.
    y = np.empty(dtype=[(col_event, bool), (col_time, np.float64)], shape=T.shape[0])
    y[col_event] = E.values
    y[col_time] = T.values
    return y


if __name__ == "__main__":

    # CLI setup (fragment — the remaining arguments and the experiment body
    # continue beyond this excerpt).
    argparser = ArgumentParser()
    argparser.add_argument("--dataset", type=str, default="flchain")
Source: stanfordmlgroup/ngboost — experiments/regression.py (view on GitHub)
# Map the --dataset CLI choice to a zero-argument loader returning a
# DataFrame. Remote UCI URLs are fetched on demand; the others read from a
# local data/uci/ directory that must already exist.
dataset_name_to_loader = {
    "housing": lambda: pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data', header=None, delim_whitespace=True),
    "concrete": lambda: pd.read_excel("https://archive.ics.uci.edu/ml/machine-learning-databases/concrete/compressive/Concrete_Data.xls"),
    "wine": lambda: pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv', delimiter=";"),
    "kin8nm": lambda: pd.read_csv("data/uci/kin8nm.csv"),
    # Last column dropped for naval/energy (presumably a duplicate target —
    # confirm against the dataset docs).
    "naval": lambda: pd.read_csv("data/uci/naval-propulsion.txt", delim_whitespace=True, header=None).iloc[:,:-1],
    "power": lambda: pd.read_excel("data/uci/power-plant.xlsx"),
    "energy": lambda: pd.read_excel("https://archive.ics.uci.edu/ml/machine-learning-databases/00242/ENB2012_data.xlsx").iloc[:,:-1],
    # Explicit column order puts the RMSD target last.
    "protein": lambda: pd.read_csv("data/uci/protein.csv")[['F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'F7', 'F8', 'F9', 'RMSD']],
    "yacht": lambda: pd.read_csv("http://archive.ics.uci.edu/ml/machine-learning-databases/00243/yacht_hydrodynamics.data", header=None, delim_whitespace=True),
}

# Map the --base CLI choice to the corresponding ngboost base learner.
base_name_to_learner = {
    "tree": default_tree_learner,
    "linear": default_linear_learner,
}

# Map the --score CLI choice to the corresponding ngboost scoring rule.
score_name_to_score = {
    "MLE": MLE,
    "CRPS": CRPS,
}

class RegressionLogger(object):
    """Accumulate regression metrics across experiment runs.

    NOTE(review): only ``__init__`` is visible in this excerpt — the methods
    that fill and report these lists continue beyond it.
    """

    def __init__(self, args):
        # Keep the parsed CLI namespace around for later reporting.
        self.args = args
        self.verbose = args.verbose
        # Per-run metric accumulators (names suggest R^2, MSE, negative
        # log-likelihood and calibration scores — confirm in the methods
        # outside this excerpt).
        self.r2s = []
        self.mses = []
        self.nlls = []
        self.calib_scores = []
Source: stanfordmlgroup/ngboost — examples/experiments/regression_exp.py (view on GitHub)
# NOTE(review): excerpt starts mid-expression — the opening of the dataset
# loader dict and the pd.read_excel( call this URL belongs to (presumably the
# "energy" entry, by analogy with experiments/regression.py) are above this
# clip.
"https://archive.ics.uci.edu/ml/machine-learning-databases/00242/ENB2012_data.xlsx"
    ).iloc[:, :-1],
    # Explicit column order puts the RMSD target last.
    "protein": lambda: pd.read_csv("data/uci/protein.csv")[
        ["F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "RMSD"]
    ],
    "yacht": lambda: pd.read_csv(
        "http://archive.ics.uci.edu/ml/machine-learning-databases/00243/yacht_hydrodynamics.data",
        header=None,
        delim_whitespace=True,
    ),
    # Columns reversed so the year target ends up last.
    "msd": lambda: pd.read_csv("data/uci/YearPredictionMSD.txt").iloc[:, ::-1],
}

# Map the --base CLI choice to the corresponding ngboost base learner.
base_name_to_learner = {
    "tree": default_tree_learner,
    "linear": default_linear_learner,
}


if __name__ == "__main__":

    # CLI setup (fragment — more arguments and the experiment body continue
    # beyond this excerpt).
    argparser = ArgumentParser()
    argparser.add_argument("--dataset", type=str, default="concrete")
    argparser.add_argument("--reps", type=int, default=5)
    argparser.add_argument("--n-est", type=int, default=2000)
    argparser.add_argument("--n-splits", type=int, default=20)
    argparser.add_argument("--distn", type=str, default="Normal")
    argparser.add_argument("--lr", type=float, default=0.01)
    argparser.add_argument("--natural", action="store_true")
    argparser.add_argument("--score", type=str, default="MLE")
    argparser.add_argument("--base", type=str, default="tree")
    argparser.add_argument("--minibatch-frac", type=float, default=None)