from sklearn.datasets import load_boston, load_breast_cancer
from sklearn.metrics import mean_squared_error, roc_auc_score
from sklearn.model_selection import train_test_split

from ngboost import NGBoost
from ngboost.distns import Bernoulli, Normal
from ngboost.learners import default_tree_learner
from ngboost.scores import MLE

# The enclosing test class is not shown in the original snippet;
# TestNGBoost is a placeholder name so the methods run under pytest.
class TestNGBoost:
    def test_regression(self):
        # Fit a probabilistic regressor on the Boston housing data and
        # check that the point predictions reach a reasonable MSE.
        data, target = load_boston(return_X_y=True)
        x_train, x_test, y_train, y_test = train_test_split(
            data, target, test_size=0.2, random_state=42)
        ngb = NGBoost(Base=default_tree_learner, Dist=Normal, Score=MLE,
                      natural_gradient=True, verbose=False)
        ngb.fit(x_train, y_train)
        preds = ngb.predict(x_test)
        score = mean_squared_error(y_test, preds)
        assert score <= 8.0

    def test_classification(self):
        # Fit a Bernoulli NGBoost classifier on the breast cancer data and
        # check the AUC of the predicted class probabilities.
        data, target = load_breast_cancer(return_X_y=True)
        x_train, x_test, y_train, y_test = train_test_split(
            data, target, test_size=0.2, random_state=42)
        ngb = NGBoost(Base=default_tree_learner, Dist=Bernoulli, Score=MLE,
                      verbose=False)
        ngb.fit(x_train, y_train)
        preds = ngb.pred_dist(x_test)
        score = roc_auc_score(y_test, preds.prob)
        assert score >= 0.95
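The tests above only check point predictions and class probabilities, but pred_dist returns a full distribution object. A minimal sketch of inspecting its parameters, continuing from the regression test and assuming the Normal distribution exposes loc and scale attributes (loc is used the same way further down this page):

# Not part of the original tests: the predictive distribution carries
# per-sample uncertainty alongside the mean.
dist = ngb.pred_dist(x_test)
print(dist.loc[:5])    # predictive means
print(dist.scale[:5])  # predictive standard deviations (Normal scale)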
def test_classification():
    # scikit-learn-style API: NGBClassifier wraps NGBoost for classification.
    from sklearn.datasets import load_breast_cancer
    from sklearn.metrics import roc_auc_score
    from sklearn.model_selection import train_test_split

    from ngboost import NGBClassifier
    from ngboost.distns import Bernoulli

    data, target = load_breast_cancer(return_X_y=True)
    x_train, x_test, y_train, y_test = train_test_split(
        data, target, test_size=0.2, random_state=42)
    ngb = NGBClassifier(Dist=Bernoulli, verbose=False)
    ngb.fit(x_train, y_train)
    preds = ngb.predict(x_test)  # hard 0/1 labels
    score = roc_auc_score(y_test, preds)
    assert score >= 0.95
def test_regression():
    # scikit-learn-style API: NGBRegressor wraps NGBoost for regression.
    from sklearn.datasets import load_boston
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import train_test_split

    from ngboost import NGBRegressor

    data, target = load_boston(return_X_y=True)
    x_train, x_test, y_train, y_test = train_test_split(
        data, target, test_size=0.2, random_state=42)
    ngb = NGBRegressor(verbose=False)
    ngb.fit(x_train, y_train)
    preds = ngb.predict(x_test)
    score = mean_squared_error(y_test, preds)
    assert score <= 8.0
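For classification, a ranking metric such as ROC AUC is usually computed on probabilities rather than hard labels. A sketch of the same check using NGBClassifier's scikit-learn-style predict_proba, assuming the standard (n_samples, 2) output shape:

proba = ngb.predict_proba(x_test)           # class probabilities, shape (n, 2)
score = roc_auc_score(y_test, proba[:, 1])  # AUC on P(y = 1)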
from argparse import ArgumentParser

from sklearn.preprocessing import PolynomialFeatures

from ngboost import NGBoost
from ngboost.distns import Normal
from ngboost.evaluation import calibration_regression
from ngboost.learners import default_tree_learner
from ngboost.scores import MLE

# gen_data is a helper local to this example script and is not included in
# the snippet; a stand-in sketch follows below.

argparser = ArgumentParser()
argparser.add_argument("--n-estimators", type=int, default=301)
argparser.add_argument("--lr", type=float, default=0.03)
argparser.add_argument("--minibatch-frac", type=float, default=0.1)
argparser.add_argument("--natural", action="store_true")
args = argparser.parse_args()

x_tr, y_tr, _ = gen_data(n=50)
poly_transform = PolynomialFeatures(1)
x_tr = poly_transform.fit_transform(x_tr)

ngb = NGBoost(
    Base=default_tree_learner,
    Dist=Normal,
    Score=MLE,
    n_estimators=args.n_estimators,
    learning_rate=args.lr,
    natural_gradient=args.natural,
    minibatch_frac=args.minibatch_frac,
    verbose=True,
)
ngb.fit(x_tr, y_tr)

# Evaluate calibration on a test range slightly wider than the training data.
x_te, y_te, _ = gen_data(n=1000, bound=1.3)
x_te = poly_transform.transform(x_te)
preds = ngb.pred_dist(x_te)
pctles, obs, _, _ = calibration_regression(preds, y_te)
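As noted above, gen_data is not shown in these snippets. A hypothetical stand-in with a compatible signature (one noisy 1-D feature, plus a third return value the scripts discard) might look like:

import numpy as np

def gen_data(n=50, bound=1.0, noise_std=0.5, seed=None):
    # Toy 1-D regression data on [-bound, bound]; the third return value
    # mirrors the unused "_" in the scripts above.
    rng = np.random.default_rng(seed)
    x = rng.uniform(-bound, bound, size=(n, 1))
    y = np.sin(3 * x).ravel() + noise_std * rng.standard_normal(n)
    return x, y, None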
# Same imports and gen_data helper as the previous snippet.
BLK = 3  # number of 100-iteration blocks; the value is an assumption --
         # the original snippet uses BLK without showing its definition

argparser = ArgumentParser()
argparser.add_argument("--n-estimators", type=int, default=(1 + BLK * 100))
argparser.add_argument("--lr", type=float, default=0.03)
argparser.add_argument("--minibatch-frac", type=float, default=0.1)
argparser.add_argument("--natural", action="store_true")
args = argparser.parse_args()

x_tr, y_tr, _ = gen_data(n=100)
poly_transform = PolynomialFeatures(1)
x_tr = poly_transform.fit_transform(x_tr)

ngb = NGBoost(
    Base=default_tree_learner,
    Dist=Normal,
    Score=MLE,
    n_estimators=args.n_estimators,
    learning_rate=args.lr,
    natural_gradient=args.natural,
    minibatch_frac=args.minibatch_frac,
    verbose=True,
)
blk = int(args.n_estimators / 100)  # recompute the block count from the CLI value
ngb.fit(x_tr, y_tr)

x_te, y_te, _ = gen_data(n=1000, bound=1.3)
x_te = poly_transform.transform(x_te)
preds = ngb.pred_dist(x_te)
pctles, obs, _, _ = calibration_regression(preds, y_te)
if __name__ == "__main__":
    # Same imports and gen_data helper as the snippets above.
    argparser = ArgumentParser()
    argparser.add_argument("--n-estimators", type=int, default=301)
    argparser.add_argument("--lr", type=float, default=0.03)
    argparser.add_argument("--minibatch-frac", type=float, default=0.1)
    argparser.add_argument("--natural", action="store_true")
    args = argparser.parse_args()

    x_tr, y_tr, _ = gen_data(n=50)
    poly_transform = PolynomialFeatures(1)
    x_tr = poly_transform.fit_transform(x_tr)

    ngb = NGBoost(
        Base=default_tree_learner,
        Dist=Normal,
        Score=MLE,
        n_estimators=args.n_estimators,
        learning_rate=args.lr,
        natural_gradient=args.natural,
        minibatch_frac=args.minibatch_frac,
        verbose=True,
    )
    ngb.fit(x_tr, y_tr)

    x_te, y_te, _ = gen_data(n=1000, bound=1.3)
    x_te = poly_transform.transform(x_te)
    preds = ngb.pred_dist(x_te)
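This script stops after computing the predictive distribution. A short follow-on sketch for visualizing it (continuing inside the __main__ block), assuming matplotlib and the Normal distribution's loc/scale attributes used elsewhere on this page; note PolynomialFeatures(1) puts the bias term in column 0, so the raw feature is column 1:

import matplotlib.pyplot as plt

order = x_te[:, 1].argsort()
mean, std = preds.loc[order], preds.scale[order]
plt.plot(x_te[order, 1], mean, label="predicted mean")
plt.fill_between(x_te[order, 1], mean - 2 * std, mean + 2 * std,
                 alpha=0.3, label="mean +/- 2 sd")
plt.scatter(x_te[:, 1], y_te, s=4, alpha=0.3)
plt.legend()
plt.show()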
from argparse import ArgumentParser

import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import scipy.stats
from sklearn.metrics import r2_score

from ngboost import NGBoost
from ngboost.distns import Laplace, Normal
from ngboost.evaluation import calibration_regression, plot_calibration_curve
from ngboost.learners import default_linear_learner
from ngboost.scores import MLE

argparser = ArgumentParser()
argparser.add_argument("--dist", type=str, default="Normal",
                       choices=["Normal", "Laplace"])
argparser.add_argument("--noise-dist", type=str, default="Normal",
                       choices=["Normal", "Laplace"])
args = argparser.parse_args()

# Simulate a linear regression problem with Gaussian or Laplace noise.
m, n = 1000, 50
if args.noise_dist == "Normal":
    noise = np.random.randn(m, 1)
elif args.noise_dist == "Laplace":
    noise = sp.stats.laplace.rvs(size=(m, 1))
beta = np.random.randn(n, 1)
X = np.random.randn(m, n) / np.sqrt(n)
Y = X @ beta + 0.5 * noise + 20
print(X.shape, Y.shape)

# Look up the output distribution by name (avoids eval on user input).
dists = {"Normal": Normal, "Laplace": Laplace}
ngb = NGBoost(n_estimators=100, learning_rate=1.0,
              Dist=dists[args.dist],
              Base=default_linear_learner,
              natural_gradient=True,
              minibatch_frac=1.0,
              Score=MLE)
ngb.fit(X, Y)

preds = ngb.pred_dist(X)
print(f"R2: {r2_score(Y, preds.loc):.4f}")

pctles, observed, slope, intercept = calibration_regression(preds, Y)
print(observed)
plt.figure(figsize=(8, 3))
plt.subplot(1, 2, 1)
plot_calibration_curve(pctles, observed)
plt.subplot(1, 2, 2)
# dataset_name_to_loader, base_name_to_learner, score_name_to_score,
# RegressionLogger, and args are defined earlier in the benchmark script
# (not shown in this snippet).

# load dataset -- use the last column as the label
data = dataset_name_to_loader[args.dataset]()
X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

# set a default minibatch fraction based on dataset size
if not args.minibatch_frac:
    args.minibatch_frac = min(0.8, 5000 / len(X))

logger = RegressionLogger(args)
for rep in range(args.n_reps):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
    ngb = NGBoost(Base=base_name_to_learner[args.base],
                  Dist=Normal,
                  Score=score_name_to_score[args.score],
                  n_estimators=args.n_est,
                  learning_rate=args.lr,
                  natural_gradient=True,
                  second_order=True,
                  quadrant_search=True,
                  minibatch_frac=args.minibatch_frac,
                  nu_penalty=1e-5,
                  normalize_inputs=True,
                  normalize_outputs=True,
                  verbose=args.verbose)
    ngb.fit(X_train, y_train)
    forecast = ngb.pred_dist(X_test)
    logger.tick(forecast, y_test)
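RegressionLogger belongs to the surrounding benchmark script and is not shown. A hypothetical minimal stand-in that records test RMSE per repetition could look like:

import numpy as np
from sklearn.metrics import mean_squared_error

class RegressionLogger:
    def __init__(self, args):
        self.args = args
        self.rmses = []

    def tick(self, forecast, y_test):
        # forecast is the distribution returned by pred_dist; use its
        # mean (loc) as the point prediction.
        rmse = np.sqrt(mean_squared_error(y_test, forecast.loc))
        self.rmses.append(rmse)
        print(f"rep {len(self.rmses)}: test RMSE = {rmse:.3f}")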
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor

# SurvNGBoost, LogNormal, CRPS_surv, and calculate_concordance_naive come
# from NGBoost's experimental survival-analysis code (not shown here).

def run_experiments(df_train_filename, df_test_filename, natural_gradient=False,
                    second_order=False, quadrant_search=False):
    df_train = pd.read_csv(df_train_filename)
    df_test = pd.read_csv(df_test_filename)
    Y = np.array(df_train['Y'])  # observed times
    C = np.array(df_train['C'])  # censoring indicators
    X = np.array(df_train.drop(['Y', 'C'], axis=1))
    sb = SurvNGBoost(Base=lambda: DecisionTreeRegressor(criterion='mse'),
                     Dist=LogNormal,
                     Score=CRPS_surv,
                     n_estimators=1000,
                     learning_rate=0.1,
                     natural_gradient=natural_gradient,
                     second_order=second_order,
                     quadrant_search=quadrant_search,
                     nu_penalty=1e-5)
    loss_train = sb.fit(X, Y, C)
    preds_train = sb.pred_mean(X)
    preds_test = sb.pred_mean(df_test.drop(["Y", "C"], axis=1))
    conc_test = calculate_concordance_naive(preds_test, df_test["Y"], df_test["C"])
    test_true_mean = np.mean(df_test["Y"])
    test_pred_mean = np.mean(preds_test)
    return loss_train, conc_test, test_true_mean, test_pred_mean
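A usage sketch for this function (the CSV file names are placeholders; each file needs a Y column with event times, a C column with censoring indicators, and feature columns):

loss, concordance, true_mean, pred_mean = run_experiments(
    "surv_train.csv", "surv_test.csv",
    natural_gradient=True, second_order=True)
print(f"train loss: {loss:.3f}  test concordance: {concordance:.3f}")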