argparser.add_argument("--noise-dist", type=str, default="Normal")
args = argparser.parse_args()
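# Simulate a well-specified linear model: m samples of n standardized
# features, an intercept of 20, and additive noise from the distribution
# selected by --noise-dist.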
m, n = 1000, 50
if args.noise_dist == "Normal":
    noise = np.random.randn(m, 1)
elif args.noise_dist == "Laplace":
    noise = sp.stats.laplace.rvs(size=(m, 1))
beta = np.random.randn(n, 1)
X = np.random.randn(m, n) / np.sqrt(n)
Y = X @ beta + 0.5 * noise + 20
print(X.shape, Y.shape)
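# Fit NGBoost with a linear base learner on the full batch, using natural
# gradients and the MLE scoring rule; the forecast distribution family is
# chosen via --dist.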
ngb = NGBoost(
    n_estimators=100,
    learning_rate=1.0,
    Dist=eval(args.dist),
    Base=default_linear_learner,
    natural_gradient=True,
    minibatch_frac=1.0,
    Score=MLE(),
)
ngb.fit(X, Y)
preds = ngb.pred_dist(X)
print(f"R2: {r2_score(Y, preds.loc):.4f}")
pctles, observed, slope, intercept = calibration_regression(preds, Y)
print(observed)
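# Diagnostics for the predictive distributions: the calibration curve plots
# observed frequencies against predicted percentiles, and the PIT histogram
# should be close to uniform when forecasts are calibrated.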
plt.figure(figsize=(8, 3))
plt.subplot(1, 2, 1)
plot_calibration_curve(pctles, observed)
plt.subplot(1, 2, 2)
plot_pit_histogram(pctles, observed)
plt.tight_layout()
plt.show()
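# Survival setup: T acts as a censoring time, and the event indicator E is 1
# exactly when the outcome Y occurs before censoring (T > Y).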
T = X @ np.ones((n, 1)) + 0.5 * np.random.randn(m, 1) + args.eps
E = (T > Y).astype(int)
print(X.shape, Y.shape, E.shape)
print(f"Event rate: {np.mean(E):.2f}")
X_tr, X_te, Y_tr, Y_te, T_tr, T_te, E_tr, E_te = train_test_split(
    X, Y, T, E, test_size=0.2
)
ngb = NGBSurvival(
    Dist=Exponential,
    n_estimators=args.n_estimators,
    learning_rate=args.lr,
    natural_gradient=True,
    Base=default_linear_learner,
    Score=MLE,
    verbose=True,
    verbose_eval=True,
)
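# Train on the observed time, i.e. the earlier of event time Y and censoring
# time T, exponentiated because the simulation lives on the log scale; E says
# whether that observation was an event or a censoring.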
train_losses = ngb.fit(X_tr, np.exp(np.minimum(Y_tr, T_tr)), E_tr)
preds = ngb.pred_dist(X_te)
print(f"R2: {r2_score(Y_te, np.log(preds.mean()))}")
plt.hist(preds.mean(), range=(0, 10), bins=30, alpha=0.5, label="Pred")
plt.hist(np.exp(Y_te), range=(0, 10), bins=30, alpha=0.5, label="True")
plt.legend()
plt.show()
# Since we simulated the data, we fully observe all outcomes, so we can
# check calibration as if there were no censoring (sketched below).
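# A minimal sketch of that check, assuming calibration_regression accepts the
# Exponential forecast object returned by pred_dist: compare the predicted
# time distributions against the uncensored outcomes on the original scale.
pctles, observed, slope, intercept = calibration_regression(
    preds, np.exp(Y_te).squeeze()
)
plot_calibration_curve(pctles, observed)
plt.show()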
m, n = 1200, 50
noise = np.random.randn(m, 1)
beta1 = np.random.randn(n, 1)
X = np.random.randn(m, n) / np.sqrt(n)
Y = (X @ beta1 + args.noise_lvl * noise).squeeze()
print(X.shape, Y.shape)
X_train, X_test = X[:1000, :], X[1000:, :]
Y_train, Y_test = Y[:1000], Y[1000:]
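# Fit with the scoring rule (--score MLE|CRPS) and gradient type (--natural)
# chosen on the command line, so natural and ordinary gradients can be
# compared on identical data.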
ngb = NGBoost(
    n_estimators=400,
    learning_rate=args.lr,
    Dist=Normal,
    Base=default_linear_learner,
    natural_gradient=args.natural,
    minibatch_frac=1.0,
    Score=eval(args.score)(),
    verbose=True,
    verbose_eval=100,
)
losses = ngb.fit(X_train, Y_train)
forecast = ngb.pred_dist(X_test)
print("R2:", r2_score(Y_test, forecast.loc))
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import PolynomialFeatures
from dfply import *
from ngboost.distns import LogNormal, Exponential, MultivariateNormal
from ngboost.api import NGBSurvival
from ngboost.scores import MLE, CRPS
from ngboost.learners import default_tree_learner, default_linear_learner
from ngboost.evaluation import *
from sksurv.ensemble import GradientBoostingSurvivalAnalysis as GBSA
from sksurv.metrics import concordance_index_censored
np.random.seed(1)
base_name_to_learner = {
    "tree": default_tree_learner,
    "linear": default_linear_learner,
}
def Y_join(T, E):
    # Pack event times and indicators into the structured array sksurv expects.
    col_event = "Event"
    col_time = "Time"
    y = np.empty(dtype=[(col_event, bool), (col_time, np.float64)], shape=T.shape[0])
    y[col_event] = E.values
    y[col_time] = T.values
    return y
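# A minimal sketch (with hypothetical X_tr/T_tr/E_tr pandas splits) of how
# Y_join feeds the scikit-survival baseline imported above and how the
# censored concordance index would be computed:
#
#     gbsa = GBSA(n_estimators=100).fit(X_tr, Y_join(T_tr, E_tr))
#     c_index = concordance_index_censored(
#         E_te.values.astype(bool), T_te.values, gbsa.predict(X_te)
#     )[0]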
if __name__ == "__main__":
argparser = ArgumentParser()
argparser.add_argument("--dataset", type=str, default="flchain")
dataset_name_to_loader = {
    "housing": lambda: pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data", header=None, delim_whitespace=True),
    "concrete": lambda: pd.read_excel("https://archive.ics.uci.edu/ml/machine-learning-databases/concrete/compressive/Concrete_Data.xls"),
    "wine": lambda: pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv", delimiter=";"),
    "kin8nm": lambda: pd.read_csv("data/uci/kin8nm.csv"),
    "naval": lambda: pd.read_csv("data/uci/naval-propulsion.txt", delim_whitespace=True, header=None).iloc[:, :-1],
    "power": lambda: pd.read_excel("data/uci/power-plant.xlsx"),
    "energy": lambda: pd.read_excel("https://archive.ics.uci.edu/ml/machine-learning-databases/00242/ENB2012_data.xlsx").iloc[:, :-1],
    "protein": lambda: pd.read_csv("data/uci/protein.csv")[["F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "RMSD"]],
    "yacht": lambda: pd.read_csv("http://archive.ics.uci.edu/ml/machine-learning-databases/00243/yacht_hydrodynamics.data", header=None, delim_whitespace=True),
    "msd": lambda: pd.read_csv("data/uci/YearPredictionMSD.txt").iloc[:, ::-1],
}
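# The "data/uci/..." loaders assume those UCI datasets were downloaded locally
# beforehand; the URL-based entries fetch directly from the UCI repository.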
score_name_to_score = {
    "MLE": MLE,
    "CRPS": CRPS,
}
class RegressionLogger(object):
    # Accumulates per-split metrics (R^2, MSE, NLL, calibration) across runs.
    def __init__(self, args):
        self.args = args
        self.verbose = args.verbose
        self.r2s = []
        self.mses = []
        self.nlls = []
        self.calib_scores = []
"https://archive.ics.uci.edu/ml/machine-learning-databases/00242/ENB2012_data.xlsx"
).iloc[:, :-1],
"protein": lambda: pd.read_csv("data/uci/protein.csv")[
["F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "RMSD"]
],
"yacht": lambda: pd.read_csv(
"http://archive.ics.uci.edu/ml/machine-learning-databases/00243/yacht_hydrodynamics.data",
header=None,
delim_whitespace=True,
),
"msd": lambda: pd.read_csv("data/uci/YearPredictionMSD.txt").iloc[:, ::-1],
}
base_name_to_learner = {
"tree": default_tree_learner,
"linear": default_linear_learner,
}
if __name__ == "__main__":
argparser = ArgumentParser()
argparser.add_argument("--dataset", type=str, default="concrete")
argparser.add_argument("--reps", type=int, default=5)
argparser.add_argument("--n-est", type=int, default=2000)
argparser.add_argument("--n-splits", type=int, default=20)
argparser.add_argument("--distn", type=str, default="Normal")
argparser.add_argument("--lr", type=float, default=0.01)
argparser.add_argument("--natural", action="store_true")
argparser.add_argument("--score", type=str, default="MLE")
argparser.add_argument("--base", type=str, default="tree")
argparser.add_argument("--minibatch-frac", type=float, default=None)