import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor

# SurvNGBoost, LogNormal, HomoskedasticNormal, CRPS_surv, MLE_surv, and
# calculate_concordance_naive are project-local names; import them from
# wherever they are defined in this repo.


def run_experiments(df_train_filename, df_test_filename, natural_gradient=False,
                    second_order=False, quadrant_search=False):
    # Train/test CSVs carry the outcome Y, the censoring indicator C, and the
    # remaining columns as features.
    df_train = pd.read_csv(df_train_filename)
    df_test = pd.read_csv(df_test_filename)
    Y = np.array(df_train['Y'])
    C = np.array(df_train['C'])
    X = np.array(df_train.drop(['Y', 'C'], axis=1))
    # Note: the 'mse' criterion was renamed to 'squared_error' in scikit-learn
    # 1.0 and removed in 1.2; use whichever name your installed version accepts.
    sb = SurvNGBoost(Base=lambda: DecisionTreeRegressor(criterion='mse'),
                     Dist=LogNormal,
                     Score=CRPS_surv,
                     n_estimators=1000,
                     learning_rate=0.1,
                     natural_gradient=natural_gradient,
                     second_order=second_order,
                     quadrant_search=quadrant_search,
                     nu_penalty=1e-5)
    loss_train = sb.fit(X, Y, C)
    preds_train = sb.pred_mean(X)
    preds_test = sb.pred_mean(df_test.drop(['Y', 'C'], axis=1))
    conc_test = calculate_concordance_naive(preds_test, df_test['Y'], df_test['C'])
    test_true_mean = np.mean(df_test['Y'])
    test_pred_mean = np.mean(preds_test)
    return loss_train, conc_test, test_true_mean, test_pred_mean
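# Sketch: sweeping run_experiments over the gradient configurations on the
# simulated CSVs that run_survboost (below) reads. The helper name
# compare_gradients and the print format are illustrative, not part of the repo.
def compare_gradients():
    for ng in (False, True):
        for so in (False, True):
            loss_train, conc_test, test_true_mean, test_pred_mean = run_experiments(
                "data/simulated/sim_data_train.csv",
                "data/simulated/sim_data_test.csv",
                natural_gradient=ng, second_order=so)
            print("natural=%s second_order=%s conc=%.4f true_mean=%.3f pred_mean=%.3f"
                  % (ng, so, conc_test, test_true_mean, test_pred_mean))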
def run_survboost():
    df_train = pd.read_csv("data/simulated/sim_data_train.csv")
    df_test = pd.read_csv("data/simulated/sim_data_test.csv")
    sb = SurvNGBoost(Base=lambda: DecisionTreeRegressor(criterion='mse'),
                     Dist=LogNormal,
                     Score=CRPS_surv,
                     n_estimators=200)
    # DataFrame.as_matrix() was removed from pandas; to_numpy() is the
    # current equivalent.
    sb.fit(df_train.drop(["Y", "C"], axis=1).to_numpy(),
           df_train["Y"], df_train["C"])
    preds_test = sb.pred_mean(df_test.drop(["Y", "C"], axis=1))
    np.save("data/simulated/sim_preds_survboost.npy", preds_test)
def main():
    # Simulated sanity check: features uniform on [1, 2), Y is the row sum.
    m, n = 1000, 5
    X = np.random.rand(m, n).astype(np.float32) + 1
    #Y = np.random.rand(m).astype(np.float32) * 2 + 1
    Y = np.sum(X, axis=1)
    #Y = (Y - np.mean(Y)) / np.std(Y)
    #Y = Y - np.min(Y) + 1e-2
    # rand() is in [0, 1), so the 1.5 threshold makes every C zero: no
    # observation is censored in this check.
    C = (np.random.rand(m) > 1.5).astype(np.float32)
    # C = np.zeros_like(Y)
    #X = simulate_X(num_unif=30, num_bi=30, N=1000, num_normal=30, normal_cov_strength=[0.5]*30)
    #Y, C = simulate_Y_C(X)
    print(X.shape, Y.shape, C.shape)
    print('Censoring fraction: %f' % np.mean(C))

    # Four fits sharing the same base learner, distribution, and score; they
    # differ only in the natural_gradient / second_order flags.
    def base():
        return DecisionTreeRegressor(criterion='friedman_mse', min_samples_split=2,
                                     min_samples_leaf=1, min_weight_fraction_leaf=0.0,
                                     max_depth=3)

    sb1 = SurvNGBoost(Base=base, Dist=HomoskedasticNormal, Score=MLE_surv,
                      n_estimators=100, learning_rate=0.1,
                      natural_gradient=False, second_order=False,
                      quadrant_search=False, nu_penalty=1e-5)
    sb2 = SurvNGBoost(Base=base, Dist=HomoskedasticNormal, Score=MLE_surv,
                      n_estimators=100, learning_rate=0.1,
                      natural_gradient=False, second_order=True,
                      quadrant_search=False, nu_penalty=1e-5)
    sb3 = SurvNGBoost(Base=base, Dist=HomoskedasticNormal, Score=MLE_surv,
                      n_estimators=100, learning_rate=0.1,
                      natural_gradient=True, second_order=False,
                      quadrant_search=False, nu_penalty=1e-5)
    sb4 = SurvNGBoost(Base=base, Dist=HomoskedasticNormal, Score=MLE_surv,
                      n_estimators=100, learning_rate=0.1,
                      natural_gradient=True, second_order=True,
                      quadrant_search=False, nu_penalty=1e-5)
    gbm = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1)

    # 700/300 train/test split of the simulated data.
    train, test = sb1.fit(X[:700], Y[:700], C[:700], X[700:], Y[700:], C[700:])
    train, test = sb2.fit(X[:700], Y[:700], C[:700], X[700:], Y[700:], C[700:])
    train, test = sb3.fit(X[:700], Y[:700], C[:700], X[700:], Y[700:], C[700:])
    train, test = sb4.fit(X[:700], Y[:700], C[:700], X[700:], Y[700:], C[700:])
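# Assumed entry point so the script can be run directly; the original snippet
# does not show how main() is invoked.
if __name__ == '__main__':
    main()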