        mse /= n_folds
        mse_list.append(mse)
    return mse_list
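# The three lines above are the tail of cv_n_estimators. A minimal sketch of
# the surrounding routine, inferred from the call sites below (a hypothetical
# reconstruction, not the original source):
#
#   def cv_n_estimators(X, y, C, cv_list, n_folds, distrib, quadrant, s):
#       mse_list = []
#       for n_estimators in cv_list:
#           mse = 0.
#           for train_idx, val_idx in KFold(n_splits=n_folds).split(X):
#               # fit SurvNGBoost with this n_estimators on the training fold
#               # and accumulate the validation-fold MSE into mse
#               ...
#           mse /= n_folds
#           mse_list.append(mse)
#       return mse_list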
if __name__ == "__main__":
    # NOTE: load_boston was removed in scikit-learn 1.2, so this snippet
    # targets an older scikit-learn release.
    data = load_boston()
    X, y = data["data"], data["target"]
    C = np.zeros(len(y))  # no censoring: every target is an observed event
    X_train, X_test, y_train, y_test, C_train, C_test = train_test_split(X, y, C)
    #n_estimators_list = [10,20]
    fold_num = 10

    print("*"*6 + " Heteroskedastic Distributions with MLE [Orthant Search] " + "*"*6)
    n_estimators_list = [10, 50, 80, 100, 120, 150, 200, 300, 500, 800, 1000, 1500]
    het_q_mle = cv_n_estimators(X_train, y_train, C_train, cv_list = n_estimators_list,
                                n_folds = fold_num, distrib = Normal, quadrant = True, s = MLE_surv)
    # cv_n_estimators returns MSEs, so the best setting is the argmin, not the argmax
    optimal_het_q_mle = n_estimators_list[np.argmin(het_q_mle)]
    print("--- Cross Validation MSE ---")
    print(het_q_mle)
    print("--- Optimal parameter for Heteroskedastic Distributions with MLE [Orthant Search] ---")
    print(optimal_het_q_mle)

    print("*"*6 + " Homoskedastic Distributions with MLE [Orthant Search] " + "*"*6)
    hom_q_mle = cv_n_estimators(X_train, y_train, C_train, cv_list = n_estimators_list,
                                n_folds = fold_num, distrib = HomoskedasticNormal, quadrant = True, s = MLE_surv)
    optimal_hom_q_mle = n_estimators_list[np.argmin(hom_q_mle)]
    print("--- Cross Validation MSE ---")
    print(hom_q_mle)
    print("--- Optimal parameter for Homoskedastic Distributions with MLE [Orthant Search] ---")
    print(optimal_hom_q_mle)
sb3 = SurvNGBoost(Base = lambda : DecisionTreeRegressor(criterion='friedman_mse', min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_depth=3),
                  Dist = HomoskedasticNormal,
                  Score = MLE_surv,
                  n_estimators = 100,
                  learning_rate = 0.1,
                  natural_gradient = True,
                  second_order = False,
                  quadrant_search = False,
                  nu_penalty=1e-5)
sb4 = SurvNGBoost(Base = lambda : DecisionTreeRegressor(criterion='friedman_mse', min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_depth=3),
                  Dist = HomoskedasticNormal,
                  Score = MLE_surv,
                  n_estimators = 100,
                  learning_rate = 0.1,
                  natural_gradient = True,
                  second_order = True,
                  quadrant_search = False,
                  nu_penalty=1e-5)
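# sb3 and sb4 both use the natural gradient and differ only in second_order
# (False vs. True). Together with sb1 and sb2 (defined later in this file with
# natural_gradient = False), the four models sweep the 2x2 grid of
# natural_gradient x second_order with the same base learner, homoskedastic
# normal distribution, and MLE survival score.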
gbm = GradientBoostingRegressor(n_estimators = 100, learning_rate = 0.1)
train, test = sb1.fit(X[:700], Y[:700], C[:700], X[700:], Y[700:], C[700:])
train, test = sb2.fit(X[:700], Y[:700], C[:700], X[700:], Y[700:], C[700:])
train, test = sb3.fit(X[:700], Y[:700], C[:700], X[700:], Y[700:], C[700:])
train, test = sb4.fit(X[:700], Y[:700], C[:700], X[700:], Y[700:], C[700:])
preds1 = sb1.pred_mean(X)
preds2 = sb2.pred_mean(X)
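# Hypothetical follow-up (not in the original snippet): score the point
# predictions on the held-out rows with scikit-learn's mean_squared_error.
# The [700:] slice mirrors the train/test split used in the fit calls above.
from sklearn.metrics import mean_squared_error
print('sb1 test MSE: %.4f' % mean_squared_error(Y[700:], preds1[700:]))
print('sb2 test MSE: %.4f' % mean_squared_error(Y[700:], preds2[700:]))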
def main():
    mu, logstd = 0., np.log(1.)
    print('True mu=%.03f std=%.03f' % (mu, np.exp(logstd)))
    for frac in [0.01, 0.05, 0.1, 0.5, 0.9, 0.95, 0.99]:
        Y, C = generate_data(10000, frac_cens=frac, mu=mu, logstd=logstd)
        #print(Y[C==0].mean(), Y[C==1].mean())
        print('==== Censoring fraction %.2f ====' % torch.mean(C))
        mle_mu, mle_std = fit(Y, C, MLE_surv, mu_init=0., logstd_init=0.)
        print('MLE mu=%.03f std=%.03f' % (mle_mu, mle_std))
        crps_mu, crps_std = fit(Y, C, CRPS_surv, mu_init=0., logstd_init=0.)
        print('CRPS mu=%.03f std=%.03f' % (crps_mu, crps_std))
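# main() is defined but never invoked in this snippet; the usual entry-point
# guard (an assumption, not shown in the original) would be:
if __name__ == '__main__':
    main()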
print("*"*6 + " Heteroskedastic Distributions with MLE [Line Search] " + "*"*6)
het_l_mle = cv_n_estimators(X_train, y_train, C_train, cv_list = n_estimators_list, \
n_folds=fold_num, distrib = Normal, quadrant = False, s=MLE_surv)
optimal_het_l_mle = n_estimators_list[np.argmax(het_l_mle)]
print("--- Cross Validation MSE ---")
print(het_l_mle)
print("--- Optimal parameter for Heteroskedastic Distributions with MLE [Line Search] ---")
print(optimal_het_l_mle)
print("*"*6 + " Homoskedastic Distributions with MLE [Line Search] " + "*"*6)
hom_l_mle = cv_n_estimators(X_train, y_train, C_train, cv_list = n_estimators_list, \
n_folds=fold_num, distrib = HomoskedasticNormal, quadrant = False, s = MLE_surv)
optimal_hom_l_mle = n_estimators_list[np.argmax(hom_l_mle)]
print("--- Cross Validation MSE ---")
print(hom_l_mle)
print("--- Optimal parameter for Heteroskedastic Distributions with MLE [Line Search] ---")
print(optimal_hom_l_mle)
print("*"*6 + " Heteroskedastic Distributions with CRPS [Orthan Search] " + "*"*6)
het_q_crps = cv_n_estimators(X_train, y_train, C_train, cv_list = n_estimators_list, \
n_folds=fold_num, distrib = Normal, quadrant = True, s=CRPS_surv)
optimal_het_q_crps = n_estimators_list[np.argmax(het_q_crps)]
print("--- Cross Validation MSE ---")
print(het_q_crps)
print("--- Optimal parameter for Heteroskedastic Distributions with CRPS [Orthan Search] ---")
print(optimal_het_q_crps)
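    # Hypothetical next step (not in the original snippet): refit the winning
    # heteroskedastic MLE [Orthant Search] configuration on the full training
    # split and report held-out MSE. All constructor arguments below are
    # assumptions mirroring the SurvNGBoost calls elsewhere in this file.
    best = SurvNGBoost(Base = lambda : DecisionTreeRegressor(max_depth=3),
                       Dist = Normal,
                       Score = MLE_surv,
                       n_estimators = optimal_het_q_mle,
                       learning_rate = 0.1,
                       natural_gradient = True,
                       second_order = False,
                       quadrant_search = True,
                       nu_penalty=1e-5)
    best.fit(X_train, y_train, C_train, X_test, y_test, C_test)
    print('Refit test MSE: %f' % np.mean((best.pred_mean(X_test) - y_test) ** 2))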
m, n = 1000, 5
X = np.random.rand(m, n).astype(np.float32) + 1
#Y = np.random.rand(m).astype(np.float32) * 2 + 1
Y = np.sum(X, axis=1)
#Y = (Y - np.mean(Y)) / np.std(Y)
#Y = Y - np.min(Y) + 1e-2
# np.random.rand is in [0, 1), so this mask is always False: zero censoring
C = (np.random.rand(m) > 1.5).astype(np.float32)
# C = np.zeros_like(Y)
#X = simulate_X(num_unif=30, num_bi=30, N=1000, num_normal=30, normal_cov_strength=[0.5]*30)
#Y, C = simulate_Y_C(X)
print(X.shape, Y.shape, C.shape)
print('Censoring fraction: %f' % (np.mean(C)))
sb1 = SurvNGBoost(Base = lambda : DecisionTreeRegressor(criterion='friedman_mse', min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_depth=3),
                  Dist = HomoskedasticNormal,
                  Score = MLE_surv,
                  n_estimators = 100,
                  learning_rate = 0.1,
                  natural_gradient = False,
                  second_order = False,
                  quadrant_search = False,
                  nu_penalty=1e-5)
sb2 = SurvNGBoost(Base = lambda : DecisionTreeRegressor(criterion='friedman_mse', min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_depth=3),
                  Dist = HomoskedasticNormal,
                  Score = MLE_surv,
                  n_estimators = 100,
                  learning_rate = 0.1,
                  natural_gradient = False,
                  second_order = True,
                  quadrant_search = False,
                  nu_penalty=1e-5)
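# sb1 and sb2 are the plain-gradient counterparts of sb3 and sb4: identical
# base learner, distribution, and score, with second_order = False for sb1 and
# True for sb2. Their fit and pred_mean calls appear earlier in this file.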