.. [Wager2014] S. Wager, T. Hastie, B. Efron. "Confidence Intervals for
   Random Forests: The Jackknife and the Infinitesimal Jackknife", Journal
   of Machine Learning Research vol. 15, pp. 1625-1651, 2014.
"""
if inbag is None:
    inbag = calc_inbag_modified(X_train.shape[0], forest, is_ensemble)
if not is_ensemble:
    pred = np.array([tree.predict(X_test) for tree in forest]).T
else:
    pred = np.array([tree.predict(X_test) for tree in forest.model]).T
    pred = pred[0]
pred_mean = np.mean(pred, 0)
pred_centered = pred - pred_mean
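# For a plain scikit-learn forest, pred has shape (n_test, n_trees): one
# column per tree. pred_mean (mean over axis 0) is each tree's average
# prediction across the test points, so pred_centered holds each tree's
# deviations from that average.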
n_trees = forest.n_estimators
V_IJ = fci._core_computation(X_train, X_test, inbag, pred_centered, n_trees,
                             memory_constrained, memory_limit)
V_IJ_unbiased = fci._bias_correction(V_IJ, inbag, pred_centered, n_trees)
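# V_IJ is the raw infinitesimal-jackknife variance estimate for each test
# point; the bias correction above removes the Monte Carlo bias caused by the
# finite number of trees (Wager et al., 2014).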
# Correct for cases where resampling is done without replacement:
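# (np.max(inbag) == 1 means every training point appears at most once in each
# tree's sample, i.e. subsampling without replacement; np.mean(inbag) is then
# the average subsample fraction, and the factor 1 / (1 - fraction)**2
# re-inflates the variance estimate accordingly.)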
if np.max(inbag) == 1:
    variance_inflation = 1 / (1 - np.mean(inbag)) ** 2
    V_IJ_unbiased *= variance_inflation
if basic_IJ:
    return V_IJ
if not calibrate:
    return V_IJ_unbiased
if V_IJ_unbiased.shape[0] <= 20:
    print("No calibration with n_samples <= 20")
    return V_IJ_unbiased
if calibrate:
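    # `new_forest` and `n_sample` are set up by calibration code that precedes
    # this call (not shown in this excerpt); following forestci's calibration
    # scheme, `new_forest` is presumably a copy of `forest` restricted to
    # `n_sample` of its trees, used below to gauge Monte Carlo noise. The
    # commented-out call is the original fci.random_forest_error invocation
    # that the modified recursive call replaces.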
    # results_ss = fci.random_forest_error(new_forest, X_train, X_test,
    #                                      calibrate=False,
    #                                      memory_constrained=memory_constrained,
    #                                      memory_limit=memory_limit)
    results_ss = random_forest_error_modified(new_forest, is_ensemble, X_train, X_test,
                                              calibrate=False,
                                              memory_constrained=memory_constrained,
                                              memory_limit=memory_limit)
    # Use this second set of variance estimates
    # to estimate scale of Monte Carlo noise
    sigma2_ss = np.mean((results_ss - V_IJ_unbiased)**2)
    delta = n_sample / n_trees
    sigma2 = (delta**2 + (1 - delta)**2) / (2 * (1 - delta)**2) * sigma2_ss
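    # delta is the fraction of trees in the subsampled forest; the factor
    # (delta**2 + (1 - delta)**2) / (2 * (1 - delta)**2) rescales the observed
    # squared discrepancy between the two estimates into an estimate of the
    # Monte Carlo variance of V_IJ_unbiased itself.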
    # Use Monte Carlo noise scale estimate for empirical Bayes calibration
    V_IJ_calibrated = fci.calibration.calibrateEB(V_IJ_unbiased, sigma2)
    return V_IJ_calibrated
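

# ---------------------------------------------------------------------------
# Usage sketch (illustration only, not part of the original module): how the
# routine above might be called on a plain scikit-learn random forest. The
# signature of random_forest_error_modified is inferred from the recursive
# call inside the calibration branch; adjust names to the actual package
# layout before running.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import numpy as np
    from sklearn.datasets import make_regression
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.model_selection import train_test_split

    X, y = make_regression(n_samples=400, n_features=8, noise=1.0,
                           random_state=0)
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)

    rf = RandomForestRegressor(n_estimators=100, random_state=0)
    rf.fit(X_tr, y_tr)

    # is_ensemble=False: rf is a plain scikit-learn forest, so its trees are
    # iterated directly rather than through a `.model` attribute.
    # calibrate=False keeps the call within the code shown above; calibration
    # can be enabled when more than 20 test points are available.
    V_IJ = random_forest_error_modified(rf, False, X_tr, X_te, calibrate=False)
    err_bars = np.sqrt(V_IJ)  # 1-sigma error bars around rf.predict(X_te)
    print(err_bars[:5])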