Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
args=(Ts, E, weights, entries, Xs),
options={**{"disp": show_progress}, **self._scipy_fit_options},
)
# Fragment from inside a fitting routine: `results` is the optimizer's result
# object produced by the call that ends just above this point.
# Optionally echo the raw optimizer result.
if show_progress:
print(results)
if results.success:
# Total weight; used below to rescale the objective value and the Hessian.
sum_weights = weights.sum()
# hessian(f) builds a function, which is then evaluated at the solution results.x
# with the same extra arguments that were handed to the optimizer.
# pylint: disable=no-value-for-parameter
hessian_ = hessian(self._neg_likelihood_with_penalty_function)(results.x, Ts, E, weights, entries, Xs)
# See issue https://github.com/CamDavidsonPilon/lifelines/issues/801
# Averaging with the transpose makes the matrix exactly symmetric.
hessian_ = (hessian_ + hessian_.T) / 2
# Returns (solution, negated objective value * sum_weights, Hessian * sum_weights).
# NOTE(review): the rescaling suggests the objective is normalised by the total weight — confirm.
return results.x, -sum_weights * results.fun, sum_weights * hessian_
else:
self._check_values_post_fitting(Xs.df, utils.coalesce(Ts[1], Ts[0]), E, weights, entries)
print(results)
raise utils.ConvergenceError(
dedent(
"""\
Fitting did not converge. Try checking the following:
0. Are there any lifelines warnings outputted during the `fit`?
1. Inspect your DataFrame: does everything look as expected?
2. Is there high-collinearity in the dataset? Try using the variance inflation factor (VIF) to find redundant variables.
3. Trying adding a small penalizer (or changing it, if already present). Example: `%s(penalizer=0.01).fit(...)`.
4. Are there any extreme outliers? Try modeling them or dropping them to see if it helps convergence.
"""
% self._class_name
)
# Subtract the penalizer from every (d + 1)-th entry of h's flat view
# (the main diagonal if h is a d x d array — TODO confirm shape).
h.flat[:: d + 1] -= self.penalizer
# reusing a piece to make g * inv(h) * g.T faster later
try:
# Solve the linear system with matrix -h and right-hand side g.
# NOTE(review): the assume_a/check_finite keywords suggest spsolve is an alias
# of scipy.linalg.solve — confirm against the file's imports.
inv_h_dot_g_T = spsolve(-h, g, assume_a="pos", check_finite=False)
except (ValueError, np.linalg.LinAlgError) as e:
# Run the post-fitting data checks before reporting the failure.
self._check_values_post_fitting(X, T, E, weights)
# The message test comes first, so any caught exception whose text contains
# "infs or NaNs" is reported as a nan/inf problem regardless of its type.
if "infs or NaNs" in str(e):
raise ConvergenceError(
"""Hessian or gradient contains nan or inf value(s). Convergence halted. {0}""".format(
CONVERGENCE_DOCS
),
e,
)
elif isinstance(e, np.linalg.LinAlgError):
raise ConvergenceError(
"""Convergence halted due to matrix inversion problems. Suspicion is high collinearity. {0}""".format(
CONVERGENCE_DOCS
),
e,
)
else:
# something else?
raise e
# The solve result is used directly as the parameter update (no step-size factor here).
delta = inv_h_dot_g_T
# A NaN anywhere in the update aborts the fit, after running the data checks.
if np.any(np.isnan(delta)):
self._check_values_post_fitting(X, T, E, weights)
raise ConvergenceError(
"""delta contains nan value(s). Convergence halted. {0}""".format(CONVERGENCE_DOCS)
)
# Solve for the Newton direction and turn solver failures into ConvergenceError.
try:
# reusing a piece to make g * inv(h) * g.T faster later
# NOTE(review): sym_pos looks like the legacy scipy.linalg.solve keyword
# (superseded by assume_a="pos") — confirm which solver spsolve aliases.
inv_h_dot_g_T = spsolve(-h, g, sym_pos=True)
except ValueError as e:
# Only ValueErrors whose text mentions "infs or NaNs" are translated; others are re-raised.
# NOTE(review): if LinAlgError below is numpy's class (a ValueError subclass in
# current numpy), this handler intercepts it first and the LinAlgError handler
# never runs — confirm.
if "infs or NaNs" in str(e):
raise ConvergenceError(
"""hessian or gradient contains nan or inf value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""",
e,
)
else:
# something else?
raise e
except LinAlgError as e:
raise ConvergenceError(
"""Convergence halted due to matrix inversion problems. Suspicion is high colinearity. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""",
e,
)
# Scale the solved direction by the current step size to get the parameter update.
delta = step_size * inv_h_dot_g_T
# A NaN anywhere in the update aborts the fit.
if np.any(np.isnan(delta)):
raise ConvergenceError(
"""delta contains nan value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
"""
)
# Save these as pending result
hessian, gradient = h, g
bounds=self._bounds,
options={**{"disp": show_progress}, **self._scipy_fit_options},
)
# Fragment: `results` is the optimizer's result object from the call that ends just above.
# convergence successful.
if results.success:
# hessian(f) builds a function, which is then evaluated at the solution results.x.
# pylint: disable=no-value-for-parameter
hessian_ = hessian(negative_log_likelihood)(results.x, Ts, E, entry, weights)
# see issue https://github.com/CamDavidsonPilon/lifelines/issues/801
# Averaging with the transpose makes the matrix exactly symmetric.
hessian_ = (hessian_ + hessian_.T) / 2
# Returns (solution, negated objective value * total weight, Hessian * total weight).
return results.x, -results.fun * weights.sum(), hessian_ * weights.sum()
# convergence failed.
# The raw optimizer result is printed before the error handling that follows.
print(results)
if self._KNOWN_MODEL:
raise utils.ConvergenceError(
dedent(
"""\
Fitting did not converge. This is mostly a lifelines problem, but a few things you can check:
1. Are there any extreme values in the durations column?
- Try scaling your durations to a more reasonable values closer to 1 (multiplying or dividing by some 10^n). If this works,
then likely you just need to specify good initial values with `initial_point` argument in the call to `fit`.
- Try dropping them to see if the model converges.
2. %s may just be a poor model of the data. Try another parametric model.
"""
% self._class_name
)
)
else:
raise utils.ConvergenceError(
elif isinstance(e, np.linalg.LinAlgError):
raise ConvergenceError(
"""Convergence halted due to matrix inversion problems. Suspicion is high collinearity. {0}""".format(
CONVERGENCE_DOCS
),
e,
)
else:
# something else?
raise e
# The solve result is used directly as the parameter update (no step-size factor here).
delta = inv_h_dot_g_T
# A NaN anywhere in the update aborts the fit, after running the data checks.
if np.any(np.isnan(delta)):
self._check_values_post_fitting(X, T, E, weights)
raise ConvergenceError(
"""delta contains nan value(s). Convergence halted. {0}""".format(CONVERGENCE_DOCS)
)
# Save these as pending result
hessian, gradient = h, g
# Size of the update, reported in the progress line below.
norm_delta = norm(delta)
# reusing an above piece to make g * inv(h) * g.T faster.
newton_decrement = g.dot(inv_h_dot_g_T) / 2
if show_progress:
# One status line per call; i, step_size, ll and start come from code outside this fragment.
print(
"\rIteration %d: norm_delta = %.5f, step_size = %.4f, ll = %.5f, newton_decrement = %.5f, seconds_since_start = %.1f"
% (i, norm_delta, step_size, ll, newton_decrement, time.time() - start)
)
# Sum the Hessian, gradient and log-likelihood pieces yielded by the partition helper
# (one triple per stratum, judging by the helper's name — confirm).
for _h, _g, _ll in self._partition_by_strata_and_apply(X, T, E, weights, get_gradients, beta):
g += _g
h += _h
ll += _ll
if self.penalizer > 0:
# add the gradient and hessian of the l2 term
g -= self.penalizer * beta.T
# Every (d + 1)-th entry of h's flat view, i.e. the main diagonal if h is d x d — TODO confirm shape.
h.flat[:: d + 1] -= self.penalizer
# reusing a piece to make g * inv(h) * g.T faster later
try:
# Solve the linear system with matrix -h and right-hand side g.T.
inv_h_dot_g_T = spsolve(-h, g.T, sym_pos=True)
except ValueError as e:
# Only ValueErrors whose text mentions "infs or NaNs" are translated; others are re-raised.
# NOTE(review): the caught exception e is not passed to ConvergenceError here,
# so its details are not carried in the new error's arguments.
if "infs or NaNs" in str(e):
raise ConvergenceError(
"""hessian or gradient contains nan or inf value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
"""
)
else:
# something else?
raise e
# Scale the solved direction by the current step size to get the parameter update.
delta = step_size * inv_h_dot_g_T
# A NaN anywhere in the update aborts the fit.
if np.any(np.isnan(delta)):
raise ConvergenceError(
"""delta contains nan value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
"""
)
# this is a neat optimization, the null partial likelihood
# is the same as the full partial but evaluated at zero.
# if the user supplied a non-trivial initial point, we need to delay this.
# Record the current log-likelihood value as the model's null log-likelihood.
self._log_likelihood_null = ll
if self.penalizer > 0:
# add the gradient and hessian of the l2 term
g -= self.penalizer * beta
# Every (d + 1)-th entry of h's flat view, i.e. the main diagonal if h is d x d — TODO confirm shape.
h.flat[:: d + 1] -= self.penalizer
try:
# reusing a piece to make g * inv(h) * g.T faster later
inv_h_dot_g_T = spsolve(-h, g, sym_pos=True)
except ValueError as e:
# Only ValueErrors whose text mentions "infs or NaNs" are translated; others are re-raised.
# NOTE(review): if LinAlgError below is numpy's class (a ValueError subclass in
# current numpy), this handler intercepts it first — confirm.
if "infs or NaNs" in str(e):
raise ConvergenceError(
"""hessian or gradient contains nan or inf value(s). Convergence halted. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""",
e,
)
else:
# something else?
raise e
except LinAlgError as e:
raise ConvergenceError(
"""Convergence halted due to matrix inversion problems. Suspicion is high colinearity. Please see the following tips in the lifelines documentation:
https://lifelines.readthedocs.io/en/latest/Examples.html#problems-with-convergence-in-the-cox-proportional-hazard-model
""",
e,
)