entry: an array, or pd.Series, of length n
relative time when a subject entered the study. This is useful for left-truncated (not left-censored) observations. If None, all members of the population
entered the study when they were "born": time zero.
weights: an array, or pd.Series, of length n
integer weights per observation
initial_point: (d,) numpy array, optional
initialize the starting point of the iterative
algorithm. Default is the zero vector.
Returns
-------
self
self with new properties like ``cumulative_hazard_``, ``survival_function_``
"""
self.durations = np.asarray(utils.pass_for_numeric_dtypes_or_raise_array(durations))
utils.check_nans_or_infs(self.durations)
utils.check_positivity(self.durations)
return self._fit(
(self.durations, None),
event_observed=event_observed,
timeline=timeline,
label=label,
alpha=alpha,
ci_labels=ci_labels,
show_progress=show_progress,
entry=entry,
weights=weights,
initial_point=initial_point,
)
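# A minimal usage sketch of the fit call above, assuming the public WeibullFitter
# API; the toy arrays are invented purely to illustrate the ``event_observed``,
# ``entry`` and ``weights`` arguments documented in the docstring.
from lifelines import WeibullFitter
import numpy as np

T = np.array([5.0, 6.5, 2.0, 9.1, 3.3])      # durations
E = np.array([1, 0, 1, 1, 0])                 # 1 = event observed, 0 = right-censored
entry = np.array([0.0, 1.0, 0.0, 2.5, 0.0])   # delayed entry times (left truncation)
w = np.array([1, 2, 1, 1, 3])                 # integer case weights

wf = WeibullFitter()
wf.fit(T, event_observed=E, entry=entry, weights=w)
wf.cumulative_hazard_    # populated by fit, per the docstring above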
>>> aft.print_summary()
>>> aft.predict_median(df)
>>>
>>> aft = WeibullAFTFitter()
>>> aft.fit(df, 'T', 'E', ancillary_df=df)
>>> aft.print_summary()
>>> aft.predict_median(df)
"""
self.duration_col = duration_col
self._time_cols = [duration_col]
self.fit_intercept = utils.coalesce(fit_intercept, self.fit_intercept)
df = df.copy()
T = utils.pass_for_numeric_dtypes_or_raise_array(df.pop(self.duration_col)).astype(float)
self.durations = T.copy()
primary_columns = df.columns.difference([self.duration_col, event_col]).tolist()
if isinstance(ancillary_df, pd.DataFrame):
self.model_ancillary = True
assert ancillary_df.shape[0] == df.shape[0], "ancillary_df must be the same shape[0] as df"
regressors = {
self._primary_parameter_name: primary_columns,
self._ancillary_parameter_name: ancillary_df.columns.difference(
[self.duration_col, event_col]
).tolist(),
}
ancillary_cols_to_consider = ancillary_df.columns.difference(df.columns).difference(
[self.duration_col, event_col]
)
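# A minimal sketch, assuming the public WeibullAFTFitter API, of passing a
# separate ancillary DataFrame: its extra column ('blood_pressure', invented
# here) is exactly what the ``ancillary_cols_to_consider`` logic above picks up.
import pandas as pd
from lifelines import WeibullAFTFitter

df = pd.DataFrame({
    "T": [4.0, 6.0, 7.0, 9.0, 5.0, 8.0],
    "E": [1, 1, 0, 1, 1, 0],
    "age": [52, 61, 45, 70, 58, 66],
})
ancillary = pd.DataFrame({"blood_pressure": [120.0, 135.0, 118.0, 142.0, 128.0, 137.0]})

aft = WeibullAFTFitter()
aft.fit(df, "T", "E", ancillary_df=ancillary)   # ancillary parameter modelled on blood_pressure
aft.print_summary()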
) -> "ParametricRegressionFitter":
self._time_fit_was_called = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S") + " UTC"
self._n_examples = df.shape[0]
self.weights_col = weights_col
self.entry_col = entry_col
self.event_col = event_col
self.robust = robust
if timeline is not None:
self.timeline = np.sort(np.asarray(timeline).astype(float))
else:
self.timeline = np.unique(utils.coalesce(*Ts))
E = (
utils.pass_for_numeric_dtypes_or_raise_array(df.pop(self.event_col))
if (self.event_col is not None)
else pd.Series(np.ones(self._n_examples, dtype=bool), index=df.index, name="E")
)
weights = (
utils.pass_for_numeric_dtypes_or_raise_array(df.pop(self.weights_col)).astype(float)
if (self.weights_col is not None)
else pd.Series(np.ones(self._n_examples, dtype=float), index=df.index, name="weights")
)
entries = (
utils.pass_for_numeric_dtypes_or_raise_array(df.pop(entry_col)).astype(float)
if (entry_col is not None)
else pd.Series(np.zeros(self._n_examples, dtype=float), index=df.index, name="entry")
)
utils.check_nans_or_infs(E)
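# A hedged sketch of the timeline handling and defaults constructed above, on an
# invented DataFrame: an explicit timeline fixes the grid used for the fitted
# curves, and omitting event_col / weights_col / entry_col gives every row an
# observed event, weight 1.0 and entry time 0.0.
import numpy as np
import pandas as pd
from lifelines import WeibullAFTFitter

df = pd.DataFrame({"T": [5.0, 6.0, 3.0, 8.0, 4.0, 7.0],
                   "E": [1, 0, 1, 1, 0, 1],
                   "x": [0.1, 0.5, 0.3, 0.9, 0.2, 0.7]})

aft = WeibullAFTFitter()
aft.fit(df, "T", event_col="E", timeline=np.linspace(0.0, 10.0, 101))
aft.fit(df[["T", "x"]], "T")   # no event/weights/entry columns: the defaults apply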
>>> 'age': [4, 3, 9, 8, 7, 4, 4, 3, 2, 5, 6, 7],
>>> })
>>>
>>> aft = WeibullAFTFitter()
>>> aft.fit_left_censoring(df, 'T', 'E')
>>> aft.print_summary()
>>> aft.predict_median(df)
>>>
>>> aft = WeibullAFTFitter()
>>> aft.fit_left_censoring(df, 'T', 'E', ancillary_df=df)
>>> aft.print_summary()
>>> aft.predict_median(df)
"""
df = df.copy()
T = utils.pass_for_numeric_dtypes_or_raise_array(df.pop(duration_col)).astype(float)
self.durations = T.copy()
self.fit_intercept = utils.coalesce(fit_intercept, self.fit_intercept)
self.duration_col = duration_col
primary_columns = df.columns.difference([duration_col, event_col]).tolist()
if isinstance(ancillary_df, pd.DataFrame):
self.model_ancillary = True
assert ancillary_df.shape[0] == df.shape[0], "ancillary_df must be the same shape[0] as df"
regressors = {
self._primary_parameter_name: primary_columns,
self._ancillary_parameter_name: ancillary_df.columns.tolist(),
}
df = pd.concat([df, ancillary_df[ancillary_df.columns.difference(df.columns)]], axis=1)
elif (ancillary_df is True) or self.model_ancillary:
self.model_ancillary = True
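# A hedged sketch of left-censored input for fit_left_censoring, with invented
# detection-limit data: E == 0 marks durations known only to be at most the
# recorded T. Passing ancillary_df=True models the ancillary parameter on the
# same covariates, matching the branch above.
import pandas as pd
from lifelines import WeibullAFTFitter

df = pd.DataFrame({
    "T": [2.0, 0.5, 1.3, 0.5, 3.1, 0.5],
    "E": [1, 0, 1, 0, 1, 0],
    "dose": [1.1, 0.2, 0.9, 0.1, 1.5, 0.3],
})

aft = WeibullAFTFitter()
aft.fit_left_censoring(df, "T", "E", ancillary_df=True)
aft.predict_median(df)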
self.weights_col = weights_col
if (df[weights_col] <= 0).any():
raise ValueError("values in weights_col must be positive.")
df = df.rename(
columns={id_col: "id", event_col: "event", start_col: "start", stop_col: "stop", weights_col: "__weights"}
)
if self.strata is None:
df = df.set_index("id")
else:
df = df.set_index(_to_list(self.strata) + ["id"]) # TODO: needs to be a list
df = df.sort_index()
events, start, stop = (
pass_for_numeric_dtypes_or_raise_array(df.pop("event")).astype(bool),
df.pop("start"),
df.pop("stop"),
)
weights = df.pop("__weights").astype(float)
df = df.astype(float)
self._check_values(df, events, start, stop)
self._norm_mean = df.mean(0)
self._norm_std = df.std(0)
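# normalize() standardizes each covariate column, i.e. (df - self._norm_mean) /
# self._norm_std, so the Newton-Raphson iterations below operate on comparably
# scaled columns.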
params_ = self._newton_rhaphson(
normalize(df, self._norm_mean, self._norm_std),
events,
start,
stop,
entry: an array, or pd.Series, of length n
relative time when a subject entered the study. This is useful for left-truncated (not left-censored) observations. If None, all members of the population
entered the study when they were "born": time zero.
weights: an array, or pd.Series, of length n
integer weights per observation
initial_point: (d,) numpy array, optional
initialize the starting point of the iterative
algorithm. Default is the zero vector.
Returns
-------
self
self with new properties like ``cumulative_hazard_``, ``survival_function_``
"""
self.upper_bound = np.atleast_1d(utils.pass_for_numeric_dtypes_or_raise_array(upper_bound))
self.lower_bound = np.atleast_1d(utils.pass_for_numeric_dtypes_or_raise_array(lower_bound))
utils.check_nans_or_infs(self.lower_bound)
utils.check_positivity(self.upper_bound)
if (self.upper_bound < self.lower_bound).any():
raise ValueError("All upper_bound times must be greater than or equal to lower_bound times.")
if event_observed is None:
event_observed = self.upper_bound == self.lower_bound
if ((self.lower_bound == self.upper_bound) != event_observed).any():
raise ValueError(
"For all rows, lower_bound == upper_bound if and only if event observed = 1 (uncensored). Likewise, lower_bound < upper_bound if and only if event observed = 0 (censored)"
)
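# A hedged sketch of interval-censored input for a univariate fitter, with
# invented bounds: a row counts as uncensored exactly when its lower and upper
# bounds coincide, matching the consistency check enforced above.
import numpy as np
from lifelines import WeibullFitter

lower = np.array([4.0, 2.0, 7.0, 3.0, 5.0, 1.0])
upper = np.array([4.0, 5.0, 9.0, 3.0, 8.0, 1.0])

wf = WeibullFitter()
wf.fit_interval_censoring(lower, upper)   # event_observed defaults to lower == upper
wf.survival_function_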
self.entry_col = entry_col
self.event_col = event_col
self.robust = robust
if timeline is not None:
self.timeline = np.sort(np.asarray(timeline).astype(float))
else:
self.timeline = np.unique(utils.coalesce(*Ts))
E = (
utils.pass_for_numeric_dtypes_or_raise_array(df.pop(self.event_col))
if (self.event_col is not None)
else pd.Series(np.ones(self._n_examples, dtype=bool), index=df.index, name="E")
)
weights = (
utils.pass_for_numeric_dtypes_or_raise_array(df.pop(self.weights_col)).astype(float)
if (self.weights_col is not None)
else pd.Series(np.ones(self._n_examples, dtype=float), index=df.index, name="weights")
)
entries = (
utils.pass_for_numeric_dtypes_or_raise_array(df.pop(entry_col)).astype(float)
if (entry_col is not None)
else pd.Series(np.zeros(self._n_examples, dtype=float), index=df.index, name="entry")
)
utils.check_nans_or_infs(E)
E = E.astype(bool)
self.event_observed = E.copy()
self.entry = entries.copy()
self.weights = weights.copy()
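# A hedged sketch of supplying the optional weight and delayed-entry columns
# consumed above; 'w' and 'entry' are hypothetical column names invented for
# illustration.
import pandas as pd
from lifelines import WeibullAFTFitter

df = pd.DataFrame({
    "T": [5.0, 6.0, 3.0, 8.0, 4.0],
    "E": [1, 0, 1, 1, 0],
    "x": [0.1, 0.5, 0.3, 0.9, 0.2],
    "w": [1.0, 2.0, 1.0, 1.0, 3.0],
    "entry": [0.0, 1.0, 0.0, 2.0, 0.0],   # subjects 2 and 4 are left-truncated
})

aft = WeibullAFTFitter()
aft.fit(df, "T", event_col="E", weights_col="w", entry_col="entry")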
>>>
>>> aft = WeibullAFTFitter()
>>> aft.fit_interval_censoring(df, 'start', 'stop', 'E', ancillary_df=df)
>>> aft.print_summary()
>>> aft.predict_median(df)
"""
self.lower_bound_col = lower_bound_col
self.upper_bound_col = upper_bound_col
self.fit_intercept = utils.coalesce(fit_intercept, self.fit_intercept)
self._time_cols = [lower_bound_col, upper_bound_col]
df = df.copy()
lower_bound = utils.pass_for_numeric_dtypes_or_raise_array(df.pop(lower_bound_col)).astype(float)
upper_bound = utils.pass_for_numeric_dtypes_or_raise_array(df.pop(upper_bound_col)).astype(float)
if event_col is None:
event_col = "E_lifelines_added"
df[event_col] = lower_bound == upper_bound
if ((lower_bound == upper_bound) != df[event_col]).any():
raise ValueError(
"For all rows, lower_bound == upper_bound if and only if event observed = 1 (uncensored). Likewise, lower_bound < upper_bound if and only if event observed = 0 (censored)"
)
if (lower_bound > upper_bound).any():
raise ValueError("All upper bound measurements must be greater than or equal to lower bound measurements.")
self.lower_bound = lower_bound
self.upper_bound = upper_bound
primary_columns = df.columns.difference([self.lower_bound_col, self.upper_bound_col, event_col]).tolist()
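# A hedged sketch of the interval-censored regression input handled above, with
# invented values: 'start'/'stop' bound each event time, and when event_col is
# omitted the fitter derives the event indicator as start == stop (exactly
# observed), as the code above shows.
import pandas as pd
from lifelines import WeibullAFTFitter

df = pd.DataFrame({
    "start": [3.0, 4.0, 6.0, 2.0, 5.0, 1.0],
    "stop":  [3.0, 7.0, 9.0, 2.0, 8.0, 1.0],   # rows with start == stop are exactly observed
    "x":     [0.2, 0.9, 0.4, 0.1, 0.7, 0.3],
})

aft = WeibullAFTFitter()
aft.fit_interval_censoring(df, lower_bound_col="start", upper_bound_col="stop")
aft.predict_median(df)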