How to use the lifelines.utils.normalize function in lifelines

To help you get started, we’ve selected a few lifelines examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github CamDavidsonPilon / lifelines / tests / test_estimation.py View on Github external
df = pd.DataFrame(
            {
                "var1": [0.209325, 0.693919, 0.443804, 0.065636, 0.386294],
                "T": [5.269797, 6.601666, 7.335846, 11.684092, 12.678458],
                "strata": [1, 1, 1, 2, 2],
            }
        )
        df["E"] = True
        df["weights"] = 1
        df = df.sort_values(by="T")

        cph = CoxPHFitter()
        cph.fit(df, "T", "E", show_progress=True, weights_col="weights", strata=["strata"])

        df = df.set_index("strata")
        X = normalize(df.drop(["T", "E", "weights"], axis=1), 0, cph._norm_std)

        expected = np.array([[-0.6960789, 1.6729761, 0.3094744, -0.2895864, -0.9967852]]).T
        actual = cph._compute_delta_beta(X, df["T"], df["E"], df["weights"])
        npt.assert_allclose(expected, actual, rtol=0.001)
github CamDavidsonPilon / lifelines / tests / utils / test_utils.py View on Github external
def test_normalize():
    """Check that ``utils.normalize`` standardises every larynx column.

    With its default arguments, ``utils.normalize`` is expected to return
    a frame whose per-column mean is 0 and per-column standard deviation
    is 1 (up to ``assert_almost_equal`` tolerance).
    """
    larynx = load_larynx()
    # Only the column count is needed to size the expected vectors.
    _, n_cols = larynx.shape

    column_means = utils.normalize(larynx).mean(0).values
    npt.assert_almost_equal(column_means, np.zeros(n_cols))

    column_stds = utils.normalize(larynx).std(0).values
    npt.assert_almost_equal(column_stds, np.ones(n_cols))
github CamDavidsonPilon / lifelines / tests / utils / test_utils.py View on Github external
def test_unnormalize():
    """Check that ``utils.unnormalize`` inverts ``utils.normalize``.

    The larynx frame is normalised, then un-normalised with the column
    means and standard deviations of the original frame; the result must
    match the original values (up to ``assert_almost_equal`` tolerance).
    """
    larynx = load_larynx()
    col_means, col_stds = larynx.mean(0), larynx.std(0)

    standardized = utils.normalize(larynx)

    expected = larynx.values
    restored = utils.unnormalize(standardized, col_means, col_stds)
    npt.assert_almost_equal(expected, restored.values)
github CamDavidsonPilon / lifelines / lifelines / fitters / __init__.py View on Github external
# Known AFT model
            self._norm_mean_ = df[self.regressors[self._primary_parameter_name]].mean(0)
            self._norm_mean_ancillary = df[self.regressors[self._ancillary_parameter_name]].mean(0)

        _norm_std = df.std(0)
        self._constant_cols = pd.Series(
            [(_norm_std.loc[variable_name] < 1e-8) for (_, variable_name) in _index], index=_index
        )
        self._norm_std = pd.Series([_norm_std.loc[variable_name] for (_, variable_name) in _index], index=_index)
        self._norm_std[self._constant_cols] = 1.0
        _norm_std[_norm_std < 1e-8] = 1.0

        _params, self.log_likelihood_, self._hessian_ = self._fit_model(
            log_likelihood_function,
            Ts,
            self._create_Xs_dict(utils.normalize(df, 0, _norm_std)),
            E.values,
            weights.values,
            entries.values,
            show_progress=show_progress,
            initial_point=initial_point,
        )
        self.params_ = _params / self._norm_std

        self.variance_matrix_ = self._compute_variance_matrix()
        self.standard_errors_ = self._compute_standard_errors(
            Ts, E.values, weights.values, entries.values, self._create_Xs_dict(df)
        )
        self.confidence_intervals_ = self._compute_confidence_intervals()

        if self._KNOWN_MODEL:
            # too slow for non-KNOWN models
github CamDavidsonPilon / lifelines / lifelines / fitters / coxph_fitter.py View on Github external
X, T, E, weights, original_index, self._clusters = self._preprocess_dataframe(df)

        self.durations = T.copy()
        self.event_observed = E.copy()
        self.weights = weights.copy()

        if self.strata is not None:
            self.durations.index = original_index
            self.event_observed.index = original_index
            self.weights.index = original_index

        self._norm_mean = X.mean(0)
        self._norm_std = X.std(0)

        hazards_ = self._newton_rhaphson(
            normalize(X, self._norm_mean, self._norm_std),
            T,
            E,
            weights=weights,
            initial_beta=initial_beta,
            show_progress=show_progress,
            step_size=step_size,
        )

        self.hazards_ = pd.DataFrame(hazards_.T, columns=X.columns, index=["coef"]) / self._norm_std

        self.variance_matrix_ = -inv(self._hessian_) / np.outer(self._norm_std, self._norm_std)
        self.standard_errors_ = self._compute_standard_errors(
            normalize(X, self._norm_mean, self._norm_std), T, E, weights
        )
        self.confidence_intervals_ = self._compute_confidence_intervals()
github CamDavidsonPilon / lifelines / lifelines / fitters / coxph_fitter.py View on Github external
hazards_ = self._newton_rhaphson(
            normalize(X, self._norm_mean, self._norm_std),
            T,
            E,
            weights=weights,
            initial_beta=initial_beta,
            show_progress=show_progress,
            step_size=step_size,
        )

        self.hazards_ = pd.DataFrame(hazards_.T, columns=X.columns, index=["coef"]) / self._norm_std

        self.variance_matrix_ = -inv(self._hessian_) / np.outer(self._norm_std, self._norm_std)
        self.standard_errors_ = self._compute_standard_errors(
            normalize(X, self._norm_mean, self._norm_std), T, E, weights
        )
        self.confidence_intervals_ = self._compute_confidence_intervals()

        self.baseline_hazard_ = self._compute_baseline_hazards(X, T, E, weights)
        self.baseline_cumulative_hazard_ = self._compute_baseline_cumulative_hazard()
        self.baseline_survival_ = self._compute_baseline_survival()
        self._predicted_partial_hazards_ = self.predict_partial_hazard(X).values

        return self
github CamDavidsonPilon / lifelines / lifelines / fitters / coxph_fitter.py View on Github external
X = X[order]
            pass_for_numeric_dtypes_or_raise(X)
        elif isinstance(X, pd.Series) and ((X.shape[0] == len(hazard_names) + 2) or (X.shape[0] == len(hazard_names))):
            X = X.to_frame().T
            order = hazard_names
            X = X[order]
            pass_for_numeric_dtypes_or_raise(X)
        elif isinstance(X, pd.Series):
            assert len(hazard_names) == 1, "Series not the correct arugment"
            X = pd.DataFrame(X)
            pass_for_numeric_dtypes_or_raise(X)

        X = X.astype(float)
        index = _get_index(X)

        X = normalize(X, self._norm_mean.values, 1)
        return pd.DataFrame(np.dot(X, self.hazards_.T), index=index)
github CamDavidsonPilon / lifelines / lifelines / fitters / cox_time_varying_fitter.py View on Github external
DataFrame

        Note
        -----
        If X is a DataFrame, the order of the columns does not matter. But
        if X is an array, then the column ordering is assumed to be the
        same as the training dataset.
        """
        if isinstance(X, pd.DataFrame):
            order = self.params_.index
            X = X[order]
            check_for_numeric_dtypes_or_raise(X)

        X = X.astype(float)
        index = _get_index(X)
        X = normalize(X, self._norm_mean.values, 1)
        return pd.DataFrame(np.dot(X, self.params_), index=index)