How to use the lifelines.CoxPHFitter function in lifelines

To help you get started, we’ve selected a few lifelines examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github CamDavidsonPilon / lifelines / tests / test_estimation.py View on Github external
def test_print_summary_with_decimals(self, rossi, cph):
        import sys

        saved_stdout = sys.stdout
        try:

            out = StringIO()
            sys.stdout = out

            cph = CoxPHFitter()
            cph.fit(rossi, duration_col="week", event_col="arrest", batch_mode=True)
            cph._time_fit_was_called = "2018-10-23 02:40:45 UTC"
            cph.print_summary(decimals=1)
            output_dec_1 = out.getvalue().strip().split()

            cph.print_summary(decimals=3)
            output_dec_3 = out.getvalue().strip().split()

            assert output_dec_1 != output_dec_3
        finally:
            sys.stdout = saved_stdout
            cph.fit(rossi, duration_col="week", event_col="arrest", batch_mode=False)
github CamDavidsonPilon / lifelines / tests / test_statistics.py View on Github external
def test_proportional_hazard_test_with_log_transform():
    cph = CoxPHFitter()
    df = load_regression_dataset()
    cph.fit(df, "T", "E")

    results = stats.proportional_hazard_test(cph, df, time_transform="log")
    npt.assert_allclose(results.summary.loc["var1"]["test_statistic"], 2.227627, rtol=1e-3)
    npt.assert_allclose(results.summary.loc["var2"]["test_statistic"], 0.714427, rtol=1e-3)
    npt.assert_allclose(results.summary.loc["var3"]["test_statistic"], 1.466321, rtol=1e-3)
    npt.assert_allclose(results.summary.loc["var3"]["p"], 0.225927, rtol=1e-3)
github CamDavidsonPilon / lifelines / tests / test_estimation.py View on Github external
r$var
        r$naive.var
        residuals(r, type='dfbeta')
        """

        df = pd.DataFrame(
            {
                "var1": [0.209325, 0.693919, 0.443804, 0.065636, 0.386294],
                "var2": [0.184677, 0.071893, 1.364646, 0.098375, 1.663092],
                "T": [1, 2, 3, 4, 5],
                "var3": [2, 2, 2, 1, 2],
            }
        )
        df["E"] = 1

        cph = CoxPHFitter()
        cph.fit(df, "T", "E", robust=True, weights_col="var3", show_progress=True)
        expected = pd.Series({"var1": 1.431, "var2": -1.277})
        assert_series_equal(cph.hazards_.T["coef"], expected, check_less_precise=2, check_names=False)

        expected_cov = np.array([[3.5439245, -0.3549099], [-0.3549099, 0.4499553]])
        npt.assert_array_almost_equal(
            cph.variance_matrix_, expected_cov, decimal=1
        )  # not as precise because matrix inversion will accumulate estimation errors.

        expected = pd.Series({"var1": 2.094, "var2": 0.452})
        assert_series_equal(cph.summary["se(coef)"], expected, check_less_precise=2, check_names=False)
github CamDavidsonPilon / lifelines / tests / test_estimation.py View on Github external
def test_output_with_strata_against_R(self, rossi):
        """
        rossi <- read.csv('.../lifelines/datasets/rossi.csv')
        r = coxph(formula = Surv(week, arrest) ~ fin + age + strata(race,
                    paro, mar, wexp) + prio, data = rossi)
        """
        expected = np.array([[-0.3355, -0.0590, 0.1002]])
        cf = CoxPHFitter()
        cf.fit(
            rossi, duration_col="week", event_col="arrest", strata=["race", "paro", "mar", "wexp"], show_progress=True
        )
        npt.assert_array_almost_equal(cf.hazards_.values, expected, decimal=4)
github CamDavidsonPilon / lifelines / tests / test_estimation.py View on Github external
def test_p_value_against_Survival_Analysis_by_John_Klein_and_Melvin_Moeschberger(self):
        # see table 8.1 in Survival Analysis by John P. Klein and Melvin L. Moeschberger, Second Edition
        df = load_larynx()
        cf = CoxPHFitter()
        cf.fit(df, duration_col="time", event_col="death")

        # p-values
        actual_p = cf._compute_p_values()
        expected_p = np.array([0.1847, 0.7644, 0.0730, 0.00])
        npt.assert_array_almost_equal(actual_p, expected_p, decimal=2)
github CamDavidsonPilon / lifelines / tests / test_estimation.py View on Github external
def test_crossval_for_cox_ph_with_normalizing_times(self, data_pred2, data_pred1):
        cf = CoxPHFitter()

        for data_pred in [data_pred1, data_pred2]:

            # why does this
            data_norm = data_pred.copy()
            times = data_norm["t"]
            # Normalize to mean = 0 and standard deviation = 1
            times -= np.mean(times)
            times /= np.std(times)
            data_norm["t"] = times

            scores = k_fold_cross_validation(
                cf, data_norm, duration_col="t", event_col="E", k=3, predictor="predict_partial_hazard"
            )

            mean_score = 1 - np.mean(scores)
github MGensheimer / nnet-survival / nnet_survival_examples.py View on Github external
plt.plot(breaks,np.concatenate(([1],np.cumprod(y_pred[0,:]))),'bo-')
plt.plot(kmf.survival_function_.index.values, kmf.survival_function_.KM_estimate,color='k')
kmf.fit(t[int(n_samples/2)+1:], event_observed=f[int(n_samples/2)+1:])
plt.plot(breaks,np.concatenate(([1],np.cumprod(y_pred[-1,:]))),'ro-')
plt.plot(kmf.survival_function_.index.values, kmf.survival_function_.KM_estimate,color='k')
plt.xticks(np.arange(0, 2000.0001, 200))
plt.yticks(np.arange(0, 1.0001, 0.125))
plt.xlim([0,2000])
plt.ylim([0,1])
plt.xlabel('Follow-up time (days)')
plt.ylabel('Proportion surviving')
plt.title('One covariate. Actual=black, predicted=blue/red.')
plt.show()

myData=pd.DataFrame({'x_train' : x_train, 't' : t, 'f' : f}) 
cf = CoxPHFitter()
cf.fit(myData, 't', event_col='f')
cox_coef = cf.hazards_.x_train.values[0]
nn_coef = model.get_weights()[0][0][0]
print('Cox model coefficient:')
print(cox_coef)
print('Cox model hazard ratio:')
print(np.exp(cox_coef))
print('Neural network coefficient:')
print(nn_coef)
print('Neural network hazard ratio:')
print(np.exp(nn_coef))


##########################################################
#Proportional hazards model with flexible baseline hazard:
#Multiple variables, mix of discrete and continuous
github CamDavidsonPilon / lifelines / experiments / aalen_and_cook_simulation.py View on Github external
W = weibull_min.rvs(1, scale=1, loc=0, size=N)

Y = bX * X + bZ * Z + np.log(W)
T = np.exp(Y)

#######################################

df = pd.DataFrame({"T": T, "x": X, "x_": X_})


wf = WeibullAFTFitter().fit(df, "T")
wf.print_summary(4)


cph = CoxPHFitter().fit(df, "T", show_progress=True, step_size=1.0)
cph.print_summary(4)
github JakeColtman / pyBMA / pyBMA / CoxPHModel.py View on Github external
def _run(self):
        self._cf = lifelines.CoxPHFitter()
        self._cf.fit(self.df, self.survival_col, event_col=self.cens_col, include_likelihood=True)
github MStarmans91 / WORC / WORC / plotting / plot_SVM.py View on Github external
SpearmanP.append(SpearmanP_temp)

        # TODO: override with new survival code
        if survival:
            # Extract time to event and event from label data
            E_truth = np.asarray([labels[1][k][0] for k in test_indices])
            T_truth = np.asarray([labels[2][k][0] for k in test_indices])

            # Concordance index
            cindex.append(1 - ll.utils.concordance_index(T_truth, y_prediction, E_truth))

            # Fit Cox model using SVR output, time to event and event
            data = {'predict': y_prediction, 'E': E_truth, 'T': T_truth}
            data = pd.DataFrame(data=data, index=test_patient_IDs)

            cph = ll.CoxPHFitter()
            cph.fit(data, duration_col='T', event_col='E')

            coxcoef.append(cph.summary['coef']['predict'])
            coxp.append(cph.summary['p']['predict'])

    if output in ['scores', 'decision']:
        # Return the scores and true values of all patients
        return y_truths, y_scores, y_predictions, pids
    elif output == 'stats':
        # Compute statistics
        # Extract sample si ze
        N_1 = float(len(train_patient_IDs))
        N_2 = float(len(test_patient_IDs))

        # Compute alpha confidence intervals (CIs)
        stats = dict()