How to use the lifelines.utils.pass_for_numeric_dtypes_or_raise_array function in lifelines

To help you get started, we’ve selected a few lifelines examples based on popular ways this function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github CamDavidsonPilon / lifelines / lifelines / fitters / __init__.py View on Github external
relative time when a subject entered the study. This is useful for left-truncated (not left-censored) observations. If None, all members of the population
            entered study when they were "born": time zero.
        weights: an array, or pd.Series, of length n
            integer weights per observation
        initial_point: (d,) numpy array, optional
            initialize the starting point of the iterative
            algorithm. Default is the zero vector.

        Returns
        -------
          self
            self with new properties like ``cumulative_hazard_``, ``survival_function_``

        """

        self.durations = np.asarray(utils.pass_for_numeric_dtypes_or_raise_array(durations))
        utils.check_nans_or_infs(self.durations)
        utils.check_positivity(self.durations)

        return self._fit(
            (self.durations, None),
            event_observed=event_observed,
            timeline=timeline,
            label=label,
            alpha=alpha,
            ci_labels=ci_labels,
            show_progress=show_progress,
            entry=entry,
            weights=weights,
            initial_point=initial_point,
        )
github CamDavidsonPilon / lifelines / lifelines / fitters / __init__.py View on Github external
>>> aft.print_summary()
        >>> aft.predict_median(df)
        >>>
        >>> aft = WeibullAFTFitter()
        >>> aft.fit(df, 'T', 'E', ancillary_df=df)
        >>> aft.print_summary()
        >>> aft.predict_median(df)

        """
        self.duration_col = duration_col
        self._time_cols = [duration_col]
        self.fit_intercept = utils.coalesce(fit_intercept, self.fit_intercept)

        df = df.copy()

        T = utils.pass_for_numeric_dtypes_or_raise_array(df.pop(self.duration_col)).astype(float)
        self.durations = T.copy()

        primary_columns = df.columns.difference([self.duration_col, event_col]).tolist()

        if isinstance(ancillary_df, pd.DataFrame):
            self.model_ancillary = True
            assert ancillary_df.shape[0] == df.shape[0], "ancillary_df must be the same shape[0] as df"
            regressors = {
                self._primary_parameter_name: primary_columns,
                self._ancillary_parameter_name: ancillary_df.columns.difference(
                    [self.duration_col, event_col]
                ).tolist(),
            }
            ancillary_cols_to_consider = ancillary_df.columns.difference(df.columns).difference(
                [self.duration_col, event_col]
            )
github CamDavidsonPilon / lifelines / lifelines / fitters / __init__.py View on Github external
) -> "ParametricRegressionFitter":

        self._time_fit_was_called = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S") + " UTC"
        self._n_examples = df.shape[0]
        self.weights_col = weights_col
        self.entry_col = entry_col
        self.event_col = event_col
        self.robust = robust

        if timeline is not None:
            self.timeline = np.sort(np.asarray(timeline).astype(float))
        else:
            self.timeline = np.unique(utils.coalesce(*Ts))

        E = (
            utils.pass_for_numeric_dtypes_or_raise_array(df.pop(self.event_col))
            if (self.event_col is not None)
            else pd.Series(np.ones(self._n_examples, dtype=bool), index=df.index, name="E")
        )
        weights = (
            utils.pass_for_numeric_dtypes_or_raise_array(df.pop(self.weights_col)).astype(float)
            if (self.weights_col is not None)
            else pd.Series(np.ones(self._n_examples, dtype=float), index=df.index, name="weights")
        )

        entries = (
            utils.pass_for_numeric_dtypes_or_raise_array(df.pop(entry_col)).astype(float)
            if (entry_col is not None)
            else pd.Series(np.zeros(self._n_examples, dtype=float), index=df.index, name="entry")
        )

        utils.check_nans_or_infs(E)
github CamDavidsonPilon / lifelines / lifelines / fitters / __init__.py View on Github external
>>>     'age': [4, 3, 9, 8, 7, 4, 4, 3, 2, 5, 6, 7],
        >>> })
        >>>
        >>> aft = WeibullAFTFitter()
        >>> aft.fit_left_censoring(df, 'T', 'E')
        >>> aft.print_summary()
        >>> aft.predict_median(df)
        >>>
        >>> aft = WeibullAFTFitter()
        >>> aft.fit_left_censoring(df, 'T', 'E', ancillary_df=df)
        >>> aft.print_summary()
        >>> aft.predict_median(df)
        """
        df = df.copy()

        T = utils.pass_for_numeric_dtypes_or_raise_array(df.pop(duration_col)).astype(float)
        self.durations = T.copy()
        self.fit_intercept = utils.coalesce(fit_intercept, self.fit_intercept)
        self.duration_col = duration_col

        primary_columns = df.columns.difference([duration_col, event_col]).tolist()
        if isinstance(ancillary_df, pd.DataFrame):
            self.model_ancillary = True
            assert ancillary_df.shape[0] == df.shape[0], "ancillary_df must be the same shape[0] as df"
            regressors = {
                self._primary_parameter_name: primary_columns,
                self._ancillary_parameter_name: ancillary_df.columns.tolist(),
            }
            df = pd.concat([df, ancillary_df[ancillary_df.columns.difference(df.columns)]], axis=1)

        elif (ancillary_df is True) or self.model_ancillary:
            self.model_ancillary = True
github CamDavidsonPilon / lifelines / lifelines / fitters / cox_time_varying_fitter.py View on Github external
self.weights_col = weights_col
            if (df[weights_col] <= 0).any():
                raise ValueError("values in weights_col must be positive.")

        df = df.rename(
            columns={id_col: "id", event_col: "event", start_col: "start", stop_col: "stop", weights_col: "__weights"}
        )

        if self.strata is None:
            df = df.set_index("id")
        else:
            df = df.set_index(_to_list(self.strata) + ["id"])  # TODO: needs to be a list
            df = df.sort_index()

        events, start, stop = (
            pass_for_numeric_dtypes_or_raise_array(df.pop("event")).astype(bool),
            df.pop("start"),
            df.pop("stop"),
        )
        weights = df.pop("__weights").astype(float)

        df = df.astype(float)
        self._check_values(df, events, start, stop)

        self._norm_mean = df.mean(0)
        self._norm_std = df.std(0)

        params_ = self._newton_rhaphson(
            normalize(df, self._norm_mean, self._norm_std),
            events,
            start,
            stop,
github CamDavidsonPilon / lifelines / lifelines / fitters / __init__.py View on Github external
entry: an array, or pd.Series, of length n
            relative time when a subject entered the study. This is useful for left-truncated (not left-censored) observations. If None, all members of the population
            entered study when they were "born": time zero.
        weights: an array, or pd.Series, of length n
            integer weights per observation
        initial_point: (d,) numpy array, optional
            initialize the starting point of the iterative
            algorithm. Default is the zero vector.

        Returns
        -------
          self
            self with new properties like ``cumulative_hazard_``, ``survival_function_``

        """
        self.upper_bound = np.atleast_1d(utils.pass_for_numeric_dtypes_or_raise_array(upper_bound))
        self.lower_bound = np.atleast_1d(utils.pass_for_numeric_dtypes_or_raise_array(lower_bound))

        utils.check_nans_or_infs(self.lower_bound)
        utils.check_positivity(self.upper_bound)

        if (self.upper_bound < self.lower_bound).any():
            raise ValueError("All upper_bound times must be greater than or equal to lower_bound times.")

        if event_observed is None:
            event_observed = self.upper_bound == self.lower_bound

        if ((self.lower_bound == self.upper_bound) != event_observed).any():
            raise ValueError(
                "For all rows, lower_bound == upper_bound if and only if event observed = 1 (uncensored). Likewise, lower_bound < upper_bound if and only if event observed = 0 (censored)"
            )
github CamDavidsonPilon / lifelines / lifelines / fitters / __init__.py View on Github external
self.entry_col = entry_col
        self.event_col = event_col
        self.robust = robust

        if timeline is not None:
            self.timeline = np.sort(np.asarray(timeline).astype(float))
        else:
            self.timeline = np.unique(utils.coalesce(*Ts))

        E = (
            utils.pass_for_numeric_dtypes_or_raise_array(df.pop(self.event_col))
            if (self.event_col is not None)
            else pd.Series(np.ones(self._n_examples, dtype=bool), index=df.index, name="E")
        )
        weights = (
            utils.pass_for_numeric_dtypes_or_raise_array(df.pop(self.weights_col)).astype(float)
            if (self.weights_col is not None)
            else pd.Series(np.ones(self._n_examples, dtype=float), index=df.index, name="weights")
        )

        entries = (
            utils.pass_for_numeric_dtypes_or_raise_array(df.pop(entry_col)).astype(float)
            if (entry_col is not None)
            else pd.Series(np.zeros(self._n_examples, dtype=float), index=df.index, name="entry")
        )

        utils.check_nans_or_infs(E)
        E = E.astype(bool)
        self.event_observed = E.copy()
        self.entry = entries.copy()
        self.weights = weights.copy()
github CamDavidsonPilon / lifelines / lifelines / fitters / __init__.py View on Github external
>>>
        >>> aft = WeibullAFTFitter()
        >>> aft.fit_interval_censoring(df, 'start', 'stop', 'E', ancillary_df=df)
        >>> aft.print_summary()
        >>> aft.predict_median(df)
        """

        self.lower_bound_col = lower_bound_col
        self.upper_bound_col = upper_bound_col
        self.fit_intercept = utils.coalesce(fit_intercept, self.fit_intercept)
        self._time_cols = [lower_bound_col, upper_bound_col]

        df = df.copy()

        lower_bound = utils.pass_for_numeric_dtypes_or_raise_array(df.pop(lower_bound_col)).astype(float)
        upper_bound = utils.pass_for_numeric_dtypes_or_raise_array(df.pop(upper_bound_col)).astype(float)

        if event_col is None:
            event_col = "E_lifelines_added"
            df[event_col] = lower_bound == upper_bound

        if ((lower_bound == upper_bound) != df[event_col]).any():
            raise ValueError(
                "For all rows, lower_bound == upper_bound if and only if event observed = 1 (uncensored). Likewise, lower_bound < upper_bound if and only if event observed = 0 (censored)"
            )
        if (lower_bound > upper_bound).any():
            raise ValueError("All upper bound measurements must be greater than or equal to lower bound measurements.")

        self.lower_bound = lower_bound
        self.upper_bound = upper_bound

        primary_columns = df.columns.difference([self.lower_bound_col, self.upper_bound_col, event_col]).tolist()