How to use the arviz.stats.stats_utils.histogram function in arviz

To help you get started, we’ve selected a few arviz examples based on popular ways this function is used in public projects.
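All of the excerpts below unpack three return values from histogram. As a quick orientation, here is a minimal sketch of that call pattern; it assumes, based on the usages shown further down, that histogram behaves like a thin wrapper around numpy.histogram that also returns the per-bin density alongside the counts and bin edges.

import numpy as np
from arviz.stats.stats_utils import histogram

# Any 1-D sample works; a toy normal draw keeps the example self-contained.
data = np.random.default_rng(0).normal(size=1_000)

# Judging from the snippets below, histogram returns the raw counts per bin,
# the density per bin, and the bin edges (one more edge than there are bins).
edges = np.linspace(data.min(), data.max(), 31)
counts, density, bin_edges = histogram(data, bins=edges)

print(counts.sum())                          # 1000
print((density * np.diff(bin_edges)).sum())  # ~1.0 if density is normalized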


github arviz-devs / arviz / arviz / stats / stats.py
multimodal=multimodal,
                )
                for row in ary.T
            ]
        )
        return hpd_array

    if multimodal:
        if ary.dtype.kind == "f":
            density, lower, upper = _fast_kde(ary)
            range_x = upper - lower
            dx = range_x / len(density)
            bins = np.linspace(lower, upper, len(density))
        else:
            bins = get_bins(ary)
            _, density, _ = histogram(ary, bins=bins)
            dx = np.diff(bins)[0]

        density *= dx

        idx = np.argsort(-density)
        intervals = bins[idx][density[idx].cumsum() <= credible_interval]
        intervals.sort()

        intervals_splitted = np.split(intervals, np.where(np.diff(intervals) >= dx * 1.1)[0] + 1)

        hpd_intervals = []
        for interval in intervals_splitted:
            if interval.size == 0:
                hpd_intervals.append((bins[0], bins[0]))
            else:
                hpd_intervals.append((interval[0], interval[-1]))
github arviz-devs / arviz / arviz / plots / backends / matplotlib / violinplot.py
def cat_hist(val, shade, ax, **kwargs_shade):
    """Auxiliary function to plot discrete-violinplots."""
    bins = get_bins(val)
    _, binned_d, _ = histogram(val, bins=bins)

    bin_edges = np.linspace(np.min(val), np.max(val), len(bins))
    centers = 0.5 * (bin_edges + np.roll(bin_edges, 1))[:-1]
    heights = np.diff(bin_edges)

    lefts = -0.5 * binned_d
    ax.barh(centers, binned_d, height=heights, left=lefts, alpha=shade, **kwargs_shade)
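For context, a hypothetical standalone use of the cat_hist helper above might look like the following; the import path for get_bins and the Poisson toy data are assumptions, not taken from the arviz source.

import numpy as np
import matplotlib.pyplot as plt
from arviz.plots.plot_utils import get_bins    # assumed location of get_bins
from arviz.stats.stats_utils import histogram

rng = np.random.default_rng(0)
counts = rng.poisson(3, size=1_000)  # discrete draws, as cat_hist expects

fig, ax = plt.subplots()
cat_hist(counts, shade=0.5, ax=ax, color="C0")  # extra kwargs are passed to ax.barh
plt.show()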
github arviz-devs / arviz / arviz / plots / backends / matplotlib / khatplot.py
verticalalignment="bottom",
                fontsize=0.8 * xt_labelsize,
            )

    xmin, xmax = ax.get_xlim()
    if show_bins:
        xmax += n_data_points / 12
    ylims1 = ax.get_ylim()
    ax.hlines([0, 0.5, 0.7, 1], xmin=xmin, xmax=xmax, linewidth=linewidth, **hlines_kwargs)
    ylims2 = ax.get_ylim()
    ymin = min(ylims1[0], ylims2[0])
    ymax = min(ylims1[1], ylims2[1])
    if show_bins:
        bin_edges = np.array([ymin, 0.5, 0.7, 1, ymax])
        bin_edges = bin_edges[(bin_edges >= ymin) & (bin_edges <= ymax)]
        hist, _, _ = histogram(khats, bin_edges)
        for idx, count in enumerate(hist):
            ax.text(
                (n_data_points - 1 + xmax) / 2,
                np.mean(bin_edges[idx : idx + 2]),
                bin_format.format(count, count / n_data_points * 100),
                horizontalalignment="center",
                verticalalignment="center",
            )
    ax.set_ylim(ymin, ymax)
    ax.set_xlim(xmin, xmax)

    ax.set_xlabel("Data Point", fontsize=ax_labelsize)
    ax.set_ylabel(r"Shape parameter k", fontsize=ax_labelsize)
    ax.tick_params(labelsize=xt_labelsize)
    if xlabels:
        set_xticklabels(ax, coord_labels)
github arviz-devs / arviz / arviz / plots / backends / matplotlib / forestplot.py
"""Get data for each ridgeplot for the variable."""
        xvals, yvals, pdfs, colors = [], [], [], []
        for y, *_, values, color in self.iterator():
            yvals.append(y)
            colors.append(color)
            values = values.flatten()
            values = values[np.isfinite(values)]

            if ridgeplot_kind == "auto":
                kind = "hist" if np.all(np.mod(values, 1) == 0) else "density"
            else:
                kind = ridgeplot_kind

            if kind == "hist":
                bins = get_bins(values)
                _, density, x = histogram(values, bins=bins)
                x = x[:-1]
            elif kind == "density":
                density, lower, upper = _fast_kde(values)
                x = np.linspace(lower, upper, len(density))

            xvals.append(x)
            pdfs.append(density)

        scaling = max(np.max(j) for j in pdfs)
        for y, x, pdf, color in zip(yvals, xvals, pdfs, colors):
            y = y * np.ones_like(x)
            yield x, y, mult * pdf / scaling + y, color
github arviz-devs / arviz / arviz / stats / diagnostics.py
"""Display a summary of Pareto tail indices.

    Parameters
    ----------
    pareto_tail_indices : array
      Pareto tail indices.

    Returns
    -------
    df_k : dataframe
      Dataframe containing k diagnostic values.
    """
    _numba_flag = Numba.numba_flag
    if _numba_flag:
        bins = np.asarray([-np.Inf, 0.5, 0.7, 1, np.Inf])
        kcounts, *_ = histogram(pareto_tail_indices, bins)
    else:
        kcounts, *_ = histogram(pareto_tail_indices, bins=[-np.Inf, 0.5, 0.7, 1, np.Inf])
    kprop = kcounts / len(pareto_tail_indices) * 100
    df_k = pd.DataFrame(
        dict(_=["(good)", "(ok)", "(bad)", "(very bad)"], Count=kcounts, Pct=kprop)
    ).rename(index={0: "(-Inf, 0.5]", 1: " (0.5, 0.7]", 2: "   (0.7, 1]", 3: "   (1, Inf)"})

    if np.sum(kcounts[1:]) == 0:
        warnings.warn("All Pareto k estimates are good (k < 0.5)")
    elif np.sum(kcounts[2:]) == 0:
        warnings.warn("All Pareto k estimates are ok (k < 0.7)")

    return df_k
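The diagnostics excerpt above groups Pareto k values into the conventional 0.5 / 0.7 / 1 thresholds by handing histogram a fixed set of infinite bin edges. The short illustration below (plain numpy with made-up k values, not arviz code) shows what those four counts correspond to.

import numpy as np

# Made-up Pareto k values; the bin edges mirror the diagnostics snippet above.
khats = np.array([0.1, 0.3, 0.45, 0.55, 0.72, 1.3])
edges = np.array([-np.inf, 0.5, 0.7, 1.0, np.inf])

counts, _ = np.histogram(khats, bins=edges)
labels = ["(-Inf, 0.5] good", "(0.5, 0.7] ok", "(0.7, 1] bad", "(1, Inf) very bad"]
for label, count in zip(labels, counts):
    print(f"{label}: {count} ({count / khats.size * 100:.1f}%)")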
github arviz-devs / arviz / arviz / plots / kdeplot.py
if xmin is None:
        xmin = np.min(x)
    if xmax is None:
        xmax = np.max(x)

    assert np.min(x) >= xmin
    assert np.max(x) <= xmax

    log_len_x = np.log(len_x) * bw

    n_bins = min(int(len_x ** (1 / 3) * log_len_x * 2), n_points)
    if n_bins < 2:
        warnings.warn("kde plot failed, you may want to check your data")
        return np.array([np.nan]), np.nan, np.nan

    _, grid, _ = histogram(x, n_bins, range_hist=(xmin, xmax))

    scotts_factor = len_x ** (-0.2)
    kern_nx = int(scotts_factor * 2 * np.pi * log_len_x)
    kernel = gaussian(kern_nx, scotts_factor * log_len_x)

    npad = min(n_bins, 2 * kern_nx)
    grid = np.concatenate([grid[npad:0:-1], grid, grid[n_bins : n_bins - npad : -1]])
    density = convolve(grid, kernel, mode="same", method="direct")[npad : npad + n_bins]
    norm_factor = (2 * np.pi * log_len_x ** 2 * scotts_factor ** 2) ** 0.5

    density /= norm_factor

    if cumulative:
        density = density.cumsum() / density.sum()

    return density, xmin, xmax
github arviz-devs / arviz / arviz / plots / backends / matplotlib / ppcplot.py
def animate(i):
    _, y_vals, x_vals = histogram(pp_sampled_vals[i], bins="auto")
    line.set_data(x_vals[:-1], y_vals)
    return (line,)
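This last excerpt is only the per-frame callback of an animated posterior-predictive histogram. For context, here is a minimal sketch of how such a callback is usually wired into matplotlib.animation.FuncAnimation; fig, line, and pp_sampled_vals are hypothetical stand-ins rather than objects from the arviz plotting code.

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import animation
from arviz.stats.stats_utils import histogram

rng = np.random.default_rng(0)
# Fake "posterior predictive" draws, one array per animation frame.
pp_sampled_vals = [rng.normal(size=500) for _ in range(20)]

fig, ax = plt.subplots()
(line,) = ax.plot([], [], drawstyle="steps-pre")
ax.set_xlim(-4, 4)
ax.set_ylim(0, 1)

def animate(i):
    _, y_vals, x_vals = histogram(pp_sampled_vals[i], bins="auto")
    line.set_data(x_vals[:-1], y_vals)
    return (line,)

# Keep a reference to the animation so it is not garbage-collected.
anim = animation.FuncAnimation(fig, animate, frames=len(pp_sampled_vals), interval=100)
plt.show()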