How to use xhistogram - 10 common examples

To help you get started, we’ve selected a few xhistogram examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github xgcm / xhistogram / doc / conf.py View on Github external
# The encoding of source files.
#source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = u'xhistogram'
copyright = u'2016-2019, xhistogram developers'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The full version, including alpha/beta/rc tags.
release = xhistogram.__version__
# The short X.Y version.
version = '.'.join(release.split('.')[:2])

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build']
github xgcm / xhistogram / xhistogram / core.py View on Github external
def _bincount_2d(bin_indices, weights, N, hist_shapes):
    # a trick to apply bincount on an axis-by-axis basis
    # https://stackoverflow.com/questions/40591754/vectorizing-numpy-bincount
    # https://stackoverflow.com/questions/40588403/vectorized-searchsorted-numpy
    M = bin_indices.shape[0]
    if weights is not None:
        weights = weights.ravel()
    bin_indices_offset = (bin_indices + (N * np.arange(M)[:, None])).ravel()
    bc_offset = bincount(bin_indices_offset, weights=weights,
                            minlength=N*M)
    final_shape = (M,) + tuple(hist_shapes)
    return bc_offset.reshape(final_shape)
github xgcm / xhistogram / xhistogram / core.py View on Github external
def _bincount_loop(bin_indices, weights, N, hist_shapes, block_chunks):
    M = bin_indices.shape[0]
    assert sum(block_chunks) == M
    block_counts = []
    # iterate over chunks
    bounds = np.cumsum((0,) + block_chunks)
    for m_start, m_end in zip(bounds[:-1], bounds[1:]):
        bin_indices_block = bin_indices[m_start:m_end]
        weights_block = weights[m_start:m_end] if weights is not None else None
        bc_block = _bincount_2d(bin_indices_block, weights_block, N, hist_shapes)
        block_counts.append(bc_block)
    all_counts = concatenate(block_counts)
    final_shape = (bin_indices.shape[0],) + tuple(hist_shapes)
    return all_counts.reshape(final_shape)
github xgcm / xhistogram / xhistogram / core.py View on Github external
def reshape_input(a):
        if do_full_array:
            d = a.ravel()[None, :]
        else:
            # reshape the array to 2D
            # axis 0: preserved axis after histogram
            # axis 1: calculate histogram along this axis
            new_pos = tuple(range(-len(axis), 0))
            c = np.moveaxis(a, axis, new_pos)
            split_idx = c.ndim - len(axis)
            dims_0 = c.shape[:split_idx]
            assert dims_0 == kept_axes_shape
            dims_1 = c.shape[split_idx:]
            new_dim_0 = np.prod(dims_0)
            new_dim_1 = np.prod(dims_1)
            d = reshape(c, (new_dim_0, new_dim_1))
        return d
github xgcm / xhistogram / xhistogram / core.py View on Github external
if weights is not None:
        weights_reshaped = all_args_reshaped.pop()
    else:
        weights_reshaped = None

    h = _histogram_2d_vectorized(*all_args_reshaped, bins=bins,
                                 weights=weights_reshaped,
                                 density=density, block_size=block_size)

    if h.shape[0] == 1:
        assert do_full_array
        h = h.squeeze()
    else:
        final_shape = kept_axes_shape + h.shape[1:]
        h = reshape(h, final_shape)

    return h
github xgcm / xhistogram / xhistogram / core.py View on Github external
nrows, ncols = a0.shape
    nbins = [len(b) for b in bins]
    hist_shapes = [nb+1 for nb in nbins]

    # a marginally faster implementation would be to use searchsorted,
    # like numpy histogram itself does
    # https://github.com/numpy/numpy/blob/9c98662ee2f7daca3f9fae9d5144a9a8d3cabe8c/numpy/lib/histograms.py#L864-L882
    # for now we stick with `digitize` because it's easy to understand how it works

    # the maximum possible value of of digitize is nbins
    # for right=False:
    #   - 0 corresponds to a < b[0]
    #   - i corresponds to bins[i-1] <= a < b[i]
    #   - nbins corresponds to a a >= b[1]
    each_bin_indices = [digitize(a, b) for a, b in zip(args, bins)]
    # product of the bins gives the joint distribution
    if N_inputs > 1:
        bin_indices = ravel_multi_index(each_bin_indices, hist_shapes)
    else:
        bin_indices = each_bin_indices[0]
    # total number of unique bin indices
    N = reduce(lambda x, y: x*y, hist_shapes)

    bin_counts = _dispatch_bincount(bin_indices, weights, N, hist_shapes,
                                    block_size=block_size)

    # just throw out everything outside of the bins, as np.histogram does
    # TODO: make this optional?
    slices = (slice(None),) + (N_inputs * (slice(1, -1),))
    return bin_counts[slices]
github xgcm / xhistogram / xhistogram / core.py View on Github external
hist_shapes = [nb+1 for nb in nbins]

    # a marginally faster implementation would be to use searchsorted,
    # like numpy histogram itself does
    # https://github.com/numpy/numpy/blob/9c98662ee2f7daca3f9fae9d5144a9a8d3cabe8c/numpy/lib/histograms.py#L864-L882
    # for now we stick with `digitize` because it's easy to understand how it works

    # the maximum possible value of of digitize is nbins
    # for right=False:
    #   - 0 corresponds to a < b[0]
    #   - i corresponds to bins[i-1] <= a < b[i]
    #   - nbins corresponds to a a >= b[1]
    each_bin_indices = [digitize(a, b) for a, b in zip(args, bins)]
    # product of the bins gives the joint distribution
    if N_inputs > 1:
        bin_indices = ravel_multi_index(each_bin_indices, hist_shapes)
    else:
        bin_indices = each_bin_indices[0]
    # total number of unique bin indices
    N = reduce(lambda x, y: x*y, hist_shapes)

    bin_counts = _dispatch_bincount(bin_indices, weights, N, hist_shapes,
                                    block_size=block_size)

    # just throw out everything outside of the bins, as np.histogram does
    # TODO: make this optional?
    slices = (slice(None),) + (N_inputs * (slice(1, -1),))
    return bin_counts[slices]
github xgcm / xhistogram / xhistogram / xarray.py View on Github external
for name in dims_to_keep if name in a0.coords}
    all_coords = {}
    all_coords.update(old_coords)
    all_coords.update(new_coords)

    # CF conventions tell us how to specify cell boundaries
    # http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/cf-conventions.html#cell-boundaries
    # However, they require introduction of an additional dimension.
    # I don't like that.
    edge_dims = [a.name + bin_edge_suffix for a in args[:N_args]]
    edge_coords = {name: ((name,), bin_edge, a.attrs)
                  for name, bin_edge, a in zip(edge_dims, bins, args)}

    output_name = '_'.join(['histogram'] + [a.name for a in args[:N_args]])

    da_out = xr.DataArray(h_data, dims=output_dims, coords=all_coords,
                          name=output_name)
    return da_out
github xgcm / xhistogram / xhistogram / xarray.py View on Github external
# TODO: replace this with a more robust function
    assert len(bins)==N_args
    for bin in bins:
        assert isinstance(bin, np.ndarray), 'all bins must be numpy arrays'

    for a in args:
        # TODO: make this a more robust check
        assert a.name is not None, 'all arrays must have a name'

    # we drop coords to simplify alignment
    args = [da.reset_coords(drop=True) for da in args]
    if N_weights:
        args += [weights.reset_coords(drop=True)]
    # explicitly broadcast so we understand what is going into apply_ufunc
    # (apply_ufunc might be doing this by itself again)
    args = list(xr.align(*args, join='exact'))



    # what happens if we skip this?
    #args = list(xr.broadcast(*args))
    a0 = args[0]
    a_dims = a0.dims

    # roll our own broadcasting
    # now manually expand the arrays
    all_dims = [d for a in args for d in a.dims]
    all_dims_ordered = list(OrderedDict.fromkeys(all_dims))
    args_expanded = []
    for a in args:
        expand_keys = [d for d in all_dims_ordered if d not in a.dims]
        a_expanded = a.expand_dims({k: 1 for k in expand_keys})
github xgcm / xhistogram / xhistogram / xarray.py View on Github external
args_data = [a.data for a in args_transposed]

    if N_weights:
        weights_data = args_data.pop()
    else:
        weights_data = None

    if dim is not None:
        dims_to_keep = [d for d in all_dims_ordered if d not in dim]
        axis = [args_transposed[0].get_axis_num(d) for d in dim]
    else:
        dims_to_keep = []
        axis = None

    print(args_data[0].shape)
    h_data = _histogram(*args_data, weights=weights_data, bins=bins, axis=axis,
                        block_size=block_size)

    # create output dims
    new_dims = [a.name + bin_dim_suffix for a in args[:N_args]]
    output_dims = dims_to_keep + new_dims

    # create new coords
    bin_centers = [0.5*(bin[:-1] + bin[1:]) for bin in bins]
    new_coords = {name: ((name,), bin_center, a.attrs)
                  for name, bin_center, a in zip(new_dims, bin_centers, args)}

    old_coords = {name: a0[name]
                  for name in dims_to_keep if name in a0.coords}
    all_coords = {}
    all_coords.update(old_coords)
    all_coords.update(new_coords)