How to use the cupy.ElementwiseKernel function in cupy

To help you get started, we’ve selected a few cupy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github ibm-research-tokyo / dybm / src / pydybm / arraymath / dycupy / operations.py View on Github external
'float64 o',
    'o = max(abs(variable + delta) - strength * th, 0.0) * '
    '    (variable + delta == 0 ? 0 : variable + delta > 0 ? 1 : -1)',
    'sgd_L1_regularization'
)


def sgd_L1_regularization(variable, delta, strength, th):
    """Apply the update ``delta`` to ``variable`` and return the result.

    When ``strength`` equals ``0.0`` the update is the plain augmented
    assignment ``variable += delta``.  For any other ``strength`` the work is
    handed to ``sgd_L1_regularization_kernel_``, which is called with
    ``variable`` both as its first input and as its output argument.

    NOTE(review): the kernel definition is only partially visible from this
    function; its expression appears to shrink ``variable + delta`` towards
    zero by ``strength * th`` -- confirm against the full kernel source.

    Args:
        variable: value to be updated.
        delta: update added to ``variable``.
        strength: regularization strength; ``0.0`` selects the plain update.
        th: threshold factor forwarded to the kernel.

    Returns:
        ``variable`` after the update.
    """
    plain_update = strength == 0.0
    if plain_update:
        # No regularization requested: the kernel is not needed.
        variable += delta
        return variable

    # The trailing ``variable`` argument is the kernel's output slot.
    sgd_L1_regularization_kernel_(variable, delta, strength, th, variable)
    return variable


# Elementwise kernel evaluating ``alpha * first / (sqrt(second) + delta)``
# on float64 operands and writing the result to the float64 output ``o``.
rmsprop_get_delta_kernel_ = cupy.ElementwiseKernel(
    in_params='float64 alpha, float64 first, float64 second, float64 delta',
    out_params='float64 o',
    operation='o = alpha * first / (sqrt(second) + delta)',
    name='rmsprop_get_delta'
)

# Both public names are bound to the very same kernel object.
adagrad_get_delta = rmsprop_get_delta_kernel_
rmsprop_get_delta = rmsprop_get_delta_kernel_


# Elementwise kernel evaluating ``alpha / (sqrt(second) + delta)`` on float64
# operands and writing the result to the float64 output ``o``.
#
# The kernel is named 'rmsprop_get_threshold' so that it can be told apart
# from the separate 'rmsprop_get_delta' kernel in profiler and compiler
# output; the arithmetic itself is unchanged.
rmsprop_get_threshold_kernel_ = cupy.ElementwiseKernel(
    'float64 alpha, float64 second, float64 delta',
    'float64 o',
    'o = alpha / (sqrt(second) + delta)',
    'rmsprop_get_threshold'
)
github mikgroup / sigpy / sigpy / thresh.py View on Github external
'T output',
        """
        S abs_input = abs(input);
        T sign;
        if (abs_input == 0)
            sign = 0;
        else
            sign = input / (T) abs_input;
        S mag = abs_input - lamda;
        mag = (abs(mag) + mag) / 2.;

        output = (T) mag * sign;
        """,
        name='soft_thresh')

    # Elementwise hard threshold: ``output`` receives ``input`` wherever
    # ``abs(input)`` is strictly greater than ``lamda`` and 0 everywhere else.
    # ``S`` is the type of the threshold / magnitude, ``T`` the type of the
    # data being thresholded.
    _hard_thresh_cuda = cp.ElementwiseKernel(
        in_params='S lamda, T input',
        out_params='T output',
        operation="""
        S abs_input = abs(input);
        if (abs_input > lamda)
            output = input;
        else
            output = 0;
        """,
        name='hard_thresh')
github mikgroup / sigpy / sigpy / interp.py View on Github external
((S) y - ky) / (width / 2.0), param);
                for (int x = x0; x < x1 + 1; x++) {
                    const S w = wy * kernel(
                        ((S) x - kx) / (width / 2.0), param);
                    for (int b = 0; b < batch_size; b++) {
                        const int input_idx[] = {b, i};
                        const T v = (T) w * input[input_idx];
                        const int output_idx[] = {b, mod(y, ny), mod(x, nx)};
                        atomicAdd(&output[output_idx], v);
                    }
                }
            }
            """, name='gridding2', preamble=kernel + mod_cuda,
            reduce_dims=False)

        _gridding3_cuda = cp.ElementwiseKernel(
            'raw T input, raw S coord, raw S width, raw S param', 'raw T output', """
            const int batch_size = output.shape()[0];
            const int nz = output.shape()[1];
            const int ny = output.shape()[2];
            const int nx = output.shape()[3];

            const int coordz_idx[] = {i, 0};
            const S kz = coord[coordz_idx];
            const int coordy_idx[] = {i, 1};
            const S ky = coord[coordy_idx];
            const int coordx_idx[] = {i, 2};
            const S kx = coord[coordx_idx];

            const int x0 = ceil(kx - width / 2.0);
            const int y0 = ceil(ky - width / 2.0);
            const int z0 = ceil(kz - width / 2.0);
github fabiencro / knmt / nmt_chainer / additional_links / layer_normalization.py View on Github external
'y = 1.0/sqrt(a + 1e-5)',  # post-reduction map
        '0',  # identity value
        'inv_norm_comp'  # kernel name
    )
    
    
    # Elementwise kernel with two outputs:
    #   normalized = x * inv_norm
    #   scaled     = normalized * gamma + beta
    scale_output = cp.ElementwiseKernel(
        in_params='T x, T inv_norm, T gamma, T beta',
        out_params='T normalized, T scaled',
        operation='''
              normalized = x * inv_norm;
              scaled = normalized * gamma + beta;
         ''',
        name='scale_output')
    
    # Elementwise kernel computing
    #   z = inv_norm * (gy_centered - normalized * sc_prod)
    backprop_scale = cp.ElementwiseKernel(
        in_params='T inv_norm, T gy_centered, T normalized, T sc_prod',
        out_params='T z',
        operation='''
              z = inv_norm *(gy_centered - normalized * sc_prod);
         ''',
        name='backprop_scale')
except ImportError:
    inv_norm_comp = None
    scale_output = None
    backprop_scale = None
        
class LayerNormalization(function.Function):
    def __init__(self, eps=1e-5, gpu_optim=True):
        self.eps = eps
        self.gpu_optim = gpu_optim
github StrangerZhang / pyECO / eco / cuda_tools.py View on Github external
kw: kernel width
        sy: stride y
        sx: stride x
        ph: padding height
        pw: padding width
    """
    n, c, h, w = img.shape
    if out_h is None:
        out_h = get_conv_outsize(h, kh, sy, ph, cover_all, dy)
    assert out_h > 0, 'Height in the output should be positive.'
    if out_w is None:
        out_w = get_conv_outsize(w, kw, sx, pw, cover_all, dx)
    assert out_w > 0, 'Width in the output should be positive.'

    col = cp.empty((n, c, kh, kw, out_h, out_w), dtype=img.dtype)
    cp.ElementwiseKernel(
        'raw T img, int32 h, int32 w, int32 out_h, int32 out_w,'
        'int32 kh, int32 kw, int32 sy, int32 sx, int32 ph, int32 pw,'
        'int32 dy, int32 dx',
        'T col',
        '''
           int c0 = i / (kh * kw * out_h * out_w);
           int ky = i / (kw * out_h * out_w) % kh;
           int kx = i / (out_h * out_w) % kw;
           int out_y = i / out_w % out_h;
           int out_x = i % out_w;
           int in_y = ky * dy + out_y * sy - ph;
           int in_x = kx * dx + out_x * sx - pw;
           if (in_y >= 0 && in_y < h && in_x >= 0 && in_x < w) {
             col = img[in_x + w * (in_y + h * c0)];
           } else {
             col = 0;
github mikgroup / sigpy / sigpy / interp.py View on Github external
def _get_gridding_cuda(kernel):
        if kernel == 'spline':
            kernel = _spline_kernel_cuda
        elif kernel == 'kaiser_bessel':
            kernel = _kaiser_bessel_kernel_cuda

        _gridding1_cuda = cp.ElementwiseKernel(
            'raw T input, raw S coord, raw S width, raw S param',
            'raw T output',
            """
            const int batch_size = output.shape()[0];
            const int nx = output.shape()[1];

            const int coord_idx[] = {i, 0};
            const S kx = coord[coord_idx];
            const int x0 = ceil(kx - width / 2.0);
            const int x1 = floor(kx + width / 2.0);

            for (int x = x0; x < x1 + 1; x++) {
                const S w = kernel(
                    ((S) x - kx) / (width / 2.0), param);
                for (int b = 0; b < batch_size; b++) {
                    const int input_idx[] = {b, i};
github chainer / chainer / cupy / statistics / histogram.py View on Github external
raise ValueError('The first argument of bincount must be non-negative')
    if weights is not None and x.shape != weights.shape:
        raise ValueError('The weights and list don\'t have the same length.')
    if minlength is not None:
        minlength = int(minlength)
        if minlength <= 0:
            raise ValueError('minlength must be positive')

    size = int(cupy.max(x)) + 1
    if minlength is not None:
        size = max(size, minlength)

    if weights is None:
        # atomicAdd for int64 is not provided
        b = cupy.zeros((size,), dtype=cupy.int32)
        cupy.ElementwiseKernel(
            'S x', 'raw U bin',
            'atomicAdd(&bin[x], 1)',
            'bincount_kernel'
        )(x, b)
        b = b.astype(numpy.intp)
    else:
        # atomicAdd for float64 is not provided
        b = cupy.zeros((size,), dtype=cupy.float32)
        cupy.ElementwiseKernel(
            'S x, T w', 'raw U bin',
            'atomicAdd(&bin[x], w)',
            'bincount_with_weight_kernel'
        )(x, weights, b)
        b = b.astype(cupy.float64)

    return b