How to use the numba.cuda.jit function in numba

To help you get started, we’ve selected a few numba examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github peng-cao / mripy / fft / nufft_func_cuda.py View on Github external
@cuda.jit
def gaussker_2d1_cuda(x, y, c, hx, hy, nf1, nf2, nspread, tau, real_ftau, imag_ftau ):
    """This kernel function for gauss grid 1d typ1, and it will be executed by a thread."""
    i  = cuda.grid(1)
    if i > x.shape[0]:
        return
    #do the 1d griding here
    xi = x[i] % (2 * np.pi) #x, shift the source point xj so that it lies in [0,2*pi]
    yi = y[i] % (2 * np.pi) #y, shift the source point yj so that it lies in [0,2*pi]
    mx = 1 + int(xi // hx) #index for the closest grid point
    my = 1 + int(yi // hy) #index for the closest grid point
    for mmx in range(-nspread, nspread): #mm index for all the spreading points
        for mmy in range(-nspread,nspread):
            #griding with g(x,y) = exp(-(x^2 + y^2) / 4*tau)
            #ftau[(mx + mmx) % nf1, (my + mmy) % nf2] +=
            tmp = c[i] * exp(-0.25 * (\
            (xi - hx * (mx + mmx)) ** 2 + \
github peng-cao / mripy / fft / nufft_func_cuda.py View on Github external
@cuda.jit
def gaussker_3d1_fast_cuda(x, y, z, c, hx, hy, hz, nf1, nf2, nf3, nspread, tau, E3, real_ftau, imag_ftau ):
    """CUDA kernel for the 3D Gaussian gridding step; each thread handles one source point.

    The thread index ``i`` selects the source point ``(x[i], y[i], z[i])``.
    The coordinates are wrapped into ``[0, 2*pi)``, the index of the grid
    point used as the spreading centre is computed per axis from the grid
    spacings ``hx``, ``hy``, ``hz``, and the coordinates are then replaced by
    their offsets from that grid point.

    The remaining parameters (``nf1``, ``nf2``, ``nf3``, ``nspread``, ``tau``,
    ``E3``, ``real_ftau``, ``imag_ftau``) are not touched by the part of the
    kernel shown here.
    NOTE(review): presumably they are the grid sizes, spreading width, Gaussian
    parameter, a precomputed table and the output grids -- confirm against the
    full kernel.
    """
    i     = cuda.grid(1)
    # Valid indices are 0 .. c.shape[0] - 1, so the thread with i == c.shape[0]
    # must exit as well; otherwise it would read one element past the end.
    if i >= c.shape[0]:
        return

    #read x, y, z values
    xi    = x[i] % (2 * np.pi) #x, shift the source point xj so that it lies in [0,2*pi)
    yi    = y[i] % (2 * np.pi) #y, shift the source point yj so that it lies in [0,2*pi)
    zi    = z[i] % (2 * np.pi) #z, shift the source point zj so that it lies in [0,2*pi)
    mx    = 1 + int(xi // hx) #index of the grid point used as the spreading centre
    my    = 1 + int(yi // hy) #index of the grid point used as the spreading centre
    mz    = 1 + int(zi // hz) #index of the grid point used as the spreading centre
    xi    = (xi - hx * mx) #offset from that grid point
    yi    = (yi - hy * my) #offset from that grid point
    zi    = (zi - hz * mz) #offset from that grid point
github peng-cao / mripy / bloch_sim / sim_spin_cuda.py View on Github external
@cuda.jit(device=True)
def matcopy_cuda( B, A ):
    """Copy the 3x3 block ``A[0:3, 0:3]`` element by element into ``B`` and return ``B``."""
    for row in range(3):
        for col in range(3):
            # plain element-wise copy, row-major order
            B[row, col] = A[row, col]
    return B
github pygae / clifford / clifford / tools / g3c / cuda.py View on Github external
@numba.cuda.jit(device=True)
def annhilate_k_device(K_val, C_val, output):
    """Build a 32-element multiplier from ``K_val``, apply it to ``C_val`` and normalise.

    Steps, as written in the code:
      1. ``project_val_cuda(K_val, scratch, 4)`` fills a 32-element scratch
         array (presumably the grade-4 part of ``K_val`` -- confirm against
         ``project_val_cuda``).
      2. Every scratch element is negated and ``K_val[0]`` is added to
         element 0.
      3. ``gp_device(scratch, C_val, output)`` writes the product into
         ``output``, which is then passed to ``normalise_mv_device``.
    """
    scratch = numba.cuda.local.array(32, dtype=numba.float64)
    project_val_cuda(K_val, scratch, 4)

    # flip the sign of the whole projected array ...
    for idx in range(32):
        scratch[idx] = -scratch[idx]
    # ... then add the first coefficient of K_val back in
    scratch[0] += K_val[0]

    gp_device(scratch, C_val, output)
    normalise_mv_device(output)
github pygae / clifford / clifford / tools / g3c / cuda_products.py View on Github external
@cuda.jit(device=True)
def gmt_func_o31(value, other_value):
    """Return one output coefficient as a signed sum of pairwise products.

    Every term has the form ``+/-1.0 * value[i] * other_value[31 - i]`` and
    each index ``i`` in ``0..31`` appears exactly once, so both arguments must
    be indexable up to 31.
    NOTE(review): the name suggests this is output component 31 of a generated
    product table -- confirm against the generator. The term order is kept as
    generated because floating-point addition is order-sensitive.
    """
    return 0 + -1.0*value[24]*other_value[7] + 1.0*value[1]*other_value[30] + -1.0*value[17]*other_value[14] + 1.0*value[15]*other_value[16] + 1.0*value[18]*other_value[13] + -1.0*value[14]*other_value[17] + 1.0*value[13]*other_value[18] + 1.0*value[19]*other_value[12] + -1.0*value[20]*other_value[11] + 1.0*value[12]*other_value[19] + -1.0*value[11]*other_value[20] + 1.0*value[21]*other_value[10] + 1.0*value[10]*other_value[21] + -1.0*value[22]*other_value[9] + 1.0*value[23]*other_value[8] + 1.0*value[31]*other_value[0] + -1.0*value[9]*other_value[22] + 1.0*value[8]*other_value[23] + -1.0*value[7]*other_value[24] + 1.0*value[25]*other_value[6] + 1.0*value[26]*other_value[5] + 1.0*value[6]*other_value[25] + -1.0*value[27]*other_value[4] + 1.0*value[5]*other_value[26] + 1.0*value[28]*other_value[3] + -1.0*value[4]*other_value[27] + -1.0*value[29]*other_value[2] + 1.0*value[3]*other_value[28] + 1.0*value[30]*other_value[1] + -1.0*value[2]*other_value[29] + 1.0*value[0]*other_value[31] + 1.0*value[16]*other_value[15]
github peng-cao / mripy / bloch_sim / sim_spin_cuda.py View on Github external
@cuda.jit(device=True)
def vmuls_cuda( a, s ):
    """Scale the first three elements of ``a`` by ``s`` in place and return ``a``."""
    for k in range(3):
        a[k] *= s
    return a
github pygae / clifford / clifford / tools / g3c / cuda.py View on Github external
@numba.cuda.jit(device=True)
def set_as_unit_rotor_device(array):
    """Overwrite ``array`` in place with 1.0 at index 0 and 0.0 at indices 1..31."""
    array[0] = 1.0
    # clear every remaining coefficient
    for idx in range(1, 32):
        array[idx] = 0.0
github numba / numba / examples / laplace2d / laplace2d-numba-cuda.py View on Github external
@cuda.jit("(f8[:,:], f8[:,:], f8[:,:])")
def jacobi_relax_core(A, Anew, error):
    """Update one interior cell per thread with the average of its four neighbours.

    For the cell ``(j, i)`` owned by the calling thread, ``Anew[j, i]`` is set
    to the mean of the four neighbours of ``A[j, i]`` and ``error[j, i]`` to
    the difference ``Anew[j, i] - A[j, i]``. Threads that map to the border
    of ``A`` or fall outside it do nothing.
    """
    rows, cols = A.shape[0], A.shape[1]

    # cuda.grid(2) yields (x, y) absolute thread positions; x indexes columns
    # and y indexes rows.
    i, j = cuda.grid(2)

    # Leave the border and out-of-range threads untouched.
    if j < 1 or j >= rows - 1 or i < 1 or i >= cols - 1:
        return

    Anew[j, i] = 0.25 * (A[j, i + 1] + A[j, i - 1]
                         + A[j - 1, i] + A[j + 1, i])
    error[j, i] = Anew[j, i] - A[j, i]
github pygae / clifford / clifford / tools / g3c / cuda.py View on Github external
@numba.cuda.jit(device=True)
def cost_line_to_line_device(L1, L2):
    """Return ``rotor_cost_device`` of the rotor computed between ``L1`` and ``L2``.

    A 32-element float64 scratch array is filled by
    ``rotor_between_lines_device(L1, L2, ...)`` and then handed to
    ``rotor_cost_device``, whose result is returned unchanged.
    """
    rotor = numba.cuda.local.array(32, dtype=numba.float64)
    rotor_between_lines_device(L1, L2, rotor)
    cost = rotor_cost_device(rotor)
    return cost
github rapidsai / cuml / python / cuml / utils / numba_utils.py View on Github external
@cuda.jit
def gpu_zeros_1d(out):
    """Set ``out[i]`` to 0 for the element owned by the calling thread.

    Threads whose index is at or beyond ``out.shape[0]`` return without
    writing.
    """
    idx = cuda.grid(1)
    if idx >= out.shape[0]:
        return
    out[idx] = 0