Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
@cuda.jit
def gaussker_2d1_cuda(x, y, c, hx, hy, nf1, nf2, nspread, tau, real_ftau, imag_ftau ):
"""This kernel function for gauss grid 1d typ1, and it will be executed by a thread."""
i = cuda.grid(1)
if i > x.shape[0]:
return
#do the 1d griding here
xi = x[i] % (2 * np.pi) #x, shift the source point xj so that it lies in [0,2*pi]
yi = y[i] % (2 * np.pi) #y, shift the source point yj so that it lies in [0,2*pi]
mx = 1 + int(xi // hx) #index for the closest grid point
my = 1 + int(yi // hy) #index for the closest grid point
for mmx in range(-nspread, nspread): #mm index for all the spreading points
for mmy in range(-nspread,nspread):
#griding with g(x,y) = exp(-(x^2 + y^2) / 4*tau)
#ftau[(mx + mmx) % nf1, (my + mmy) % nf2] +=
tmp = c[i] * exp(-0.25 * (\
(xi - hx * (mx + mmx)) ** 2 + \
@cuda.jit
def gaussker_3d1_fast_cuda(x, y, z, c, hx, hy, hz, nf1, nf2, nf3, nspread, tau, E3, real_ftau, imag_ftau):
    """Per-thread setup for the fast 3D type-1 Gaussian gridding kernel.

    Each thread handles one source point ``i`` (its 1D global thread index).
    The visible part of the kernel wraps the point's coordinates into one
    2*pi period, derives a grid index per axis from the spacings
    ``hx``/``hy``/``hz`` and computes the point's offset from that grid node.

    NOTE(review): ``nf1``/``nf2``/``nf3``, ``nspread``, ``tau``, ``E3``,
    ``real_ftau`` and ``imag_ftau`` are not used in the portion shown here;
    presumably the spreading loop that consumes them follows - confirm.
    """
    i = cuda.grid(1)
    # Valid source indices are 0 .. c.shape[0] - 1. The comparison must be
    # '>=': with '>' the thread whose index equals c.shape[0] would fall
    # through and read x[i], y[i], z[i] one element past the end.
    if i >= c.shape[0]:
        return
    two_pi = 2 * np.pi
    # wrap the source coordinates into a single 2*pi period
    xi = x[i] % two_pi
    yi = y[i] % two_pi
    zi = z[i] % two_pi
    # grid index per axis: one past floor(coordinate / spacing)
    mx = 1 + int(xi // hx)
    my = 1 + int(yi // hy)
    mz = 1 + int(zi // hz)
    # offsets of the wrapped point from grid node (mx, my, mz)
    xi = (xi - hx * mx)
    yi = (yi - hy * my)
    zi = (zi - hz * mz)
@cuda.jit(device=True)
def matcopy_cuda(B, A):
    """Copy the leading 3x3 entries of ``A`` into ``B`` and return ``B``."""
    for row in range(3):
        for col in range(3):
            B[row, col] = A[row, col]
    return B
@numba.cuda.jit(device=True)
def annhilate_k_device(K_val, C_val, output):
    """Build a 32-coefficient scratch array from ``K_val`` and combine it with ``C_val``.

    The scratch array is filled by ``project_val_cuda(K_val, scratch, 4)``,
    negated element-wise, and then has ``K_val[0]`` added to its first entry.
    The result is passed with ``C_val`` to ``gp_device``, which writes into
    ``output``; ``output`` is finally handed to ``normalise_mv_device``.
    """
    scratch = numba.cuda.local.array(32, dtype=numba.float64)
    project_val_cuda(K_val, scratch, 4)
    # flip the sign of every projected coefficient
    for idx in range(32):
        scratch[idx] = -scratch[idx]
    # add the first coefficient of K_val back onto entry 0
    scratch[0] += K_val[0]
    gp_device(scratch, C_val, output)
    normalise_mv_device(output)
@cuda.jit(device=True)
def gmt_func_o31(value, other_value):
    """Return a signed sum of the 32 products ``value[i] * other_value[31 - i]``.

    Every index ``i`` in 0..31 appears exactly once, each with a fixed
    coefficient of +1.0 or -1.0. The terms are kept in their original order
    so the floating-point result is unchanged.
    NOTE(review): presumably this is output coefficient 31 of a generated
    multivector product table - confirm against the generator.
    """
    return (
        0
        + -1.0 * value[24] * other_value[7]
        + 1.0 * value[1] * other_value[30]
        + -1.0 * value[17] * other_value[14]
        + 1.0 * value[15] * other_value[16]
        + 1.0 * value[18] * other_value[13]
        + -1.0 * value[14] * other_value[17]
        + 1.0 * value[13] * other_value[18]
        + 1.0 * value[19] * other_value[12]
        + -1.0 * value[20] * other_value[11]
        + 1.0 * value[12] * other_value[19]
        + -1.0 * value[11] * other_value[20]
        + 1.0 * value[21] * other_value[10]
        + 1.0 * value[10] * other_value[21]
        + -1.0 * value[22] * other_value[9]
        + 1.0 * value[23] * other_value[8]
        + 1.0 * value[31] * other_value[0]
        + -1.0 * value[9] * other_value[22]
        + 1.0 * value[8] * other_value[23]
        + -1.0 * value[7] * other_value[24]
        + 1.0 * value[25] * other_value[6]
        + 1.0 * value[26] * other_value[5]
        + 1.0 * value[6] * other_value[25]
        + -1.0 * value[27] * other_value[4]
        + 1.0 * value[5] * other_value[26]
        + 1.0 * value[28] * other_value[3]
        + -1.0 * value[4] * other_value[27]
        + -1.0 * value[29] * other_value[2]
        + 1.0 * value[3] * other_value[28]
        + 1.0 * value[30] * other_value[1]
        + -1.0 * value[2] * other_value[29]
        + 1.0 * value[0] * other_value[31]
        + 1.0 * value[16] * other_value[15]
    )
@cuda.jit(device=True)
def vmuls_cuda(a, s):
    """Multiply the first three entries of ``a`` by ``s`` in place and return ``a``."""
    for k in range(3):
        a[k] *= s
    return a
@numba.cuda.jit(device=True)
def set_as_unit_rotor_device(array):
    """Overwrite the first 32 entries of ``array`` with 1.0 followed by 31 zeros."""
    array[0] = 1.0
    for idx in range(1, 32):
        array[idx] = 0.0
@cuda.jit("(f8[:,:], f8[:,:], f8[:,:])")
def jacobi_relax_core(A, Anew, error):
    """Apply one four-neighbour averaging step to the interior cells of ``A``.

    Each thread owns one cell (row, col) taken from its 2D global thread
    index. For interior cells it writes the average of the four neighbours
    into ``Anew`` and the difference from the old value into ``error``.
    Threads on the border or outside the array do nothing.
    """
    rows = A.shape[0]
    cols = A.shape[1]
    # cuda.grid(2) yields (x, y): x indexes columns, y indexes rows
    col, row = cuda.grid(2)
    if row < 1 or row >= rows - 1 or col < 1 or col >= cols - 1:
        return
    Anew[row, col] = 0.25 * (A[row, col + 1] + A[row, col - 1]
                             + A[row - 1, col] + A[row + 1, col])
    error[row, col] = Anew[row, col] - A[row, col]
@numba.cuda.jit(device=True)
def cost_line_to_line_device(L1, L2):
    """Return ``rotor_cost_device`` of the array filled by ``rotor_between_lines_device(L1, L2, ...)``."""
    # 32-coefficient scratch array that receives the rotor
    rotor = numba.cuda.local.array(32, dtype=numba.float64)
    rotor_between_lines_device(L1, L2, rotor)
    cost = rotor_cost_device(rotor)
    return cost
@cuda.jit
def gpu_zeros_1d(out):
    """Set ``out[i]`` to 0 for the element matching this thread's 1D global index."""
    idx = cuda.grid(1)
    # threads beyond the end of the array have nothing to write
    if idx >= out.shape[0]:
        return
    out[idx] = 0