Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def inter(rbbox1, rbbox2):
    """Return ``area`` of the polygon where the two given boxes overlap.

    Both boxes are first expanded into eight-float corner buffers by
    ``rbbox_to_corners``.  ``quadrilateral_intersection`` then fills a
    sixteen-float buffer with the candidate polygon vertices and reports how
    many it wrote; those vertices are ordered in place before the area is
    computed.
    """
    # Corner buffers: one eight-float scratch array per box.
    box1_corners = cuda.local.array((8,), dtype=numba.float32)
    rbbox_to_corners(box1_corners, rbbox1)
    box2_corners = cuda.local.array((8,), dtype=numba.float32)
    rbbox_to_corners(box2_corners, rbbox2)

    # Output buffer for the overlap polygon, stored flat.
    polygon = cuda.local.array((16,), dtype=numba.float32)
    vertex_count = quadrilateral_intersection(box1_corners, box2_corners,
                                              polygon)
    sort_vertex_in_convex_polygon(polygon, vertex_count)
    return area(polygon, vertex_count)
def quadrilateral_intersection(pts1, pts2, int_pts):
    """Gather the candidate vertices of the overlap of two quadrilaterals.

    ``pts1`` and ``pts2`` are read as four (x, y) pairs stored flat.  Points
    are appended to ``int_pts`` (also flat) in this order: for each corner
    index, the corner of ``pts1`` if ``point_in_quadrilateral`` accepts it
    against ``pts2``, then the corner of ``pts2`` if accepted against
    ``pts1``; afterwards every edge pair (i, j) for which
    ``line_segment_intersection`` reports a hit.

    Returns the number of points written.

    NOTE(review): the capacity of ``int_pts`` is not checked here; the caller
    has to size it for the worst case -- confirm.
    """
    count = 0

    # Corners of either quadrilateral that the other one contains.
    for k in range(4):
        x1 = pts1[2 * k]
        y1 = pts1[2 * k + 1]
        if point_in_quadrilateral(x1, y1, pts2):
            int_pts[2 * count] = x1
            int_pts[2 * count + 1] = y1
            count += 1

        x2 = pts2[2 * k]
        y2 = pts2[2 * k + 1]
        if point_in_quadrilateral(x2, y2, pts1):
            int_pts[2 * count] = x2
            int_pts[2 * count + 1] = y2
            count += 1

    # Crossing points between every edge of pts1 and every edge of pts2.
    crossing = cuda.local.array((2, ), dtype=numba.float32)
    for edge1 in range(4):
        for edge2 in range(4):
            if line_segment_intersection(pts1, pts2, edge1, edge2, crossing):
                int_pts[2 * count] = crossing[0]
                int_pts[2 * count + 1] = crossing[1]
                count += 1

    return count
def inter(rbbox1, rbbox2):
    """Compute ``area`` of the overlap polygon of ``rbbox1`` and ``rbbox2``.

    Steps: convert each box to a flat eight-float corner array with
    ``rbbox_to_corners``, collect the overlap vertices with
    ``quadrilateral_intersection``, order them in place with
    ``sort_vertex_in_convex_polygon``, then hand them to ``area``.
    """
    # Scratch buffers: two corner arrays plus the flat vertex output.
    corners_a = cuda.local.array((8, ), dtype=numba.float32)
    corners_b = cuda.local.array((8, ), dtype=numba.float32)
    overlap_pts = cuda.local.array((16, ), dtype=numba.float32)

    rbbox_to_corners(corners_a, rbbox1)
    rbbox_to_corners(corners_b, rbbox2)

    n_pts = quadrilateral_intersection(corners_a, corners_b, overlap_pts)
    sort_vertex_in_convex_polygon(overlap_pts, n_pts)

    return area(overlap_pts, n_pts)
# Order the first `num_of_inter` (x, y) pairs stored flat in `int_pts`.
# NOTE(review): indentation was lost and this snippet stops inside the final
# comparison loop, so only the visible steps are described.
def sort_vertex_in_convex_polygon(int_pts, num_of_inter):
if num_of_inter > 0:
# Mean of the points: sum x and y separately, then divide by the count.
center = cuda.local.array((2, ), dtype=numba.float32)
center[:] = 0.0
for i in range(num_of_inter):
center[0] += int_pts[2 * i]
center[1] += int_pts[2 * i + 1]
center[0] /= num_of_inter
center[1] /= num_of_inter
# v is the offset of one point from the center; vs stores one key per point.
v = cuda.local.array((2, ), dtype=numba.float32)
vs = cuda.local.array((16, ), dtype=numba.float32)
for i in range(num_of_inter):
v[0] = int_pts[2 * i] - center[0]
v[1] = int_pts[2 * i + 1] - center[1]
# Scale the offset to unit length.
# NOTE(review): d is 0 when a point coincides with the center -- confirm
# callers cannot hit this.
d = math.sqrt(v[0] * v[0] + v[1] * v[1])
v[0] = v[0] / d
v[1] = v[1] / d
# When the y component is negative the key becomes -2 - v[0] instead of v[0].
if v[1] < 0:
v[0] = -2 - v[0]
vs[i] = v[0]
j = 0
temp = 0
# Compare each key with its predecessor; the handling of an out-of-order
# pair begins here and is cut off in this view.
for i in range(1, num_of_inter):
if vs[i - 1] > vs[i]:
temp = vs[i]
tx = int_pts[2 * i]
# Per-node kernel body: each thread derives a flat node index from its block
# and thread ids and, if the index is below totalNodes, selects a relaxation
# time from the sign and magnitude of colorValue at that node.
# NOTE(review): indentation was lost and the snippet ends on a dangling
# `elif`; everything after that is not visible here.
def calPerturbationFromForce2DMRT(totalNodes, xDim, optionF, tauR, tauB, deltaValue, \
weightsCoeff, unitEX, unitEY, physicalVX, physicalVY, \
forceX, forceY, colorValue, fluidTotalPDF, transformationM, \
inverseTM, collisionS, fluidRhoR, fluidRhoB):
# Flat node index: block row offset plus position within the row.
tx = cuda.threadIdx.x; bx = cuda.blockIdx.x; bDimX = cuda.blockDim.x
by = cuda.blockIdx.y
indices = by * xDim + bx * bDimX + tx
# sharedEX = cuda.shared.array((9,), dtype = float64)
# sharedEY = cuda.shared.array((9,), dtype = float64)
# sharedWeights = cuda.shared.array((9,), dtype = float64)
# sharedTM = cuda.shared.array(shape = (9, 9), dtype = float64)
# sharedIM = cuda.shared.array(shape = (9, 9), dtype = float64)
# NOTE(review): despite the "local" name this buffer is created with
# cuda.shared.array, while the two buffers below use cuda.local.array.
localCollisionS = cuda.shared.array(shape = (9,), dtype = float64)
localSource = cuda.local.array(shape = (9,), dtype = float64)
localTransform = cuda.local.array(shape = (9,), dtype = float64)
# for i in range(9):
# sharedEX[i] = unitEX[i]; sharedEY[i] = unitEY[i]
# sharedWeights[i] = weightsCoeff[i]
# for j in range(9):
# sharedTM[i, j] = transformationM[i, j]
# sharedIM[i, j] = inverseTM[i, j]
# Precompute 1 - collisionS[i] / 2 for the nine entries.
for i in range(9):
localCollisionS[i] = 1. - 0.5 * collisionS[i]
# Skip threads whose index falls past the last node.
if indices < totalNodes:
# tmpTau starts at 1 and is replaced by tauR / tauB when Phi lies outside
# the band [-deltaValue, deltaValue].
Phi = colorValue[indices]; tmpTau = 1.
if Phi > deltaValue:
tmpTau = tauR
elif Phi < -deltaValue:
tmpTau = tauB
elif math.fabs(Phi) <= deltaValue:
# Set up the test of edge i of pts1 against edge j of pts2.
# NOTE(review): indentation was lost and the snippet is cut off after the
# first few coordinate differences; how temp_pts is filled and what is
# returned are not visible here.
def line_segment_intersection(pts1, pts2, i, j, temp_pts):
A = cuda.local.array((2, ), dtype=numba.float32)
B = cuda.local.array((2, ), dtype=numba.float32)
C = cuda.local.array((2, ), dtype=numba.float32)
D = cuda.local.array((2, ), dtype=numba.float32)
# A -> B is edge i of pts1; the "% 4" wraps the last vertex back to the first.
A[0] = pts1[2 * i]
A[1] = pts1[2 * i + 1]
B[0] = pts1[2 * ((i + 1) % 4)]
B[1] = pts1[2 * ((i + 1) % 4) + 1]
# C -> D is edge j of pts2, wrapped the same way.
C[0] = pts2[2 * j]
C[1] = pts2[2 * j + 1]
D[0] = pts2[2 * ((j + 1) % 4)]
D[1] = pts2[2 * ((j + 1) % 4) + 1]
# Coordinate differences measured from A.
BA0 = B[0] - A[0]
BA1 = B[1] - A[1]
DA0 = D[0] - A[0]
CA0 = C[0] - A[0]
# Fragment of a sampling routine: locate this chunk's sub-domain, then draw
# chunk_size uniform random points inside it and accumulate fun() at each.
# NOTE(review): the enclosing definition and the origin of dim, n_chunk_x,
# chunk_id, domain, domain_range, chunk_size, rng_states, thread_id, fun and
# MCresult are not visible in this snippet.
digit_store = cuda.local.array(shape=dim, dtype=nb.int64)
for i_temp in range(dim):
digit_store[i_temp] = 0
# convert one_d index to dim_d index
# result will be stored in digit_store
oneD_to_nD(n_chunk_x,chunk_id,digit_store)
# specify the local domain
# Lower corner of the chunk: domain start plus the per-axis chunk offset.
domain_left = cuda.local.array(dim, dtype=nb.float64)
for j_dim in range(dim):
domain_left[j_dim] = domain[j_dim][0] + digit_store[j_dim] * domain_range[j_dim]
for i_sample in range(chunk_size):
# x_tuple: local axis values for each thread
x_tuple = cuda.local.array(dim, dtype=nb.float64)
# One uniform draw per axis, scaled to the chunk range and shifted to its
# lower corner.
for j_dim in range(dim):
x_tuple[j_dim] = xoroshiro128p_uniform_float64(rng_states, thread_id) *domain_range[j_dim] + domain_left[j_dim]
# feed in values to user defined function
# The result is added atomically into this thread's slot of MCresult.
cuda.atomic.add(MCresult, thread_id, fun(x_tuple))
# Fragment: fixed simulation parameters, per-thread scratch buffers, then the
# start of the computation (M = M0 scaled by PD, followed by throt_cuda).
# NOTE(review): the enclosing definition, M0, and the *_cuda helpers are not
# visible in this snippet.
phi= -0.5 * np.pi # rf amplitude
theta= 0. * np.pi # rf phase
T = 0.1 # relaxation time
T1 = 1. # T1
T2 = 1000. # T2
df = 0. # freq offset
PD = 0.5 # proton density
# claim local memory
Rz = cuda.local.array(shape=(3, 3), dtype=numba.float64)
Rx = cuda.local.array(shape=(3, 3), dtype=numba.float64)
Mtmp = cuda.local.array(shape=3, dtype=numba.float64)
M = cuda.local.array(shape=3, dtype=numba.float64)
Rth = cuda.local.array(shape=(3, 3), dtype=numba.float64)
Rtho = cuda.local.array(shape=(3, 3), dtype=numba.float64)
# NOTE(review): the trailing "#float32" remarks below disagree with the
# float64 dtype actually requested.
Em = cuda.local.array(shape=(3, 3), dtype=numba.float64)#float32
Afp = cuda.local.array(shape=(3, 3), dtype=numba.float64)#float32
Bfp = cuda.local.array(shape=3, dtype=numba.float64)#float32
#simple test
#Rz_cuda(Rz, phi)
#Rx_cuda(Rx, theta)
#matmulv_cuda(Mtmp,Rz,M)
#veccopy_cuda(M, Mtmp)
# M0=[0 0 1] should be proton density weighted
veccopy_cuda(M, M0)
vmuls_cuda(M, PD)
#excitation
throt_cuda( Rtho, Rz, Rx, Rth, phi, theta )
# Load the end points of edge i of pts1 and edge j of pts2 for a
# segment-against-segment test.
# NOTE(review): indentation was lost and the snippet is cut off right after
# the first coordinate difference; the rest of the routine is not visible.
def line_segment_intersection(pts1, pts2, i, j, temp_pts):
A = cuda.local.array((2,), dtype=numba.float32)
B = cuda.local.array((2,), dtype=numba.float32)
C = cuda.local.array((2,), dtype=numba.float32)
D = cuda.local.array((2,), dtype=numba.float32)
# Edge i of pts1 runs from A to B; index (i + 1) % 4 wraps to vertex 0.
A[0] = pts1[2 * i]
A[1] = pts1[2 * i + 1]
B[0] = pts1[2 * ((i + 1) % 4)]
B[1] = pts1[2 * ((i + 1) % 4) + 1]
# Edge j of pts2 runs from C to D with the same wrap-around.
C[0] = pts2[2 * j]
C[1] = pts2[2 * j + 1]
D[0] = pts2[2 * ((j + 1) % 4)]
D[1] = pts2[2 * ((j + 1) % 4) + 1]
# x difference from A to B.
BA0 = B[0] - A[0]
def square_root_of_rotor_device(rotor, rotor_root):
    """Write a result derived from ``1 + rotor`` into ``rotor_root``.

    The 32 coefficients of ``rotor`` are copied into a scratch buffer and 1.0
    is added to coefficient 0.  That buffer is passed through
    ``gp_mult_with_adjoint`` and ``positive_root_device``, and
    ``annhilate_k_device`` finally fills ``rotor_root``.  Nothing is returned.
    """
    # Three 32-element float64 scratch buffers, one per intermediate value.
    shifted = numba.cuda.local.array(32, dtype=numba.float64)
    sigma = numba.cuda.local.array(32, dtype=numba.float64)
    k_root = numba.cuda.local.array(32, dtype=numba.float64)

    # shifted = rotor, then add one to the first coefficient.
    for idx in range(32):
        shifted[idx] = rotor[idx]
    shifted[0] += 1.0

    gp_mult_with_adjoint(shifted, sigma)
    positive_root_device(sigma, k_root)
    annhilate_k_device(k_root, shifted, rotor_root)