How to use the numba.cuda.local.array function in numba

To help you get started, we’ve selected a few numba examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github ucbdrive / 3d-vehicle-tracking / 3d-tracking / tools / object-ap-eval / rotate_iou.py View on Github external
def inter(rbbox1, rbbox2):
    """Compute the overlap measure of two rotated boxes.

    Both boxes are expanded into 8-element float32 corner buffers by
    ``rbbox_to_corners``, the intersection vertices are collected by
    ``quadrilateral_intersection`` into a 16-element float32 buffer, the
    vertices are ordered by ``sort_vertex_in_convex_polygon``, and the
    result of ``area`` over those vertices is returned.

    Args:
        rbbox1: First rotated box, in whatever layout ``rbbox_to_corners``
            expects (not visible from this function).
        rbbox2: Second rotated box, same layout as ``rbbox1``.

    Returns:
        The value produced by ``area`` for the sorted intersection vertices
        (presumably the intersection area -- confirm against ``area``).
    """
    # Corner buffers: 8 floats each (presumably 4 corners as x, y pairs).
    box1_corners = cuda.local.array((8,), dtype=numba.float32)
    box2_corners = cuda.local.array((8,), dtype=numba.float32)
    rbbox_to_corners(box1_corners, rbbox1)
    rbbox_to_corners(box2_corners, rbbox2)

    # Output buffer for the intersection polygon: 16 floats.
    overlap_pts = cuda.local.array((16,), dtype=numba.float32)
    overlap_count = quadrilateral_intersection(
        box1_corners, box2_corners, overlap_pts)

    # Vertices are ordered in place before being handed to ``area``.
    sort_vertex_in_convex_polygon(overlap_pts, overlap_count)
    return area(overlap_pts, overlap_count)
github traveller59 / second.pytorch / second / core / non_max_suppression / nms_gpu.py View on Github external
def quadrilateral_intersection(pts1, pts2, int_pts):
    """Collect the candidate vertices of the overlap of two quadrilaterals.

    Two kinds of points are appended to ``int_pts`` as consecutive (x, y)
    pairs:

    1. every corner of one quadrilateral for which
       ``point_in_quadrilateral`` reports it lies in the other one, and
    2. every point reported by ``line_segment_intersection`` for each of the
       4 x 4 edge pairs.

    Args:
        pts1: Flat array holding 4 corners of the first quadrilateral as
            ``[x0, y0, x1, y1, ...]``.
        pts2: Flat array holding 4 corners of the second quadrilateral, same
            layout.
        int_pts: Output array that receives the collected points, flattened
            as (x, y) pairs.

    Returns:
        The number of points written to ``int_pts``.
    """
    # NOTE(review): writes to ``int_pts`` are not bounds-checked here; the
    # caller must size it for the worst case -- confirm against callers.
    found = 0

    # Pass 1: corners of each quadrilateral contained in the other one.
    for corner in range(4):
        x_idx = 2 * corner
        y_idx = x_idx + 1
        if point_in_quadrilateral(pts1[x_idx], pts1[y_idx], pts2):
            int_pts[found * 2] = pts1[x_idx]
            int_pts[found * 2 + 1] = pts1[y_idx]
            found += 1
        if point_in_quadrilateral(pts2[x_idx], pts2[y_idx], pts1):
            int_pts[found * 2] = pts2[x_idx]
            int_pts[found * 2 + 1] = pts2[y_idx]
            found += 1

    # Pass 2: edge/edge crossings; ``crossing`` is a 2-float scratch buffer
    # filled by ``line_segment_intersection`` when it reports a hit.
    crossing = cuda.local.array((2, ), dtype=numba.float32)
    for edge1 in range(4):
        for edge2 in range(4):
            if line_segment_intersection(pts1, pts2, edge1, edge2, crossing):
                int_pts[found * 2] = crossing[0]
                int_pts[found * 2 + 1] = crossing[1]
                found += 1

    return found
github traveller59 / second.pytorch / second / core / non_max_suppression / nms_gpu.py View on Github external
def inter(rbbox1, rbbox2):
    """Return ``area`` of the intersection polygon of two rotated boxes.

    Steps, in order:
      1. ``rbbox_to_corners`` fills an 8-float corner buffer for each box.
      2. ``quadrilateral_intersection`` writes the intersection vertices
         into a 16-float buffer and returns how many it wrote.
      3. ``sort_vertex_in_convex_polygon`` reorders those vertices in place.
      4. ``area`` is evaluated on the ordered vertices.

    Args:
        rbbox1: First rotated box (layout defined by ``rbbox_to_corners``).
        rbbox2: Second rotated box (same layout).

    Returns:
        Whatever ``area`` returns for the ordered vertices (presumably the
        overlap area -- confirm against ``area``).
    """
    quad_a = cuda.local.array((8, ), dtype=numba.float32)
    rbbox_to_corners(quad_a, rbbox1)

    quad_b = cuda.local.array((8, ), dtype=numba.float32)
    rbbox_to_corners(quad_b, rbbox2)

    # Scratch polygon for the overlap: 16 floats.
    polygon = cuda.local.array((16, ), dtype=numba.float32)
    vertex_count = quadrilateral_intersection(quad_a, quad_b, polygon)
    sort_vertex_in_convex_polygon(polygon, vertex_count)

    return area(polygon, vertex_count)
github traveller59 / second.pytorch / second / core / non_max_suppression / nms_gpu.py View on Github external
def sort_vertex_in_convex_polygon(int_pts, num_of_inter):
    if num_of_inter > 0:
        center = cuda.local.array((2, ), dtype=numba.float32)
        center[:] = 0.0
        for i in range(num_of_inter):
            center[0] += int_pts[2 * i]
            center[1] += int_pts[2 * i + 1]
        center[0] /= num_of_inter
        center[1] /= num_of_inter
        v = cuda.local.array((2, ), dtype=numba.float32)
        vs = cuda.local.array((16, ), dtype=numba.float32)
        for i in range(num_of_inter):
            v[0] = int_pts[2 * i] - center[0]
            v[1] = int_pts[2 * i + 1] - center[1]
            d = math.sqrt(v[0] * v[0] + v[1] * v[1])
            v[0] = v[0] / d
            v[1] = v[1] / d
            if v[1] < 0:
                v[0] = -2 - v[0]
            vs[i] = v[0]
        j = 0
        temp = 0
        for i in range(1, num_of_inter):
            if vs[i - 1] > vs[i]:
                temp = vs[i]
                tx = int_pts[2 * i]
github VadarSkyWalker / openLBMPM / RKCG2D / AcceleratedRKGPU2D.py View on Github external
def calPerturbationFromForce2DMRT(totalNodes, xDim, optionF, tauR, tauB, deltaValue, \
                               weightsCoeff, unitEX, unitEY, physicalVX, physicalVY, \
                               forceX, forceY, colorValue, fluidTotalPDF, transformationM, \
                               inverseTM, collisionS, fluidRhoR, fluidRhoB):
    tx = cuda.threadIdx.x; bx = cuda.blockIdx.x; bDimX = cuda.blockDim.x
    by = cuda.blockIdx.y
    indices = by * xDim + bx * bDimX + tx

#    sharedEX = cuda.shared.array((9,), dtype = float64)
#    sharedEY = cuda.shared.array((9,), dtype = float64)
#    sharedWeights = cuda.shared.array((9,), dtype = float64)
#    sharedTM = cuda.shared.array(shape = (9, 9), dtype = float64)
#    sharedIM = cuda.shared.array(shape = (9, 9), dtype = float64)

    localCollisionS = cuda.shared.array(shape = (9,), dtype = float64)
    localSource = cuda.local.array(shape = (9,), dtype = float64)
    localTransform = cuda.local.array(shape = (9,), dtype = float64)
#    for i in range(9):
#        sharedEX[i] = unitEX[i]; sharedEY[i] = unitEY[i]
#        sharedWeights[i] = weightsCoeff[i]
#        for j in range(9):
#            sharedTM[i, j] = transformationM[i, j]
#            sharedIM[i, j] = inverseTM[i, j]
    for i in range(9):
        localCollisionS[i] = 1. - 0.5 * collisionS[i]
    if indices < totalNodes:
        Phi = colorValue[indices]; tmpTau = 1.
        if Phi > deltaValue:
            tmpTau = tauR
        elif Phi < -deltaValue:
            tmpTau = tauB
        elif math.fabs(Phi) <= deltaValue:
github traveller59 / second.pytorch / second / core / non_max_suppression / nms_gpu.py View on Github external
def line_segment_intersection(pts1, pts2, i, j, temp_pts):
    A = cuda.local.array((2, ), dtype=numba.float32)
    B = cuda.local.array((2, ), dtype=numba.float32)
    C = cuda.local.array((2, ), dtype=numba.float32)
    D = cuda.local.array((2, ), dtype=numba.float32)

    A[0] = pts1[2 * i]
    A[1] = pts1[2 * i + 1]

    B[0] = pts1[2 * ((i + 1) % 4)]
    B[1] = pts1[2 * ((i + 1) % 4) + 1]

    C[0] = pts2[2 * j]
    C[1] = pts2[2 * j + 1]

    D[0] = pts2[2 * ((j + 1) % 4)]
    D[1] = pts2[2 * ((j + 1) % 4) + 1]
    BA0 = B[0] - A[0]
    BA1 = B[1] - A[1]
    DA0 = D[0] - A[0]
    CA0 = C[0] - A[0]
github Letianwu / ZMCintegral / ZMCintegral / ZMCintegral.py View on Github external
digit_store = cuda.local.array(shape=dim, dtype=nb.int64)
                    for i_temp in range(dim):
                        digit_store[i_temp] = 0
                    
                    # convert one_d index to dim_d index
                    # result will be stored in digit_store
                    oneD_to_nD(n_chunk_x,chunk_id,digit_store)
            
                    # specify the local domain
                    domain_left = cuda.local.array(dim, dtype=nb.float64)
                    for j_dim in range(dim):
                        domain_left[j_dim] = domain[j_dim][0] + digit_store[j_dim] * domain_range[j_dim]
            
                    for i_sample in range(chunk_size):
                        # x_tuple: local axis values for each thread
                        x_tuple = cuda.local.array(dim, dtype=nb.float64)
                
                        for j_dim in range(dim):
                            x_tuple[j_dim] = xoroshiro128p_uniform_float64(rng_states, thread_id)                                                            *domain_range[j_dim] + domain_left[j_dim]
                
                        # feed in values to user defined function
                        cuda.atomic.add(MCresult, thread_id, fun(x_tuple))
github peng-cao / mripy / bloch_sim / sim_spin_cuda.py View on Github external
phi= -0.5 * np.pi # rf amplitude
    theta= 0. * np.pi # rf phase
    T = 0.1 # relaxation time
    T1 = 1. # T1
    T2 = 1000. # T2
    df = 0.  # freq offset
    PD = 0.5  # proton density

    # claim local memory
    Rz   = cuda.local.array(shape=(3, 3), dtype=numba.float64)
    Rx   = cuda.local.array(shape=(3, 3), dtype=numba.float64)
    Mtmp = cuda.local.array(shape=3,      dtype=numba.float64)
    M    = cuda.local.array(shape=3,      dtype=numba.float64)    
    Rth  = cuda.local.array(shape=(3, 3), dtype=numba.float64)
    Rtho = cuda.local.array(shape=(3, 3), dtype=numba.float64)
    Em   = cuda.local.array(shape=(3, 3), dtype=numba.float64)#float32
    Afp  = cuda.local.array(shape=(3, 3), dtype=numba.float64)#float32
    Bfp  = cuda.local.array(shape=3,      dtype=numba.float64)#float32
 

    #simple test    
    #Rz_cuda(Rz, phi)
    #Rx_cuda(Rx, theta)
    #matmulv_cuda(Mtmp,Rz,M)
    #veccopy_cuda(M, Mtmp)

    # M0=[0 0 1] should be proton density weighted
    veccopy_cuda(M, M0)
    vmuls_cuda(M, PD)
    #excitation
    throt_cuda( Rtho, Rz, Rx, Rth, phi, theta )
github ucbdrive / 3d-vehicle-tracking / 3d-tracking / tools / object-ap-eval / rotate_iou.py View on Github external
def line_segment_intersection(pts1, pts2, i, j, temp_pts):
    A = cuda.local.array((2,), dtype=numba.float32)
    B = cuda.local.array((2,), dtype=numba.float32)
    C = cuda.local.array((2,), dtype=numba.float32)
    D = cuda.local.array((2,), dtype=numba.float32)

    A[0] = pts1[2 * i]
    A[1] = pts1[2 * i + 1]

    B[0] = pts1[2 * ((i + 1) % 4)]
    B[1] = pts1[2 * ((i + 1) % 4) + 1]

    C[0] = pts2[2 * j]
    C[1] = pts2[2 * j + 1]

    D[0] = pts2[2 * ((j + 1) % 4)]
    D[1] = pts2[2 * ((j + 1) % 4) + 1]
    BA0 = B[0] - A[0]
github pygae / clifford / clifford / tools / g3c / cuda.py View on Github external
def square_root_of_rotor_device(rotor, rotor_root):
    """Run the rotor square-root pipeline, writing the result to an output.

    The first 32 coefficients of ``rotor`` are copied into a scratch buffer
    and 1.0 is added to coefficient 0. That buffer is then passed through
    ``gp_mult_with_adjoint``, ``positive_root_device`` and
    ``annhilate_k_device`` in sequence.

    Args:
        rotor: Input array; indices 0..31 are read.
        rotor_root: Output array handed to ``annhilate_k_device``
            (presumably receives the square root -- confirm against that
            helper).
    """
    # Scratch copy of the rotor with 1.0 added to coefficient 0.
    shifted = numba.cuda.local.array(32, dtype=numba.float64)
    for idx in range(32):
        shifted[idx] = rotor[idx]
    shifted[0] += 1.0

    # Intermediate buffers filled by the helper routines below.
    sigma = numba.cuda.local.array(32, dtype=numba.float64)
    gp_mult_with_adjoint(shifted, sigma)

    k_buf = numba.cuda.local.array(32, dtype=numba.float64)
    positive_root_device(sigma, k_buf)

    annhilate_k_device(k_buf, shifted, rotor_root)