How to use the pyopencl.array.empty function in pyopencl

To help you get started, we’ve selected a few pyopencl examples based on popular ways pyopencl.array.empty is used in public projects.
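
At its core, pyopencl.array.empty(queue, shape, dtype) allocates an uninitialized array on the compute device, much like numpy.empty does on the host: the allocation is cheap, but the contents are undefined until you write to them, typically via Array.set() or a kernel. A minimal, hedged sketch (the context setup and array names below are illustrative, not taken from any of the projects shown):

import numpy as np
import pyopencl as cl
import pyopencl.array as cla

# Create a context and queue (interactively or from environment defaults).
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

# Allocate an uninitialized 512x512 float32 array on the device.
a_dev = cla.empty(queue, (512, 512), dtype=np.float32)

# Fill it from a host array, then read it back to verify the round trip.
a_host = np.random.rand(512, 512).astype(np.float32)
a_dev.set(a_host)
assert np.allclose(a_dev.get(), a_host)

The examples below show the same call in real projects, usually as the allocation step for kernel outputs.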


github zachjweiner / pystella / test / test_derivs.py
    xvec = np.meshgrid(*[dxi * np.arange(si, si + ni)
                         for dxi, si, ni in zip(dx, start, rank_shape)],
                       indexing='ij')

    phases = sum(ki * xi for ki, xi in zip(kvec, xvec))
    if h > 0:
        fx_h[h:-h, h:-h, h:-h] = np.sin(phases)
    else:
        fx_h[:] = np.sin(phases)
    fx_cos = np.cos(phases)

    fx = cla.empty(queue, pencil_shape, dtype)
    fx.set(fx_h)

    lap = cla.empty(queue, rank_shape, dtype)
    grd = cla.empty(queue, (3,)+rank_shape, dtype)
    derivs(queue, fx=fx, lap=lap, grd=grd)

    eff_kmag_sq = sum(get_evals_2(kvec_i, dxi) for dxi, kvec_i in zip(dx, kvec))

    lap_true = cla.to_device(queue, eff_kmag_sq * np.sin(phases))
    diff = clm.fabs(lap - lap_true)
    max_err = cla.max(diff) / cla.max(clm.fabs(lap_true))
    avg_err = cla.sum(diff) / cla.sum(clm.fabs(lap_true))

    max_rtol = 1.e-11 if dtype == np.float64 else 3.e-4
    avg_rtol = 1.e-12 if dtype == np.float64 else 5.e-5
    assert max_err < max_rtol and avg_err < avg_rtol, \
        "lap inaccurate for halo_shape=%s, grid_shape=%s, proc_shape=%s" \
        % (h, grid_shape, proc_shape)

    for i in range(3):
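
The recurring pattern in this test is: allocate result storage with cla.empty, upload inputs with .set(), run the computation, and then measure the error against a reference entirely on the device with reductions such as cla.max and cla.sum. A condensed, hedged sketch of that pattern (all names and the stand-in computation below are illustrative, not taken from pystella):

import numpy as np
import pyopencl as cl
import pyopencl.array as cla
import pyopencl.clmath as clm

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

# Host reference data and an uninitialized device array to hold it.
f_h = np.sin(np.linspace(0, 2 * np.pi, 64, dtype=np.float32))
f = cla.empty(queue, f_h.shape, f_h.dtype)
f.set(f_h)                                    # host -> device copy

# A "computed" result and its analytic reference, both kept on the device.
result = clm.cos(f)                           # stand-in for a real kernel's output
reference = cla.to_device(queue, np.cos(f_h))

# Relative errors via device-side reductions, mirroring the test above.
diff = clm.fabs(result - reference)
max_err = cla.max(diff) / cla.max(clm.fabs(reference))
avg_err = cla.sum(diff) / cla.sum(clm.fabs(reference))
print("max/avg relative error:", max_err.get(), avg_err.get())
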
github pierrepaleo / sift_pyocl / test / test_image.py
    def test_gradient(self):
        """
        tests the gradient kernel (norm and orientation)
        """
        
        border_dist, peakthresh, EdgeThresh, EdgeThresh0, octsize, scale, nb_keypoints, width, height, DOGS, g = local_maxmin_setup()
        self.mat = numpy.ascontiguousarray(g[1])
        self.height, self.width = numpy.int32(self.mat.shape)
        self.gpu_mat = pyopencl.array.to_device(queue, self.mat)
        self.gpu_grad = pyopencl.array.empty(queue, self.mat.shape, dtype=numpy.float32, order="C")
        self.gpu_ori = pyopencl.array.empty(queue, self.mat.shape, dtype=numpy.float32, order="C")
        self.shape = calc_size((self.width, self.height), self.wg)

        t0 = time.time()
        k1 = self.program.compute_gradient_orientation(queue, self.shape, self.wg, self.gpu_mat.data, self.gpu_grad.data, self.gpu_ori.data, self.width, self.height)
        res_norm = self.gpu_grad.get()
        res_ori = self.gpu_ori.get()
        t1 = time.time()
        ref_norm, ref_ori = my_gradient(self.mat)
        t2 = time.time()
        delta_norm = abs(ref_norm - res_norm).max()
        delta_ori = abs(ref_ori - res_ori).max()
        if (PRINT_KEYPOINTS):
            rmin, cmin = 0, 0
            rmax, cmax = rmin+6, cmin+6
github pierrepaleo / sift_pyocl / test / test_image.py
    def test_local_maxmin(self):
        """
        tests the local maximum/minimum detection kernel
        """
        #local_maxmin_setup :
        border_dist, peakthresh, EdgeThresh, EdgeThresh0, octsize, s, nb_keypoints, width, height, DOGS, g = local_maxmin_setup()
        self.s = numpy.int32(s) #1, 2, 3 ... not 4 nor 0.
        self.gpu_dogs = pyopencl.array.to_device(queue, DOGS)
        self.output = pyopencl.array.empty(queue, (nb_keypoints, 4), dtype=numpy.float32, order="C")
        self.output.fill(-1.0, queue) #memset for invalid keypoints
        self.counter = pyopencl.array.zeros(queue, (1,), dtype=numpy.int32, order="C")
        nb_keypoints = numpy.int32(nb_keypoints)
        self.shape = calc_size((DOGS.shape[1], DOGS.shape[0] * DOGS.shape[2]), self.wg) #it's a 3D vector !!

        t0 = time.time()
        k1 = self.program.local_maxmin(queue, self.shape, self.wg,
                                       self.gpu_dogs.data, self.output.data,
                                       border_dist, peakthresh, octsize, EdgeThresh0, EdgeThresh,
                                       self.counter.data, nb_keypoints, self.s, width, height)

        res = self.output.get()
        self.keypoints1 = self.output #for further use
        self.actual_nb_keypoints = self.counter.get()[0] #for further use

        t1 = time.time()
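
Two allocation choices sit side by side here: pyopencl.array.empty for the keypoint output, whose initial contents don't matter because it is immediately filled with -1.0 as an "invalid" sentinel, and pyopencl.array.zeros for the counter, whose starting value does matter. A hedged sketch of that setup, with the kernel launch elided (names and sizes below are illustrative):

import numpy as np
import pyopencl as cl
import pyopencl.array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

nb_keypoints = 1000

# Output rows are uninitialized, then stamped with a sentinel so that
# entries never written by the kernel are recognisable afterwards.
output = pyopencl.array.empty(queue, (nb_keypoints, 4), dtype=np.float32, order="C")
output.fill(-1.0, queue=queue)

# The kernel would atomically increment this to count detected keypoints,
# so it must start at zero.
counter = pyopencl.array.zeros(queue, (1,), dtype=np.int32)

# ... enqueue the detection kernel with output.data and counter.data ...

found = int(counter.get()[0])
keypoints = output.get()[:found]
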
github mathuin / TopoMC / bathy.py
            # For now, at least, do not create retval or chunk buffer here.
            # Iterate through this entire mess once per depth level
            row_list = np.array([x for x in xrange(xlen)])
            negfound = False
            for row_chunk in chunks(row_list, best_rows):
                # Do not prepend buffer rows for first row.
                realfirst = row_chunk[0]
                if (row_chunk[0] != row_list[0]):
                    realfirst -= maxdepth
                # Do not postpend buffer rows for last row.
                reallast = row_chunk[-1]
                if (row_chunk[-1] != row_list[-1]):
                    reallast += maxdepth
                # Create retvals and chunk buffer here instead of above.
                chunk = np.copy(workingarr[realfirst*ylen:reallast*ylen])
                outchunk_buf = cla.empty(self.queue, chunk.shape, chunk.dtype)
                inchunk_buf = cla.to_device(self.queue, chunk)
                newxlen = reallast-realfirst
                newxlen_arg = np.uint32(newxlen)
                lenchunk = newxlen*ylen
                lenchunk_arg = np.uint32(lenchunk)
                currdepth = 0
                while (currdepth <= maxdepth):
                    currdepth_arg = np.uint32(currdepth)
                    if (currdepth % 2 == 0):
                        event = self.program.bathy(self.queue, self.global_size[self.devices[best_device]], self.local_size[self.devices[best_device]], outchunk_buf.data, inchunk_buf.data, newxlen_arg, ylen_arg, currdepth_arg, maxdepth_arg)
                    else:
                        event = self.program.bathy(self.queue, self.global_size[self.devices[best_device]], self.local_size[self.devices[best_device]], inchunk_buf.data, outchunk_buf.data, newxlen_arg, ylen_arg, currdepth_arg, maxdepth_arg)
                    event.wait()
                    currdepth += 1
                # cl.enqueue_copy(self.queue, retvals_arr, retvals_buf)
                # copy important parts of chunk_buf.data back 
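
Note the double-buffering ("ping-pong") scheme: one buffer is initialised from the host with cla.to_device, a second is allocated uninitialized with cla.empty, and the two swap roles as kernel arguments on alternating depth iterations, so each pass can consume the previous pass's output without intermediate copies. A simplified, hedged sketch of that idea (the kernel call is a placeholder, not TopoMC's bathy kernel):

import numpy as np
import pyopencl as cl
import pyopencl.array as cla

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

host_chunk = np.arange(1024, dtype=np.float32)

# One buffer holds the initial data; the other starts uninitialized.
buf_in = cla.to_device(queue, host_chunk)
buf_out = cla.empty(queue, host_chunk.shape, host_chunk.dtype)

n_passes = 4
for depth in range(n_passes):
    # Swap source and destination every pass.
    src, dst = (buf_in, buf_out) if depth % 2 == 0 else (buf_out, buf_in)
    # program.some_kernel(queue, global_size, local_size, dst.data, src.data, ...)

# The final result sits in whichever buffer was written on the last pass.
final = buf_out if (n_passes - 1) % 2 == 0 else buf_in
result = final.get()
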
github inducer / boxtree / boxtree / traversal.py
        level_start_target_or_target_parent_box_nrs, evt_tp = \
                extract_level_start_box_nrs(
                        target_or_target_parent_boxes, wait_for=wait_for)

        wait_for = [evt_s, evt_sp, evt_t, evt_tp]

        # }}}

        # {{{ box extents

        fin_debug("finding box extents")

        box_source_bounding_box_min = cl.array.empty(
                queue, (tree.dimensions, tree.aligned_nboxes),
                dtype=tree.coord_dtype)
        box_source_bounding_box_max = cl.array.empty(
                queue, (tree.dimensions, tree.aligned_nboxes),
                dtype=tree.coord_dtype)

        if tree.sources_are_targets:
            box_target_bounding_box_min = box_source_bounding_box_min
            box_target_bounding_box_max = box_source_bounding_box_max
        else:
            box_target_bounding_box_min = cl.array.empty(
                    queue, (tree.dimensions, tree.aligned_nboxes),
                    dtype=tree.coord_dtype)
            box_target_bounding_box_max = cl.array.empty(
                    queue, (tree.dimensions, tree.aligned_nboxes),
                    dtype=tree.coord_dtype)

        bogus_radii_array = cl.array.empty(queue, 1, dtype=tree.coord_dtype)
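
One detail worth noting: when sources and targets coincide, the target bounding-box arrays are simply aliased to the source ones instead of calling cl.array.empty a second time, halving the allocation. A minimal, hedged sketch of that choice (the dimensions and box count below are made up):

import numpy as np
import pyopencl as cl
import pyopencl.array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

dimensions, aligned_nboxes = 3, 4096
sources_are_targets = True

src_min = cl.array.empty(queue, (dimensions, aligned_nboxes), dtype=np.float32)
src_max = cl.array.empty(queue, (dimensions, aligned_nboxes), dtype=np.float32)

if sources_are_targets:
    # Same boxes, so the target extents can reuse the source arrays.
    tgt_min, tgt_max = src_min, src_max
else:
    tgt_min = cl.array.empty(queue, (dimensions, aligned_nboxes), dtype=np.float32)
    tgt_max = cl.array.empty(queue, (dimensions, aligned_nboxes), dtype=np.float32)
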
github silx-kit / silx / silx / opencl / codec / byte_offset.py
        :return: The decompressed image as a pyopencl array.
        :rtype: pyopencl.array
        """
        assert self.dec_size is not None, \
            "dec_size is a mandatory ByteOffset init argument for decompression"

        events = []
        with self.sem:
            len_raw = numpy.int32(len(raw))
            if len_raw > self.padded_raw_size:
                wg = self.block_size
                self.raw_size = int(len(raw))
                self.padded_raw_size = (self.raw_size + wg - 1) & ~(wg - 1)
                logger.info("increase raw buffer size to %s", self.padded_raw_size)
                buffers = {
                           "raw": pyopencl.array.empty(self.queue, self.padded_raw_size, dtype=numpy.int8),
                           "mask": pyopencl.array.empty(self.queue, self.padded_raw_size, dtype=numpy.int32),
                           "exceptions": pyopencl.array.empty(self.queue, self.padded_raw_size, dtype=numpy.int32),
                           "values": pyopencl.array.empty(self.queue, self.padded_raw_size, dtype=numpy.int32),
                          }
                self.cl_mem.update(buffers)
            else:
                wg = self.block_size

            evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["raw"].data,
                                        raw,
                                        is_blocking=False)
            events.append(EventDescription("copy raw H -> D", evt))
            evt = self.kernels.fill_int_mem(self.queue, (self.padded_raw_size,), (wg,),
                                            self.cl_mem["mask"].data,
                                            numpy.int32(self.padded_raw_size),
                                            numpy.int32(0),
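
The allocation strategy here is resize-on-demand: the pyopencl arrays are only re-created when the incoming data no longer fits, and the new capacity is rounded up to a multiple of the workgroup size with the bit trick (n + wg - 1) & ~(wg - 1), which rounds correctly when the workgroup size is a power of two. A hedged sketch of that idea (buffer names and sizes below are illustrative, not silx's):

import numpy as np
import pyopencl as cl
import pyopencl.array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

block_size = 128                      # assumed to be a power of two
padded_size = 0
buffers = {}

for raw_len in (1000, 500, 5000):     # sizes of successive inputs
    if raw_len > padded_size:
        # Round up to the next multiple of block_size, then reallocate.
        padded_size = (raw_len + block_size - 1) & ~(block_size - 1)
        buffers = {
            "raw": pyopencl.array.empty(queue, padded_size, dtype=np.int8),
            "values": pyopencl.array.empty(queue, padded_size, dtype=np.int32),
        }
    # ... copy the raw input into buffers["raw"] and launch the kernels ...
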
github silx-kit / pyFAI / sandbox / profile_pixelsplitFull.py
program.reduce2(queue, (workgroup_size,), (workgroup_size,), d_preresult.data, d_minmax.data)


min0 = pos[:, :, 0].min()
max0 = pos[:, :, 0].max()
min1 = pos[:, :, 1].min()
max1 = pos[:, :, 1].max()
minmax = (min0, max0, min1, max1)

print(minmax)
print(d_minmax)


memset_size = (bins + workgroup_size - 1) & ~(workgroup_size - 1),

d_outMax = cl.array.empty(queue, (bins,), dtype=numpy.int32)

program.memset_out_int(queue, memset_size, (workgroup_size,), d_outMax.data)

global_size = (size + workgroup_size - 1) & ~(workgroup_size - 1),

program.lut1(queue, global_size, (workgroup_size,), d_pos.data, d_minmax.data, numpy.uint32(size), d_outMax.data)


outMax_1 = numpy.copy(d_outMax)



d_idx_ptr = cl.array.empty(queue, (bins + 1,), dtype=numpy.int32)

d_lutsize = cl.array.empty(queue, (1,), dtype=numpy.int32)
github silx-kit / pyFAI / sandbox / profile_pixelsplitFull.py
d_idx_ptr = cl.array.empty(queue, (bins + 1,), dtype=numpy.int32)

d_lutsize = cl.array.empty(queue, (1,), dtype=numpy.int32)

program.lut2(queue, (1,), (1,), d_outMax.data, d_idx_ptr.data, d_lutsize.data)

lutsize = numpy.ndarray(1, dtype=numpy.int32)

cl.enqueue_copy(queue, lutsize, d_lutsize.data)

print(lutsize)

lut_size = int(lutsize[0])

d_indices = cl.array.empty(queue, (lut_size,), dtype=numpy.int32)
d_data = cl.array.empty(queue, (lut_size,), dtype=numpy.float32)

# d_check_atomics = cl.Buffer(ctx, mf.READ_WRITE, 4*lut_size)


program.memset_out_int(queue, memset_size, (workgroup_size,), d_outMax.data)

d_outData = cl.array.empty(queue, (bins,), dtype=numpy.float32)
d_outCount = cl.array.empty(queue, (bins,), dtype=numpy.float32)
d_outMerge = cl.array.empty(queue, (bins,), dtype=numpy.float32)

program.lut3(queue, global_size, (workgroup_size,), d_pos.data, d_minmax.data, numpy.uint32(size), d_outMax.data, d_idx_ptr.data, d_indices.data, d_data.data)


outMax_2 = numpy.copy(d_outMax)
github silx-kit / pyFAI / sandbox / profile_ocl_lut_pixelsplit3.py
outMax_2 = numpy.copy(d_outMax)

# check_atomics = numpy.ndarray(lut_size, dtype=numpy.int32)

# cl.enqueue_copy(queue, check_atomics, d_check_atomics)


program.memset_out(queue, memset_size, (workgroup_size,), d_outData.data, d_outCount.data, d_outMerge.data)




d_image = cl.array.to_device(queue, data)
d_image_float = cl.array.empty(queue, (size,), dtype=numpy.float32)

# program.s32_to_float(queue, global_size, (workgroup_size,), d_image.data, d_image_float)  # Pilatus1M
program.u16_to_float(queue, global_size, (workgroup_size,), d_image.data, d_image_float.data)  # halfccd

program.csr_integrate(queue, (bins * workgroup_size,), (workgroup_size,), d_image_float.data, d_data.data, d_indices.data, d_idx_ptr.data, d_outData.data, d_outCount.data, d_outMerge.data)


# outData  = numpy.ndarray(bins, dtype=numpy.float32)
# outCount = numpy.ndarray(bins, dtype=numpy.float32)
outMerge = numpy.ndarray(bins, dtype=numpy.float32)


# cl.enqueue_copy(queue,outData, d_outData)
# cl.enqueue_copy(queue,outCount, d_outCount)
cl.enqueue_copy(queue, outMerge, d_outMerge.data)
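
The readback at the end is worth highlighting: the per-bin accumulators were allocated with cl.array.empty, kernels write into their .data buffers, and pyopencl.enqueue_copy then transfers one of them into a preallocated numpy array (Array.get() would achieve the same with less ceremony). A hedged sketch of that round trip (the bin count and names below are illustrative):

import numpy as np
import pyopencl as cl
import pyopencl.array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

bins = 1024

# Device-side accumulator, typically written by an integration kernel.
d_out = cl.array.empty(queue, (bins,), dtype=np.float32)
d_out.fill(0.0)

# ... enqueue kernels that write into d_out.data ...

# Explicit copy into a preallocated host buffer, as in the snippet above ...
out_host = np.empty(bins, dtype=np.float32)
cl.enqueue_copy(queue, out_host, d_out.data)

# ... or the equivalent one-liner on the Array object.
out_host2 = d_out.get()
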
github rjw57 / dtcwt / devel / opencl-dev-2.py
        if input_array.shape != self.input_shape:
            raise ValueError('Input array does not have the shape prepared for')
        if input_array.dtype != np.float32:
            raise ValueError('Input array does not have float32 dtype')
        if workspace.size < self.workspace_size:
            raise ValueError('Workspace is too small ({0} vs {1})'.format(
                workspace.size, self.workspace_size))

        # Create workspace and output array views on workspace
        workspace_array = cla.Array(self.queue,
                input_array.shape, cla.vec.float2,
                data=workspace, offset=0)
        output_array = cla.Array(self.queue,
                input_array.shape, cla.vec.float4,
                data=workspace, offset=(workspace_array.size+1)>>1)
        output_array = cla.empty(self.queue, self.input_shape, cla.vec.float4)

        # output_array, workspace_array = output, workspace

        # C-style ordering where first index is horizontal
        input_array_strides = (1, input_array.shape[1],
                input_array.shape[1]*input_array.shape[0])

        # C-style ordering where first index is horizontal
        workspace_array_strides = (1, workspace_array.shape[1],
                workspace_array.shape[1] * workspace_array.shape[0])

        # Perform column convolution
        input_region = Region(input_array.base_data,
                input_array.offset,
                input_array.shape[::-1],
                (0,0), (1,1), input_array_strides)
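
This last example shows two ways to obtain an Array: constructing pyopencl.array.Array directly on an existing workspace Buffer through the data= and offset= arguments (so several arrays share one allocation), or simply calling cla.empty for fresh storage. A hedged sketch of the contrast (the buffer size, shapes, and byte offsets below are illustrative):

import numpy as np
import pyopencl as cl
import pyopencl.array as cla

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

shape = (256, 256)
nbytes = int(np.prod(shape)) * np.dtype(np.float32).itemsize

# One backing buffer, carved into two array views via byte offsets.
workspace = cl.Buffer(ctx, cl.mem_flags.READ_WRITE, size=2 * nbytes)
a = cla.Array(queue, shape, np.float32, data=workspace, offset=0)
b = cla.Array(queue, shape, np.float32, data=workspace, offset=nbytes)

# The simpler alternative: let pyopencl allocate fresh storage.
c = cla.empty(queue, shape, np.float32)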