How to use the pyopencl.array.to_device function in pyopencl

To help you get started, we’ve selected a few pyopencl examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pierrepaleo / sift_pyocl / test / test_keypoints_old.py View on Github external
if not(USE_CPP_SIFT) and (100 < keypoints_end-keypoints_start): print "NOTE: Python implementation of descriptors is slow. Do not handle more than 100 keypoints, or grab a coffee..."
        
        if (USE_CPU):
            print "Using CPU-optimized kernels"
            wg = 1,
            shape = keypoints.shape[0]*wg[0],
        else:
#            wg = (8, 8, 8)
#            shape = int(keypoints.shape[0]*wg[0]), 8, 8
            wg = (8, 4, 4)
            shape = int(keypoints.shape[0]*wg[0]), 4, 4
                        
        gpu_keypoints = pyopencl.array.to_device(queue, keypoints)
        #NOTE: for the following line, use pyopencl.array.empty instead of pyopencl.array.zeros if the keypoints are compacted
        gpu_descriptors = pyopencl.array.zeros(queue, (keypoints_end - keypoints_start, 128), dtype=numpy.uint8, order="C")
        gpu_grad = pyopencl.array.to_device(queue, grad)
        gpu_ori = pyopencl.array.to_device(queue, ori)

        keypoints_start, keypoints_end = numpy.int32(keypoints_start), numpy.int32(keypoints_end)
        grad_height, grad_width = numpy.int32(grad.shape)
        counter = pyopencl.array.to_device(queue, keypoints_end)
        
        t0 = time.time()
        k1 = self.program.descriptor(queue, shape, wg,
            gpu_keypoints.data, gpu_descriptors.data, gpu_grad.data, gpu_ori.data, numpy.int32(octsize),
            keypoints_start, counter.data, grad_width, grad_height)
        res = gpu_descriptors.get()
        t1 = time.time()

        if (USE_CPP_SIFT):
            import feature
            sc = feature.SiftAlignment()
github inducer / boxtree / boxtree / distributed / partition.py View on Github external
"""
        self.queue = queue
        self.traversal = traversal
        self.tree = traversal.tree

        # {{{ fetch tree structure and interaction lists to device memory

        self.box_parent_ids_dev = cl.array.to_device(queue, self.tree.box_parent_ids)
        self.target_boxes_dev = cl.array.to_device(queue, traversal.target_boxes)
        self.target_or_target_parent_boxes_dev = cl.array.to_device(
            queue, traversal.target_or_target_parent_boxes)

        # list 1
        self.neighbor_source_boxes_starts_dev = cl.array.to_device(
            queue, traversal.neighbor_source_boxes_starts)
        self.neighbor_source_boxes_lists_dev = cl.array.to_device(
            queue, traversal.neighbor_source_boxes_lists)

        # list 2
        self.from_sep_siblings_starts_dev = cl.array.to_device(
            queue, traversal.from_sep_siblings_starts)
        self.from_sep_siblings_lists_dev = cl.array.to_device(
            queue, traversal.from_sep_siblings_lists)

        # list 3
        self.target_boxes_sep_smaller_by_source_level_dev = np.empty(
            (self.tree.nlevels,), dtype=object)
        for ilevel in range(self.tree.nlevels):
            self.target_boxes_sep_smaller_by_source_level_dev[ilevel] = \
                cl.array.to_device(
                    queue,
                    traversal.target_boxes_sep_smaller_by_source_level[ilevel]
github inducer / boxtree / boxtree / distributed / partition.py View on Github external
self.from_sep_bigger_lists_dev = cl.array.to_device(
            queue, traversal.from_sep_bigger_lists)

        # }}}

        if self.tree.targets_have_extent:
            # list 3 close
            if traversal.from_sep_close_smaller_starts is not None:
                self.from_sep_close_smaller_starts_dev = cl.array.to_device(
                    queue, traversal.from_sep_close_smaller_starts)
                self.from_sep_close_smaller_lists_dev = cl.array.to_device(
                    queue, traversal.from_sep_close_smaller_lists)

            # list 4 close
            if traversal.from_sep_close_bigger_starts is not None:
                self.from_sep_close_bigger_starts_dev = cl.array.to_device(
                    queue, traversal.from_sep_close_bigger_starts)
                self.from_sep_close_bigger_lists_dev = cl.array.to_device(
                    queue, traversal.from_sep_close_bigger_lists)

        # helper kernel for ancestor box query
        self.mark_parent_knl = cl.elementwise.ElementwiseKernel(
            queue.context,
            "__global char *current, __global char *parent, "
            "__global %s *box_parent_ids" % dtype_to_ctype(self.tree.box_id_dtype),
            "if(i != 0 && current[i]) parent[box_parent_ids[i]] = 1"
        )

        # helper kernel for adding boxes from interaction list 1 and 4
        self.add_interaction_list_boxes = cl.elementwise.ElementwiseKernel(
            queue.context,
            Template("""
github KristofferC / FeynSimul / src / host.py View on Github external
pimcKernel.kernel = pimcKernel.prg.metropolis

    #Initial paths are created (the initial path vector is filled with zeros,
    #meaning no movement of the particles)
    try:
        pimcKernel.paths = cl.array.zeros(pimcKernel.queue,
                          (RP.nbrOfWalkers, RP.N * system.DOF),
                          np.float32)

        #Buffer for storing number of accepted values and
        #seeds for the xorshift PRNG
        pimcKernel.accepts = cl.array.zeros(pimcKernel.queue,
                (pimcKernel.nbrOfThreads, ), np.uint32)

        #np.random.seed(0)
        pimcKernel.seeds = cl.array.to_device(pimcKernel.queue,
                         (np.random.randint(0, high = 2 ** 31 - 1,
                          size = (pimcKernel.nbrOfThreads + 1, 4))
                          ).astype(np.uint32))
        if RP.enableOperator:
            #pyopencl.array objects are created for storing
            #the calculated operator means from each thread
            pimcKernel.operatorValues = cl.array.zeros(pimcKernel.queue,
                    pimcKernel.nbrOfThreads * len(RP.operators), np.float32)

        if RP.enableCorrelator:
            #pyopencl.array objects are created for storing
            #the calculated operator means from each thread
            pimcKernel.correlatorValues = cl.array.zeros(pimcKernel.queue,
                    (RP.nbrOfWalkers, len(RP.correlators), RP.N / 2),
                    np.float32)
github benshope / PyOpenCL-Tutorial / 040_elementwise.py View on Github external
# Use OpenCL To Add Two Random Arrays (Using PyOpenCL Arrays and Elementwise)

import pyopencl as cl  # Import the OpenCL GPU computing API
import pyopencl.array as cl_array  # Import PyOpenCL Array (a Numpy array plus an OpenCL buffer object)
import numpy  # Import Numpy number tools

# Set up the OpenCL context and the command queue used for all transfers.
context = cl.create_some_context()  # Initialize the Context
queue = cl.CommandQueue(context)  # Instantiate a Queue

# Two random float32 input arrays copied to the device, plus an output array.
a = cl_array.to_device(queue, numpy.random.randn(10).astype(numpy.float32))  # Create a random pyopencl array
b = cl_array.to_device(queue, numpy.random.randn(10).astype(numpy.float32))  # Create a random pyopencl array
c = cl_array.empty_like(a)  # Create an empty pyopencl destination array

# Bound to ``add_kernel`` rather than ``sum`` so the Python builtin ``sum``
# is not shadowed; the compiled kernel itself is still named "sum".
add_kernel = cl.elementwise.ElementwiseKernel(
    context,
    "float *a, float *b, float *c",
    "c[i] = a[i] + b[i]",
    "sum")
# Create an elementwise kernel object
#  - Arguments: a string formatted as a C argument list
#  - Operation: a snippet of C that carries out the desired map operation
#  - Name: the function name as which the kernel is compiled

add_kernel(a, b, c)  # Call the elementwise kernel

print("a: {}".format(a))
print("b: {}".format(b))
print("c: {}".format(c))
# Print all three arrays, to show the elementwise addition worked
github inducer / loopy / examples / quadrature.py View on Github external
def make_well_conditioned_dev_matrix(queue, shape, dtype=np.float32, 
        order="C", ran_factor=1, id_factor=5, inc_factor=0, od=0):
    """Build a random matrix with a boosted diagonal and copy it to the device.

    The host matrix is ``ran_factor * randn(*shape)`` plus a diagonal term,
    converted to *dtype* with memory layout *order*.

    :arg queue: command queue handed to ``cl_array.to_device``.
    :arg shape: an ``int`` (interpreted as a square matrix) or a shape tuple.
    :arg dtype: element type of the resulting array.
    :arg order: memory layout passed to ``np.asarray``.
    :arg ran_factor: scale applied to the standard-normal random part.
    :arg id_factor: value placed on the *od*-th diagonal.
    :arg inc_factor: if non-zero, the main diagonal is overwritten with
        ``inc_factor * arange(max(shape))``.
    :arg od: diagonal offset (the ``k`` argument of ``np.eye``).
    :returns: the result of ``cl_array.to_device(queue, host_matrix)``.
    """
    if isinstance(shape, int):
        shape = (shape, shape)

    # Build the diagonal term on a square of the larger extent, then crop it.
    side = max(shape)
    diag_term = id_factor*np.eye(side, k=od)
    if inc_factor:
        idx = np.arange(side)
        diag_term[idx, idx] = inc_factor*idx

    rows, cols = shape[0], shape[1]
    noise = ran_factor*np.random.randn(*shape)
    host_matrix = np.asarray(
        noise + diag_term[:rows, :cols],
        dtype=dtype, order=order)

    return cl_array.to_device(queue, host_matrix)
github silx-kit / fabio / sandbox / cbf.py View on Github external
# Create the OpenCL context (interactive=True is passed to pyopencl).
ctx = pyopencl.create_some_context(interactive=True)


# Load the decoded image and the raw (still compressed) CBF payload.
fname = "testimages/run2_1_00148.cbf"
cbf = fabio.cbfimage.CbfImage()
data = fabio.open(fname).data
raw = cbf.read(fname, only_raw=True)
properties = pyopencl.command_queue_properties.PROFILING_ENABLE
# properties = None
queue = pyopencl.CommandQueue(ctx, properties=properties)


# numpy.fromstring is deprecated for binary input; numpy.frombuffer is the
# documented replacement. It returns a read-only view of ``raw``, which is
# sufficient here because the array is only used as a copy source.
# NOTE(review): assumes ``raw`` is a bytes-like object -- confirm.
raw_n = numpy.frombuffer(raw, dtype="int8")
size = raw_n.size

# Device buffers: the raw input, plus int32 work/output arrays.
raw_d = pyopencl.array.to_device(queue, raw_n)
int_d = pyopencl.array.empty(queue, (size,), dtype="int32")
data_d = pyopencl.array.empty(queue, (data.size,), dtype="int32")
tmp1_d = pyopencl.array.zeros_like(data_d)
tmp2_d = pyopencl.array.zeros_like(data_d)
tmp3_d = pyopencl.array.zeros_like(data_d)
lem_d = pyopencl.array.empty_like(data_d)
zero_d = pyopencl.array.zeros(queue, shape=1, dtype="int32")

# Read the kernel source with a context manager so the file handle is closed.
with open("sandbox/cbf.cl") as cl_file:
    src = cl_file.read()
prg = pyopencl.Program(ctx, src).build()

for i in range(11):
    WG = 1 << i
    print("#" * 80)
    print("WG: %s" % WG)
    la = pyopencl.LocalMemory(4 * WG)
github libtangle / qcgpu / qcgpu / backend.py View on Github external
def probabilities(self):
        """Gets the squared absolute value of each of the amplitudes.

        A zero-filled float32 array with ``2**self.num_qubits`` entries is
        copied to the device, the ``calculate_probabilities`` kernel is run
        with ``self.buffer`` as input and that array as output, and the
        result is copied back to the host.

        :returns: the host copy of the output array (``out.get()``).
        """
        n_entries = 2**self.num_qubits
        host_zeros = np.zeros(n_entries, dtype=np.float32)
        out = pycl_array.to_device(self.queue, host_zeros)

        # Global size is the output shape; the local size is left as None.
        global_size = out.shape
        program.calculate_probabilities(
            self.queue, global_size, None,
            self.buffer.data, out.data)

        return out.get()
github benshope / PyOpenCL-Tutorial / examples / subset_dot_cl.py View on Github external
import numpy as np
import pyopencl as cl
import pyopencl.array

# Set up the OpenCL context and command queue.
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

n = 3

# Host data: 0 .. n**2 - 1, shown before and after conversion to float32.
# print() is called with a single argument throughout so that the output is
# the same under Python 2 and Python 3.
a = np.arange(n**2)
print(a)
a = a.astype(np.float32)
print(a)
g_a = cl.array.to_device(queue, a)

# Index array selecting elements start .. end - 1 of the device array.
start = 2
end = 7
subset = cl.array.to_device(queue, np.arange(start, end))
print(subset.dtype)

print("Subset Array {}".format(subset))
# Device result followed by the NumPy reference value for comparison.
print(cl.array.subset_dot(subset, g_a, g_a))
print(np.dot(a[start:end], a[start:end]))