How to use pyopencl.mem_flags in pyopencl

To help you get started, we’ve selected a few pyopencl examples that show popular ways pyopencl.mem_flags is used in public projects.
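
Before the project snippets below, a quick orientation: pyopencl.mem_flags is not a function but a collection of buffer-creation flag constants (READ_ONLY, WRITE_ONLY, READ_WRITE, COPY_HOST_PTR, USE_HOST_PTR, ...) that are combined with the bitwise-or operator when constructing a pyopencl.Buffer. A minimal, self-contained sketch (the kernel name and array sizes are illustrative, not taken from any of the projects below):

import numpy as np
import pyopencl as cl

ctx = cl.create_some_context()   # pick any available OpenCL device
queue = cl.CommandQueue(ctx)
mf = cl.mem_flags                # common shorthand, as in the snippets below

host_in = np.arange(16, dtype=np.float32)
host_out = np.empty_like(host_in)

# Read-only input buffer, initialised by copying the host array to the device
in_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=host_in)
# Write-only output buffer, sized explicitly in bytes
out_buf = cl.Buffer(ctx, mf.WRITE_ONLY, host_out.nbytes)

prg = cl.Program(ctx, """
__kernel void twice(__global const float *a, __global float *b)
{
    int i = get_global_id(0);
    b[i] = 2.0f * a[i];
}
""").build()

prg.twice(queue, host_in.shape, None, in_buf, out_buf)
cl.enqueue_copy(queue, host_out, out_buf)   # read the result back to the host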


github oysstu / pyopencl-in-action / ch10 / reduction_complete.py
print('WG Max Size: ' + str(wg_max_size))
print('Num groups: ' + str(num_groups))
print('Local mem size: ' + str(dev.local_mem_size))

# Print the preferred/native floatN lengths (which is optimal for the compiler/hardware, respectively)
# Vectorization can still yield higher throughput even if preferred/native is 1, due to better use of memory bandwidth
print('Preferred floatN size: ' + str(dev.preferred_vector_width_float))
print('Native floatN size: ' + str(dev.native_vector_width_float))

# Data and device buffers
data = np.arange(start=0, stop=ARRAY_SIZE, dtype=np.float32)
result = np.zeros(shape=(1,), dtype=np.float32)

data_buffer = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=data)
sum_buffer = cl.Buffer(context, cl.mem_flags.WRITE_ONLY, size=np.dtype(np.float32).itemsize)

partial_sums = cl.LocalMemory(wg_max_size * np.dtype(np.float32).itemsize * VECTOR_LENGTH)

# Execute kernels
local_size = wg_max_size
global_size = ARRAY_SIZE // VECTOR_LENGTH
start_event = prog.reduction_vector(queue, (global_size,), (local_size,), data_buffer, partial_sums)
print('\nGlobal size: ' + str(global_size))

# There is some overhead involved with spawning a new kernel (code caching)
# A good rule of thumb is therefore to create the kernel object outside of loops (see the short sketch after this snippet)
# Ref: https://lists.tiker.net/pipermail/pyopencl/2016-February/002107.html
kernel_reduction_vector = prog.reduction_vector

# Perform successive stages of reduction
while global_size // local_size > local_size:
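
The loop body is cut off at this point in the listing. Purely as an illustration of the tip in the comment above (the argument list and the size update are placeholders, not the original file's code), reusing a cached kernel object inside the loop looks roughly like this:

kernel_reduction_vector = prog.reduction_vector   # fetch the kernel object once, outside the loop
while global_size // local_size > local_size:
    global_size //= local_size                    # placeholder size update
    kernel_reduction_vector(queue, (global_size,), (local_size,),
                            data_buffer, partial_sums)   # placeholder arguments
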
github hervold / py2opencl / py2opencl / driver.py
def apply(self, *new_arrays):
        """
        executes kernel against arguments previously bound to self via bind method

        new arguments can be supplied in order to avoid re-generation of the kernel for repeated use,
        but they MUST match the type and shape of the initially bound arguments
        """
        if new_arrays:
            self.arrays = new_arrays

        mf = cl.mem_flags
        res_np = np.zeros( shape=self.arrays[0].shape, dtype=self.return_typ )

        buffs = [ cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=arr )
                  for arr in self.arrays ]
        # results:
        buffs.append( cl.Buffer(self.ctx, mf.WRITE_ONLY, res_np.nbytes) )
        # run!
        self.prog.sum(self.queue, self.arrays[0].shape, None, *buffs)

        cl.enqueue_copy( self.queue, res_np, buffs[-1] )
        return res_np.copy()
github dscho / Xpra / trunk / src / xpra / codecs / csc_opencl / colorspace_converter.py
def gen_rgb_to_yuv():
    global context
    from xpra.codecs.csc_opencl.opencl_kernels import gen_rgb_to_yuv_kernels, rgb_mode_to_indexes, indexes_to_rgb_mode
    #for RGB to YUV support we need to be able to handle the channel_order,
    #with READ_ONLY and both with COPY_HOST_PTR and USE_HOST_PTR since we
    #do not know in advance which one we can use..
    RGB_to_YUV_KERNELS = {}
    sif = pyopencl.get_supported_image_formats(context, mem_flags.WRITE_ONLY,  pyopencl.mem_object_type.IMAGE2D)
    sif_copy = pyopencl.get_supported_image_formats(context, mem_flags.READ_ONLY | mem_flags.COPY_HOST_PTR,  pyopencl.mem_object_type.IMAGE2D)
    debug("get_supported_image_formats(READ_ONLY | COPY_HOST_PTR, IMAGE2D)=%s", sif)
    sif_use = pyopencl.get_supported_image_formats(context, mem_flags.READ_ONLY | mem_flags.USE_HOST_PTR,  pyopencl.mem_object_type.IMAGE2D)
    debug("get_supported_image_formats(READ_ONLY | USE_HOST_PTR, IMAGE2D)=%s", sif)
    if not has_image_format(sif_copy, pyopencl.channel_order.R, pyopencl.channel_type.UNSIGNED_INT8) or \
       not has_image_format(sif_use, pyopencl.channel_order.R, pyopencl.channel_type.UNSIGNED_INT8):
        log.error("cannot convert to YUV without support for READ_ONLY R channel with both COPY_HOST_PTR and USE_HOST_PTR")
        return  {}
    missing = []
    found_rgb = set()
    def add_rgb_to_yuv(src_rgb_mode, kernel_rgb_mode, upload_rgb_mode, channel_order):
        debug("add_rgb_to_yuv%s", (src_rgb_mode, kernel_rgb_mode, upload_rgb_mode, CHANNEL_ORDER_TO_STR.get(channel_order)))
        kernels = gen_rgb_to_yuv_kernels(kernel_rgb_mode)
        #debug("kernels(%s)=%s", rgb_mode, kernels)
        for key, k_def in kernels.items():
            ksrc, dst = key
            assert ksrc==kernel_rgb_mode
            kname, ksrc = k_def
            RGB_to_YUV_KERNELS[(src_rgb_mode, dst)] = (kname, upload_rgb_mode, channel_order, ksrc)
github dscho / Xpra / trunk / src / xpra / codecs / csc_opencl / colorspace_converter.py
divs = get_subsampling_divs(self.src_format)
        wwidth = dimdiv(self.dst_width, max(x_div for x_div, _ in divs))
        wheight = dimdiv(self.dst_height, max(y_div for _, y_div in divs))
        globalWorkSize, localWorkSize  = self.get_work_sizes(wwidth, wheight)

        kernelargs = [self.queue, globalWorkSize, localWorkSize]

        iformat = pyopencl.ImageFormat(pyopencl.channel_order.R, pyopencl.channel_type.UNSIGNED_INT8)
        input_images = []
        for i in range(3):
            _, y_div = divs[i]
            plane = pixels[i]
            if type(plane)==str:
                flags = mem_flags.READ_ONLY | mem_flags.COPY_HOST_PTR
            else:
                flags = mem_flags.READ_ONLY | mem_flags.USE_HOST_PTR
            shape = strides[i], self.src_height/y_div
            iimage = pyopencl.Image(context, flags, iformat, shape=shape, hostbuf=plane)
            input_images.append(iimage)

        #output image:
        oformat = pyopencl.ImageFormat(self.channel_order, pyopencl.channel_type.UNORM_INT8)
        oimage = pyopencl.Image(context, mem_flags.WRITE_ONLY, oformat, shape=(self.dst_width, self.dst_height))

        kernelargs += input_images + [numpy.int32(self.src_width), numpy.int32(self.src_height),
                       numpy.int32(self.dst_width), numpy.int32(self.dst_height),
                       self.sampler, oimage]

        kstart = time.time()
        debug("convert_image(%s) calling %s%s after upload took %.1fms",
              image, self.kernel_function_name, tuple(kernelargs), 1000.0*(kstart-start))
        self.kernel_function(*kernelargs)
github robbert-harms / MOT / mot / cl_routines / sampling / metropolis_hastings.py
hostbuf=self._current_chain_position)
        data_buffers.append(current_chain_position_buffer)
        readout_items.append([current_chain_position_buffer, self._current_chain_position])

        proposal_buffer = cl.Buffer(self._cl_context,
                                    cl.mem_flags.READ_WRITE | cl.mem_flags.USE_HOST_PTR,
                                    hostbuf=self._proposal_state)
        data_buffers.append(proposal_buffer)
        readout_items.append([proposal_buffer, self._proposal_state])

        mcmc_state_buffers = {}
        for mcmc_state_element in sorted(self._mh_state_dict):
            host_array = self._mh_state_dict[mcmc_state_element]['data']

            buffer = cl.Buffer(self._cl_context,
                               cl.mem_flags.READ_WRITE | cl.mem_flags.USE_HOST_PTR,
                               hostbuf=host_array)
            mcmc_state_buffers[mcmc_state_element] = buffer

            data_buffers.append(buffer)
            readout_items.append([buffer, host_array])

        data_buffers.append(cl.LocalMemory(workgroup_size * np.dtype('double').itemsize))
        data_buffers.extend(self._data_struct_manager.get_kernel_inputs(self._cl_context, workgroup_size))

        return data_buffers, readout_items
github robbert-harms / MOT / mot / cl_routines / mapping / final_parameters_transformer.py
def _create_buffers(self):
        all_buffers = []
        parameters_buffer = cl.Buffer(self._cl_run_context.context,
                                      cl.mem_flags.READ_WRITE | cl.mem_flags.USE_HOST_PTR,
                                      hostbuf=self._parameters)
        all_buffers.append(parameters_buffer)

        for data in self._model.get_data():
            all_buffers.append(cl.Buffer(self._cl_run_context.context,
                                         cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=data))

        return all_buffers, parameters_buffer
github benshope / PyOpenCL-Tutorial / 021_array_sum.py
# Use OpenCL To Add Two Random Arrays (This Way Shows Details)

import pyopencl as cl  # Import the OpenCL GPU computing API
import numpy as np  # Import NumPy number tools

platform = cl.get_platforms()[0]  # Select the first platform [0]
device = platform.get_devices()[0]  # Select the first device on this platform [0]
context = cl.Context([device])  # Create a context with your device
queue = cl.CommandQueue(context)  # Create a command queue with your context

np_a = np.random.rand(50000).astype(np.float32)  # Create a random np array
np_b = np.random.rand(50000).astype(np.float32)  # Create a random np array
np_c = np.empty_like(np_a)  # Create an empty destination array

cl_a = cl.Buffer(context, cl.mem_flags.COPY_HOST_PTR, hostbuf=np_a)
cl_b = cl.Buffer(context, cl.mem_flags.COPY_HOST_PTR, hostbuf=np_b)
cl_c = cl.Buffer(context, cl.mem_flags.WRITE_ONLY, np_c.nbytes)
# Create three buffers (plans for areas of memory on the device)

kernel = """__kernel void sum(__global float* a, __global float* b, __global float* c)
{
    int i = get_global_id(0);
    c[i] = a[i] + b[i];
}"""  # Create a kernel (a string containing C-like OpenCL device code)

program = cl.Program(context, kernel).build()
# Compile the kernel code into an executable OpenCL program

program.sum(queue, np_a.shape, None, cl_a, cl_b, cl_c)
# Enqueue the program for execution, causing data to be copied to the device
#  - queue: the command queue the program will be sent to
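
The tutorial snippet is truncated here, before the result is read back to the host. The usual final step with these variable names (a reasonable assumption, not necessarily the exact code in the original file) is a blocking copy from the device buffer into the destination array:

cl.enqueue_copy(queue, np_c, cl_c)  # copy the device result in cl_c back into np_c
# np_c now holds the element-wise sum of np_a and np_b
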
github robbert-harms / MOT / mot / cl_routines / mapping / objective_list_calculator.py
def _create_buffers(self):
        objectives_buffer = cl.Buffer(self._cl_run_context.context,
                                  cl.mem_flags.WRITE_ONLY | cl.mem_flags.USE_HOST_PTR,
                                  hostbuf=self._objectives)

        all_buffers = [cl.Buffer(self._cl_run_context.context,
                                 cl.mem_flags.READ_ONLY | cl.mem_flags.USE_HOST_PTR,
                                 hostbuf=self._parameters),
                       objectives_buffer]

        for data in [self._data_info[key] for key in sorted(self._data_info)]:
            all_buffers.append(cl.Buffer(self._cl_run_context.context,
                                         cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=data.get_data()))

        return all_buffers, objectives_buffer
github theMarix / clBandwidth / runner.py
devices = [platform.get_devices()[device]]
			self.ctx = cl.Context(devices, properties)
		else:
			self.ctx = cl.create_some_context()
		self.queue = cl.CommandQueue(self.ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)

		self.device = self.queue.device
		print '#Device: {0}'.format(self.device.name)
		print '#Memory size: {0} KiB'.format(self.device.global_mem_size / 1024)
		print '#Maximum buffer size: {0} KiB'.format(self.device.max_mem_alloc_size / 1024)

		f = open('kernels.cl', 'r')
		fstr = "".join(f.readlines())
		self.prg = cl.Program(self.ctx, fstr).build()

		self.in_buf = cl.Buffer(self.ctx, cl.mem_flags.READ_ONLY, max_mem_size)
		self.out_buf = cl.Buffer(self.ctx, cl.mem_flags.WRITE_ONLY, max_mem_size)

		self.local_threads = local_threads
		self.global_threads = global_threads
		self.max_mem_size = max_mem_size
github robbert-harms / MDT / mdt / cl_routines / mapping / calculate_model_estimates.py
def _create_buffers(self):
        evaluations_buffer = cl.Buffer(self._cl_context,
                                       cl.mem_flags.WRITE_ONLY | cl.mem_flags.USE_HOST_PTR,
                                       hostbuf=self._evaluations)

        all_buffers = [cl.Buffer(self._cl_context,
                                 cl.mem_flags.READ_ONLY | cl.mem_flags.USE_HOST_PTR,
                                 hostbuf=self._parameters),
                       evaluations_buffer]
        all_buffers.extend(self._data_struct_manager.get_kernel_inputs(self._cl_context, 1))
        return all_buffers, evaluations_buffer