How to use the pyopencl.Buffer function in pyopencl

To help you get started, we’ve selected a few pyopencl.Buffer examples based on popular ways it is used in public projects.

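Before the project examples, here is a minimal, self-contained sketch of the two cl.Buffer patterns that recur throughout this page: initializing a device buffer from a host array with COPY_HOST_PTR, and allocating an uninitialized buffer by byte size.

import numpy as np
import pyopencl as cl

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
mf = cl.mem_flags

a = np.arange(16, dtype=np.float32)

# Pattern 1: allocate on the device and copy the host array in one call.
a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a)

# Pattern 2: allocate an uninitialized buffer of an explicit byte size.
out_buf = cl.Buffer(ctx, mf.WRITE_ONLY, size=a.nbytes)

# Transfers after creation go through enqueue_copy.
result = np.empty_like(a)
cl.enqueue_copy(queue, result, a_buf)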

github cherab / core / cherab / tools / inversions / opencl / sart_opencl.py

        self.geometry_matrix_device = cl.Buffer(self.cl_context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=geometry_matrix)
        if copy_column_major:
            geometry_matrix_col_maj = geometry_matrix.flatten(order='F')
            self.geometry_matrix_col_maj_device = cl.Buffer(self.cl_context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=geometry_matrix_col_maj)
        else:
            self.geometry_matrix_col_maj_device = None
        if laplacian_matrix is not None:
            laplacian_matrix = laplacian_matrix.flatten(order='F').astype(np.float32)
            self.laplacian_matrix_device = cl.Buffer(self.cl_context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=laplacian_matrix)
        else:
            self.laplacian_matrix_device = None
        self.cell_ray_densities_device = cl.Buffer(self.cl_context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=cell_ray_densities)
        self.ray_lengths_device = cl.Buffer(self.cl_context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=ray_lengths)
        grad_penalty = np.zeros(self.n_sources, dtype=np.float32)
        self.grad_penalty_device = cl.Buffer(self.cl_context, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=grad_penalty)
        self.solution_device = cl.Buffer(self.cl_context, mf.READ_WRITE, cell_ray_densities.nbytes)
        self.detectors_device = cl.Buffer(self.cl_context, mf.READ_ONLY, ray_lengths.nbytes)
        self.y_hat_device = cl.Buffer(self.cl_context, mf.READ_WRITE, ray_lengths.nbytes)

        # calculating global and local work sizes
        nrem = self.n_sources % block_size
        gws_sources_x = self.n_sources + bool(nrem) * (block_size - nrem)
        mrem = self.m_detectors % block_size
        gws_detectors_x = self.m_detectors + bool(mrem) * (block_size - mrem)
        mrem_rm = self.m_detectors % block_size_row_maj
        gws_detectors_row_maj_x = self.m_detectors + bool(mrem_rm) * (block_size_row_maj - mrem_rm)
        if use_atomic:
            gws_sources_row_maj_y = self.n_sources // steps_per_thread_row_maj + bool(self.n_sources % steps_per_thread_row_maj)
            gws_sources_y = self.n_sources // steps_per_thread + bool(self.n_sources % steps_per_thread)
            gws_detectors_y = self.m_detectors // steps_per_thread + bool(self.m_detectors % steps_per_thread)
        else:
            gws_sources_row_maj_y = gws_sources_y = gws_detectors_y = 1
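
The work-size arithmetic above pads each global size up to the next multiple of the work-group size, because OpenCL 1.x requires the global size to be divisible by the local size. The idiom condenses into a small helper; a sketch, not part of the project:

def round_up(size, group_size):
    # Pad size up to the next multiple of group_size; bool(rem) is 0 or 1.
    rem = size % group_size
    return size + bool(rem) * (group_size - rem)

# round_up(1000, 256) == 1024; round_up(1024, 256) == 1024
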
github benma / pysph / src / sph / radix_sort / radix_sort.py

        if (max_elements % (cta_size * 4)) == 0:
            num_blocks = max_elements // (cta_size * 4)
        else:
            num_blocks = max_elements // (cta_size * 4) + 1

        self.d_temp_keys = cl.Buffer(self.ctx, mf.READ_WRITE, size=self.dtype_size * max_elements)
        self.d_temp_values = cl.Buffer(self.ctx, mf.READ_WRITE, size=self.dtype_size * max_elements)

        self.d_counters = cl.Buffer(self.ctx, mf.READ_WRITE, size=self.dtype_size * self.WARP_SIZE * num_blocks)
        self.d_counters_sum = cl.Buffer(self.ctx, mf.READ_WRITE, size=self.dtype_size * self.WARP_SIZE * num_blocks)
        self.d_block_offsets = cl.Buffer(self.ctx, mf.READ_WRITE, size=self.dtype_size * self.WARP_SIZE * num_blocks)

        numscan = max_elements//2//cta_size*16
        if numscan >= self.MIN_LARGE_ARRAY_SIZE:
            #MAX_WORKGROUP_INCLUSIVE_SCAN_SIZE 1024
            self.scan_buffer = cl.Buffer(self.ctx, mf.READ_WRITE, size = self.dtype_size * numscan // 1024)
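
Buffers created with only a size, like the scratch buffers above, start out uninitialized. If a kernel might read them before writing, they can be zeroed first; a minimal sketch, assuming an existing ctx and queue on an OpenCL 1.2+ platform:

import numpy as np
import pyopencl as cl

nbytes = 4 * 1024
scratch = cl.Buffer(ctx, cl.mem_flags.READ_WRITE, size=nbytes)
# Repeat a one-byte zero pattern across the whole buffer.
cl.enqueue_fill_buffer(queue, scratch, np.uint8(0), 0, nbytes)
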
github enjalot / adventures_in_opencl / python / part2 / part2.py

        self.vel = vel

        #Setup vertex buffer objects and share them with OpenCL as GLBuffers
        self.pos_vbo.bind()
        #For some PyOpenGL versions there is no single buffer but an array of buffers
        #https://github.com/enjalot/adventures_in_opencl/commit/61bfd373478767249fe8a3aa77e7e36b22d453c4
        try:
            self.pos_cl = cl.GLBuffer(self.ctx, mf.READ_WRITE, int(self.pos_vbo.buffer))
            self.col_cl = cl.GLBuffer(self.ctx, mf.READ_WRITE, int(self.col_vbo.buffer))
        except AttributeError:
            self.pos_cl = cl.GLBuffer(self.ctx, mf.READ_WRITE, int(self.pos_vbo.buffers[0]))
            self.col_cl = cl.GLBuffer(self.ctx, mf.READ_WRITE, int(self.col_vbo.buffers[0]))
        self.col_vbo.bind()

        #pure OpenCL arrays
        self.vel_cl = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=vel)
        self.pos_gen_cl = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.pos)
        self.vel_gen_cl = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.vel)
        self.queue.finish()

        # set up the list of GL objects to share with opencl
        self.gl_objects = [self.pos_cl, self.col_cl]
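
GL-shared buffers have to be acquired before OpenCL kernels touch them and released afterwards. A hedged sketch of the per-frame update that typically follows this setup; the kernel name part2, self.num, and the argument list are assumptions, not code from the project:

cl.enqueue_acquire_gl_objects(self.queue, self.gl_objects)
self.program.part2(self.queue, (self.num,), None,
                   self.pos_cl, self.col_cl, self.vel_cl,
                   self.pos_gen_cl, self.vel_gen_cl, numpy.float32(0.01))
cl.enqueue_release_gl_objects(self.queue, self.gl_objects)
self.queue.finish()
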
github reven86 / gpgpu-neuralnet / nn / training.py

    def prepare_training( self, context ):
        """
        Create additional buffers to store learning rate for each weight.
        
        @param context
            Training context.
        """
        super( RPROP, self ).prepare_training( context )

        self.n_buf = pyopencl.Buffer( 
            context.opencl.context, pyopencl.mem_flags.READ_WRITE | pyopencl.mem_flags.COPY_HOST_PTR,
            hostbuf = numpy.array( [ self.n ] * context._weights_buf_size, numpy.float32 )
            )
        self.prev_gradient_buf = pyopencl.Buffer( 
            context.opencl.context, pyopencl.mem_flags.READ_ONLY | pyopencl.mem_flags.COPY_HOST_PTR,
            hostbuf = numpy.zeros( [ context._weights_buf_size ], numpy.float32 )
            )
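
COPY_HOST_PTR only snapshots the host array at creation time; any later synchronization goes through pyopencl.enqueue_copy, in either direction. A minimal sketch, assuming a command queue and the buffers created above:

import numpy
import pyopencl

gradient = numpy.zeros( [ context._weights_buf_size ], numpy.float32 )

# Device -> host: read the stored gradients back.
pyopencl.enqueue_copy( queue, gradient, self.prev_gradient_buf )

# Host -> device: push an updated array into the same buffer.
pyopencl.enqueue_copy( queue, self.prev_gradient_buf, gradient )
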
github appleminis / gravity / gravity / galaxy.py

        self.clbuf = cl.Buffer(self.ctx,  # attribute name assumed; snippet begins mid-call
                            cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR,
                            hostbuf=self.data)
                            
        self.clbufvit = cl.Buffer(self.ctx,
                            cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR,
                            hostbuf=self.datavit)
                            
        self.clbufdensity = cl.Buffer(self.ctx,
                            cl.mem_flags.READ_WRITE,
                            size=D*D*4)
                            
        self.clbufdensityint = cl.Buffer(self.ctx,
                            cl.mem_flags.READ_WRITE,
                            size=D*D*4)
                            
        self.clbufdensityvit = cl.Buffer(self.ctx,
                            cl.mem_flags.READ_WRITE,
                            size=D*D*8)
                            
        self.clbufdensityc = cl.Buffer(self.ctx,
                            cl.mem_flags.READ_WRITE,
                            size=D*D*8)
                            
        self.clbuffft = cl.Buffer(self.ctx,
                            cl.mem_flags.READ_WRITE,
                            size=D*D*8)
                            
        self.clbufifft = cl.Buffer(self.ctx,
                            cl.mem_flags.READ_WRITE,
                            size=D*D*8)
                            
        self.clbufpotential = cl.Buffer(self.ctx,
                            cl.mem_flags.READ_WRITE,
                            size=D*D*8)  # trailing arguments assumed; the snippet is truncated here
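
The hard-coded byte counts are presumably 4 bytes per float32 cell and 8 bytes per FFT value (complex64); that reading is an assumption. Deriving sizes from a numpy dtype avoids the magic numbers; a sketch:

import numpy as np

density_nbytes = D * D * np.dtype(np.float32).itemsize   # == D*D*4
fft_nbytes = D * D * np.dtype(np.complex64).itemsize     # == D*D*8
self.clbuffft = cl.Buffer(self.ctx, cl.mem_flags.READ_WRITE, size=fft_nbytes)
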
github inducer / pyopencl / examples / demo_meta_template.py

local_size = 256
thread_strides = 32
macroblock_count = 33
dtype = numpy.float32
total_size = local_size*thread_strides*macroblock_count

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

a = numpy.random.randn(total_size).astype(dtype)
b = numpy.random.randn(total_size).astype(dtype)

mf = cl.mem_flags
a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a)
b_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b)
c_buf = cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes)

from mako.template import Template

tpl = Template("""
    __kernel void add(
            __global ${ type_name } *tgt, 
            __global const ${ type_name } *op1, 
            __global const ${ type_name } *op2)
    {
      int idx = get_local_id(0)
        + ${ local_size } * ${ thread_strides }
        * get_group_id(0);

      % for i in range(thread_strides):
          <% offset = i*local_size %>
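
The snippet breaks off inside the Mako template. A hedged reconstruction of the complete flow, reusing the names defined above; the loop body, render call, and verification follow the example's pattern rather than quoting it:

tpl = Template("""
    __kernel void add(
            __global ${ type_name } *tgt,
            __global const ${ type_name } *op1,
            __global const ${ type_name } *op2)
    {
      int idx = get_local_id(0)
        + ${ local_size } * ${ thread_strides }
        * get_group_id(0);

      % for i in range(thread_strides):
          <% offset = i*local_size %>
          tgt[idx + ${ offset }] = op1[idx + ${ offset }] + op2[idx + ${ offset }];
      % endfor
    }""")

rendered = tpl.render(type_name="float", local_size=local_size,
                      thread_strides=thread_strides)
knl = cl.Program(ctx, str(rendered)).build().add

# Each work-item handles thread_strides elements, so the global size is
# total_size // thread_strides, with local_size work-items per group.
knl(queue, (total_size // thread_strides,), (local_size,), c_buf, a_buf, b_buf)

result = numpy.empty_like(a)
cl.enqueue_copy(queue, result, c_buf)
assert numpy.allclose(result, a + b)
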
github tridesclous / tridesclous / tridesclous / peeler_engine_classic.py

            'wf_size':peak_width*nb_channel,'nb_cluster' : nb_cluster}
            prg = pyopencl.Program(self.ctx, kernel)
            opencl_prg = prg.build(options='-cl-mad-enable')
            self.kern_waveform_distance = getattr(opencl_prg, 'waveform_distance')
            
            # create CL buffers
            wf_shape = centers.shape[1:]
            one_waveform = np.zeros(wf_shape, dtype='float32')
            self.one_waveform_cl = pyopencl.Buffer(self.ctx, mf.READ_WRITE| mf.COPY_HOST_PTR, hostbuf=one_waveform)

            self.catalogue_center_cl = pyopencl.Buffer(self.ctx, mf.READ_WRITE| mf.COPY_HOST_PTR, hostbuf=centers)

            self.waveform_distance = np.zeros((nb_cluster), dtype='float32')
            self.waveform_distance_cl = pyopencl.Buffer(self.ctx, mf.READ_WRITE| mf.COPY_HOST_PTR, hostbuf=self.waveform_distance)

            self.sparse_mask_level1_cl = pyopencl.Buffer(self.ctx, mf.READ_WRITE| mf.COPY_HOST_PTR, hostbuf=self.sparse_mask_level1.astype('u1'))

            rms_waveform_channel = np.zeros(nb_channel, dtype='float32')
            self.rms_waveform_channel_cl = pyopencl.Buffer(self.ctx, mf.READ_WRITE| mf.COPY_HOST_PTR, hostbuf=rms_waveform_channel)
            
            self.cl_global_size = (centers.shape[0], centers.shape[2])
            self.cl_local_size = (centers.shape[0], 1) # faster on a GPU because of memory access
        
        # force engine to global
        p = dict(self.catalogue['peak_detector_params'])
        p.pop('engine')
        p.pop('method')
        
        self.peakdetector_method = 'global'
        self.peakdetector_engine = 'numpy'
        PeakDetector_class = get_peak_detector_class(self.peakdetector_method, self.peakdetector_engine)
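
During peeling, one_waveform and the distances change for every tested peak, so the device buffers created above are refreshed and read back on each iteration. A hedged sketch of that round trip; the queue attribute and the kernel's argument list are assumptions:

pyopencl.enqueue_copy(self.queue, self.one_waveform_cl, one_waveform)
event = self.kern_waveform_distance(self.queue, self.cl_global_size, self.cl_local_size,
                                    self.one_waveform_cl, self.catalogue_center_cl,
                                    self.waveform_distance_cl)
event.wait()
pyopencl.enqueue_copy(self.queue, self.waveform_distance, self.waveform_distance_cl)
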
github enjalot / adventures_in_opencl / experiments / bitonic / bitonic.py

    def __init__(self, max_elements, cta_size, dtype):

        plat = cl.get_platforms()[0]
        device = plat.get_devices()[0]
        self.ctx = cl.Context(devices=[device])
        self.queue = cl.CommandQueue(self.ctx, device)



        self.loadProgram()
        self.uintsz = dtype.itemsize
        self.d_tempKeys = cl.Buffer(self.ctx, mf.READ_WRITE, size=self.uintsz * max_elements)
        self.d_tempValues = cl.Buffer(self.ctx, mf.READ_WRITE, size=self.uintsz * max_elements)
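
The constructor reads dtype.itemsize, so it should receive a numpy dtype instance, and it relies on a module-level mf = cl.mem_flags alias that the snippet does not show. Hypothetical usage, with the class name assumed:

import numpy as np

sorter = BitonicSort(max_elements=1 << 20, cta_size=256, dtype=np.dtype(np.uint32))
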
github ChrisCummins / clgen / clgen / cldrive.py

                    arg.bufsize = nonbuf.nbytes
                    arg.devdata = cl.LocalMemory(arg.bufsize)
                elif arg.is_pointer:
                    # If arg is a pointer to global memory, then we
                    # allocate host memory and populate with values:
                    arg.hostdata = nparray(veclength).astype(dtype)

                    # Determine flags to pass to OpenCL buffer creation:
                    arg.flags = cl.mem_flags.COPY_HOST_PTR
                    if arg.is_const:
                        arg.flags |= cl.mem_flags.READ_ONLY
                    else:
                        arg.flags |= cl.mem_flags.READ_WRITE

                    # Allocate device memory:
                    arg.devdata = cl.Buffer(
                        driver.context, arg.flags, hostbuf=arg.hostdata)

                    # Record transfer overhead. If it's a const buffer,
                    # we're not reading back to host.
                    if arg.is_const:
                        transfer += arg.hostdata.nbytes
                    else:
                        transfer += 2 * arg.hostdata.nbytes
                else:
                    # If arg is not a pointer, then it's a scalar value:
                    arg.devdata = dtype(size)
        except Exception as e:
            raise E_BAD_ARGS(e)

        return KernelPayload(driver.context, args, (size,), transfer)
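
The flag selection above (COPY_HOST_PTR plus READ_ONLY for const pointers, READ_WRITE otherwise) is a common pattern. Condensed into a standalone helper it might look like this; to_device is a hypothetical name, not part of clgen:

import pyopencl as cl

def to_device(ctx, host_array, read_only=False):
    # Allocate a device buffer and copy host_array into it in one call.
    flags = cl.mem_flags.COPY_HOST_PTR
    flags |= cl.mem_flags.READ_ONLY if read_only else cl.mem_flags.READ_WRITE
    return cl.Buffer(ctx, flags, hostbuf=host_array)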