How to use the pyopencl.Program function in pyopencl

To help you get started, we’ve selected a few pyopencl examples based on popular ways it is used in public projects.

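Every example on this page follows the same core pattern: compile an OpenCL C source string (or a cached binary) into a program with cl.Program, call .build(), and then launch the program's kernels as attributes of the built object. Below is a minimal, self-contained sketch of that flow; the double_it kernel, array size, and buffer names are purely illustrative and not taken from any of the projects listed here.

import numpy as np
import pyopencl as cl

# Select a platform/device (interactively, or via the PYOPENCL_CTX
# environment variable) and create a command queue on it.
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

# OpenCL C source for a trivial kernel that doubles each element.
src = """
__kernel void double_it(__global const float *a, __global float *result)
{
    int gid = get_global_id(0);
    result[gid] = 2.0f * a[gid];
}
"""

# Compile the source and build it for all devices in the context.
prg = cl.Program(ctx, src).build()

# Host data and device buffers.
h_a = np.arange(16, dtype=np.float32)
h_result = np.empty_like(h_a)
mf = cl.mem_flags
d_a = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=h_a)
d_result = cl.Buffer(ctx, mf.WRITE_ONLY, h_result.nbytes)

# Kernels are exposed as attributes of the built program:
# (queue, global_size, local_size, *kernel_args).
prg.double_it(queue, h_a.shape, None, d_a, d_result)
cl.enqueue_copy(queue, h_result, d_result)
print(h_result)  # [0. 2. 4. ...]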

github tbenthompson / tectosaur / tables / test_ndadapt.py
def make_gpu_integrator(p, eps, A):
    ps = [p] * 4
    q_unmapped = tensor_gauss(ps)

    code = open('toy_kernel.cl', 'r').read()
    import pyopencl as cl
    import pyopencl.array
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)
    prg = cl.Program(ctx, code).build()

    # mf = cl.mem_flags
    # a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    # b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)

    gpu_qx = cl.array.to_device(queue, q_unmapped[0].flatten().astype(np.float32))
    gpu_qw = cl.array.to_device(queue, q_unmapped[1].astype(np.float32))

    def integrator(mins, maxs):
        print(mins.shape[0])
        block = (32, 1, 1)
        remaining = mins.shape[0] % block[0]
        grid_main = (mins.shape[0] // block[0], 1, 1)
        grid_rem = (remaining, 1, 1)

        out = np.empty(mins.shape[0]).astype(np.float32)
github tridesclous / tridesclous / tridesclous / signalpreprocessor.py
        else:
            self.signals_medians_cl = pyopencl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np.zeros(self.nb_channel, dtype= self.output_dtype))
            self.signals_mads_cl = pyopencl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np.zeros(self.nb_channel, dtype= self.output_dtype))
        
        #CL prog
        if not self.common_ref_removal and  self.normalize:
            extra_code_nomalize = _extra_code_nomalize
        else:
            extra_code_nomalize = ''
        
        kernel_formated = processor_kernel%dict(forward_chunksize=self.chunksize, backward_chunksize=self.backward_chunksize,
                        lostfront_chunksize=self.lostfront_chunksize, nb_section=self.nb_section, nb_channel=self.nb_channel, 
                        extra_code_nomalize=extra_code_nomalize)
        #~ print(kernel_formated)
        #~ exit()
        prg = pyopencl.Program(self.ctx, kernel_formated)
        self.opencl_prg = prg.build(options='-cl-mad-enable')
        
        self.max_wg_size = self.ctx.devices[0].get_info(pyopencl.device_info.MAX_WORK_GROUP_SIZE)


        self.kern_forward_backward_filter = getattr(self.opencl_prg, 'forward_backward_filter')
github c00w / bitHopper / bitHopper / plugins / poclbm / BitcoinMiner.py
		self.context = cl.Context([self.device], None, None)
		if (self.device.extensions.find('cl_amd_media_ops') != -1):
			self.defines += ' -DBITALIGN'
			self.defines += ' -DBFI_INT'

		kernel_file = open('phatk.cl', 'r')
		kernel = kernel_file.read()
		kernel_file.close()
		m = md5(); m.update(''.join([self.device.platform.name, self.device.platform.version, self.device.name, self.defines, kernel]))
		cache_name = '%s.elf' % m.hexdigest()
		binary = None
		try:
			binary = open(cache_name, 'rb')
			self.miner = cl.Program(self.context, [self.device], [binary.read()]).build(self.defines)
		except (IOError, cl.LogicError):
			self.miner = cl.Program(self.context, kernel).build(self.defines)
			if (self.defines.find('-DBITALIGN') != -1):
				patchedBinary = patch(self.miner.binaries[0])
				self.miner = cl.Program(self.context, [self.device], [patchedBinary]).build(self.defines)
			binaryW = open(cache_name, 'wb')
			binaryW.write(self.miner.binaries[0])
			binaryW.close()
		finally:
			if binary: binary.close()

		if (self.options.worksize == -1):
			self.options.worksize = self.miner.search.get_work_group_info(cl.kernel_work_group_info.WORK_GROUP_SIZE, self.device)
github KristofferC / FeynSimul / FeynSimul / kernel.py
                    if i[0] != '#':
                        cleanedPreprocessedCode += i + '\n'
            print cleanedPreprocessedCode

        #Create the OpenCL context and command queue
        self._ctx = cl.create_some_context()
        queueProperties = cl.command_queue_properties.PROFILING_ENABLE
        self._queue = cl.CommandQueue(self._ctx,
                                           properties=queueProperties)

        programBuildOptions = "-cl-fast-relaxed-math"
        if not self._enableDouble:
            programBuildOptions += " -cl-single-precision-constant"

        #Build the program and identify metropolis as the kernel
        self._prg = (cl.Program(self._ctx, kernelCode)
                         .build(options=programBuildOptions))
        self._kernel = self._prg.metropolis

        #Initial paths are created (the initial path vector is filled with zeros,
        #meaning no movement of the particles)
        try:
            self._paths = cl.array.zeros(self._queue,
                              (self._nbrOfWalkers, self._N * self._system.DOF),
                              np.float64 if self._enableDouble else np.float32)

            #Buffer for storing number of accepted values and
            #seeds for the xorshfitPRNG
            self._accepts = cl.array.zeros(self._queue,
                    (self._nbrOfThreads, ), np.uint32)

            #np.random.seed(0)
github benshope / PyOpenCL-Tutorial / examples / narray.py
# example by Roger Pau Monn'e
import pyopencl as cl
import numpy as np

demo_r = np.empty( (500,5), dtype=np.uint32)
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

mf = cl.mem_flags
demo_buf = cl.Buffer(ctx, mf.WRITE_ONLY, demo_r.nbytes)

prg = cl.Program(ctx,
"""
__kernel void demo(__global uint *demo)
{
    int i;
    int gid = get_global_id(0);
    for(i=0; i<5;i++)
    {
        demo[gid*5+i] = (uint) 1;
    }
}""")

try:
    prg.build()
except:
    print("Error:")
    print(prg.get_build_info(ctx.devices[0], cl.program_build_info.LOG))
github inducer / pyopencl / pyopencl / tools.py
    def get_rendered_kernel(self, txt, kernel_name):
        import pyopencl as cl
        prg = cl.Program(self.context, self(txt)).build(self.options)

        kernel_name_prefix = self.var_dict.get("kernel_name_prefix")
        if kernel_name_prefix is not None:
            kernel_name = kernel_name_prefix+kernel_name

        return getattr(prg, kernel_name)
github HandsOnOpenCL / Exercises-Solutions / Exercises / Exercise05 / Python / vadd.py
# Main procedure

# Create a compute context
# Ask the user to select a platform/device on the CLI
context = cl.create_some_context()

# Print out device info
deviceinfo.output_device_info(context.devices[0])

# Create a command queue
queue = cl.CommandQueue(context)

# Create the compute program from the source buffer
# and build it
program = cl.Program(context, kernelsource).build()

# Create a and b vectors and fill with random float values
h_a = numpy.random.rand(LENGTH).astype(numpy.float32)
h_b = numpy.random.rand(LENGTH).astype(numpy.float32)
# Create an empty c vector (a+b) to be returned from the compute device
h_c = numpy.empty(LENGTH).astype(numpy.float32)

# Create the input (a, b) arrays in device memory and copy data from host
d_a = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=h_a)
d_b = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=h_b)
# Create the output (c) array in device memory
d_c = cl.Buffer(context, cl.mem_flags.WRITE_ONLY, h_c.nbytes)

# Start the timer
rtime = time()
github benshope / PyOpenCL-Tutorial / 020_array_sum.py
# Use OpenCL To Add Two Random Arrays (This Way Hides Details)

import pyopencl as cl  # Import the OpenCL GPU computing API
import pyopencl.array as pycl_array  # Import PyOpenCL Array (a Numpy array plus an OpenCL buffer object)
import numpy as np  # Import Numpy number tools

context = cl.create_some_context()  # Initialize the Context
queue = cl.CommandQueue(context)  # Instantiate a Queue

a = pycl_array.to_device(queue, np.random.rand(50000).astype(np.float32))
b = pycl_array.to_device(queue, np.random.rand(50000).astype(np.float32))  
# Create two random pyopencl arrays
c = pycl_array.empty_like(a)  # Create an empty pyopencl destination array

program = cl.Program(context, """
__kernel void sum(__global const float *a, __global const float *b, __global float *c)
{
  int i = get_global_id(0);
  c[i] = a[i] + b[i];
}""").build()  # Create the OpenCL program

program.sum(queue, a.shape, None, a.data, b.data, c.data)  # Enqueue the program for execution and store the result in c

print("a: {}".format(a))
print("b: {}".format(b))
print("c: {}".format(c))  
# Print all three arrays, to show sum() worked
github benshope / PyOpenCL-Tutorial / examples / demo-offset.py
import numpy.linalg as la

n = 50000

a = numpy.random.rand(n).astype(numpy.float32)
b = numpy.random.rand(n).astype(numpy.float32)

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

mf = cl.mem_flags
a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a)
b_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b)
dest_buf = cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes)

prg = cl.Program(ctx, """
    __kernel void sum(__global const float *a,
    __global const float *b, __global float *c)
    {
      int gid = get_global_id(0);
      c[gid] = a[gid] + b[gid];
    }
    """).build()

start = 2000
prg.sum(queue, (n-start,), None, a_buf, b_buf, dest_buf,
        global_offset=(start,))

a_plus_b = numpy.empty_like(a)
cl.enqueue_copy(queue, a_plus_b, dest_buf)

print(la.norm((a_plus_b - (a+b))[start:]), la.norm(a_plus_b))
github appleminis / gravity / gravity / galaxy.py
        self.clbufgrad = cl.Buffer(self.ctx,
                            cl.mem_flags.READ_WRITE,
                            size=D*D*8)
                            
        # create an interop object to access to GL VBO from OpenCL
        self.glclbuf = cl.GLBuffer(self.ctx, cl.mem_flags.READ_WRITE,
                            int(self.glbuf.buffers[0]))
        # build the OpenCL program
        self.program = cl.Program(self.ctx, clkernel).build()
        self.addvit = cl.Program(self.ctx, clkeraddvit).build()
        self.setzero = cl.Program(self.ctx, clkersetzero).build()
        self.density = cl.Program(self.ctx, clkerdensity).build()
        self.tocomplex = cl.Program(self.ctx, clkertocomplex).build()
        self.gravity = cl.Program(self.ctx, clkergravity).build()
        self.potential = cl.Program(self.ctx, clkerpotential).build()
        self.grad = cl.Program(self.ctx, clkergrad).build()
        self.acceleration = cl.Program(self.ctx, clkeracceleration).build()
        self.visu = cl.Program(self.ctx, clkervisu).build()

        self.plan = Plan((D,D), queue=self.queue)
        
        self.enctime=0
        self.rdbufint = np.zeros((D*D),np.int32)
        self.daint = np.zeros((D*D),np.int32)
        self.rdgl = np.zeros((D,D,3),np.uint8)
        # release the PyOpenCL queue
        self.queue.finish()