# Example: wrapping a PyOpenCL kernel in a GPU quadrature integrator.
import numpy as np
import pyopencl as cl
import pyopencl.array

def make_gpu_integrator(p, eps, A):
    # tensor_gauss and toy_kernel.cl come from the surrounding project.
    ps = [p] * 4
    q_unmapped = tensor_gauss(ps)

    code = open('toy_kernel.cl', 'r').read()
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)
    prg = cl.Program(ctx, code).build()
    # mf = cl.mem_flags
    # a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    # b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)

    # Quadrature points and weights are uploaded to the device once.
    gpu_qx = cl.array.to_device(queue, q_unmapped[0].flatten().astype(np.float32))
    gpu_qw = cl.array.to_device(queue, q_unmapped[1].astype(np.float32))

    def integrator(mins, maxs):
        print(mins.shape[0])
        block = (32, 1, 1)
        remaining = mins.shape[0] % block[0]
        grid_main = (mins.shape[0] // block[0], 1, 1)
        grid_rem = (remaining, 1, 1)
        out = np.empty(mins.shape[0]).astype(np.float32)
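
# In PyOpenCL a launch takes the *total* global size plus an optional work-group
# (local) size, so a CUDA-style (grid, block) split like the one above maps to
# global = grid * block, local = block, with the tail handled either by a second
# launch or by padding the global size and guarding inside the kernel. A
# self-contained sketch with a trivial kernel (illustrative only, not toy_kernel.cl):
import numpy as np
import pyopencl as cl
import pyopencl.array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
prg = cl.Program(ctx, """
__kernel void fill_ones(__global float *out, int n)
{
    int i = get_global_id(0);
    if (i < n) out[i] = 1.0f;   /* guard against the padded tail */
}
""").build()

n = 1000
block = (32, 1, 1)
padded = ((n + block[0] - 1) // block[0]) * block[0]   # round up to a multiple of 32
out = cl.array.zeros(queue, n, np.float32)
prg.fill_ones(queue, (padded, 1, 1), block, out.data, np.int32(n))
print(int(out.get().sum()))   # 1000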
# Example (from a signal-preprocessor class): allocate per-channel statistics
# buffers, fill in an OpenCL kernel template, and build it.
self.signals_medians_cl = pyopencl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR,
                                           hostbuf=np.zeros(self.nb_channel, dtype=self.output_dtype))
self.signals_mads_cl = pyopencl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR,
                                        hostbuf=np.zeros(self.nb_channel, dtype=self.output_dtype))

# CL program: the kernel source is a template whose placeholders are filled in
# with the preprocessor's configuration before it is compiled.
if not self.common_ref_removal and self.normalize:
    extra_code_nomalize = _extra_code_nomalize
else:
    extra_code_nomalize = ''
kernel_formated = processor_kernel % dict(
    forward_chunksize=self.chunksize,
    backward_chunksize=self.backward_chunksize,
    lostfront_chunksize=self.lostfront_chunksize,
    nb_section=self.nb_section,
    nb_channel=self.nb_channel,
    extra_code_nomalize=extra_code_nomalize)
prg = pyopencl.Program(self.ctx, kernel_formated)
self.opencl_prg = prg.build(options='-cl-mad-enable')
self.max_wg_size = self.ctx.devices[0].get_info(pyopencl.device_info.MAX_WORK_GROUP_SIZE)
self.kern_forward_backward_filter = getattr(self.opencl_prg, 'forward_backward_filter')
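
# Standalone sketch of the same source-templating idea: the kernel source is a
# Python format string filled in before it is handed to cl.Program (the names
# and numbers here are illustrative, not the processor_kernel used above).
import numpy as np
import pyopencl as cl
import pyopencl.array

kernel_template = """
__kernel void scale(__global float *x)
{
    int i = get_global_id(0);
    if (i < %(n)d) x[i] *= %(factor)ff;
}
"""
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
n = 16
src = kernel_template % dict(n=n, factor=2.0)
prg = cl.Program(ctx, src).build(options='-cl-mad-enable')
x = cl.array.to_device(queue, np.arange(n, dtype=np.float32))
prg.scale(queue, (n,), None, x.data)
print(x.get())   # every element doubled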
# Example (poclbm-style miner): build phatk.cl, caching the compiled device
# binary on disk and reusing it on later runs.
import pyopencl as cl
from hashlib import md5

self.context = cl.Context([self.device], None, None)
if self.device.extensions.find('cl_amd_media_ops') != -1:
    self.defines += ' -DBITALIGN'
    self.defines += ' -DBFI_INT'

kernel_file = open('phatk.cl', 'r')
kernel = kernel_file.read()
kernel_file.close()

# Cache file name derived from platform, device, build defines and kernel source.
m = md5()
m.update(''.join([self.device.platform.name, self.device.platform.version,
                  self.device.name, self.defines, kernel]).encode())
cache_name = '%s.elf' % m.hexdigest()
binary = None
try:
    binary = open(cache_name, 'rb')
    self.miner = cl.Program(self.context, [self.device], [binary.read()]).build(self.defines)
except (IOError, cl.LogicError):
    # No usable cache: build from source, optionally patch the binary
    # (patch() is the project's BFI_INT binary patcher), then write the cache.
    self.miner = cl.Program(self.context, kernel).build(self.defines)
    if self.defines.find('-DBITALIGN') != -1:
        patchedBinary = patch(self.miner.binaries[0])
        self.miner = cl.Program(self.context, [self.device], [patchedBinary]).build(self.defines)
    binaryW = open(cache_name, 'wb')
    binaryW.write(self.miner.binaries[0])
    binaryW.close()
finally:
    if binary:
        binary.close()

if self.options.worksize == -1:
    self.options.worksize = self.miner.search.get_work_group_info(
        cl.kernel_work_group_info.WORK_GROUP_SIZE, self.device)
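
# Self-contained sketch of the two work-group-size queries used above (any
# available OpenCL device; the tiny copy kernel is illustrative only):
import pyopencl as cl

ctx = cl.create_some_context()
prg = cl.Program(ctx, """
__kernel void copy_one(__global const float *src, __global float *dst)
{
    int i = get_global_id(0);
    dst[i] = src[i];
}
""").build()
dev = ctx.devices[0]
print("device-wide limit:",
      dev.get_info(cl.device_info.MAX_WORK_GROUP_SIZE))
print("limit for this kernel:",
      prg.copy_one.get_work_group_info(
          cl.kernel_work_group_info.WORK_GROUP_SIZE, dev))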
    # Keep only lines that are not preprocessor directives.
    if i[0] != '#':
        cleanedPreprocessedCode += i + '\n'
print(cleanedPreprocessedCode)

# Create the OpenCL context and a profiling-enabled command queue.
self._ctx = cl.create_some_context()
queueProperties = cl.command_queue_properties.PROFILING_ENABLE
self._queue = cl.CommandQueue(self._ctx, properties=queueProperties)

programBuildOptions = "-cl-fast-relaxed-math"
if not self._enableDouble:
    programBuildOptions += " -cl-single-precision-constant"

# Build the program and identify metropolis as the kernel.
self._prg = cl.Program(self._ctx, kernelCode).build(options=programBuildOptions)
self._kernel = self._prg.metropolis

# Initial paths are created (the initial path vector is filled with zeros,
# meaning no movement of the particles).
self._paths = cl.array.zeros(self._queue,
                             (self._nbrOfWalkers, self._N * self._system.DOF),
                             np.float64 if self._enableDouble else np.float32)
# Buffer for storing the number of accepted moves and
# seeds for the xorshift PRNG.
self._accepts = cl.array.zeros(self._queue,
                               (self._nbrOfThreads,), np.uint32)
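
# Self-contained sketch of what PROFILING_ENABLE buys you: kernel launches
# return events whose .profile attribute carries device-side timestamps.
import numpy as np
import pyopencl as cl
import pyopencl.array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx,
                        properties=cl.command_queue_properties.PROFILING_ENABLE)
x = cl.array.zeros(queue, (1 << 20,), np.float32)
prg = cl.Program(ctx, """
__kernel void fill(__global float *x) { x[get_global_id(0)] = 1.0f; }
""").build()
evt = prg.fill(queue, x.shape, None, x.data)
evt.wait()
print("kernel time: %.3f ms" % ((evt.profile.end - evt.profile.start) * 1e-6))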
# Example by Roger Pau Monné: fill a buffer with ones from a trivial kernel,
# printing the build log if compilation fails.
import numpy as np
import pyopencl as cl

demo_r = np.empty((500, 5), dtype=np.uint32)
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
mf = cl.mem_flags
demo_buf = cl.Buffer(ctx, mf.WRITE_ONLY, demo_r.nbytes)

prg = cl.Program(ctx, """
    __kernel void demo(__global uint *demo)
    {
        int i;
        int gid = get_global_id(0);
        for (i = 0; i < 5; i++)
        {
            demo[gid * 5 + i] = (uint) 1;
        }
    }""")
try:
    prg.build()
except Exception:
    print("Error:")
    print(prg.get_build_info(ctx.devices[0], cl.program_build_info.LOG))
    raise
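
# A sketch of the usual continuation of this example (not part of the snippet
# above): run the built kernel over the 500 rows, then read the buffer back.
prg.demo(queue, (demo_r.shape[0],), None, demo_buf)
cl.enqueue_copy(queue, demo_r, demo_buf)
print(demo_r)   # every entry is now 1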
def get_rendered_kernel(self, txt, kernel_name):
    import pyopencl as cl
    prg = cl.Program(self.context, self(txt)).build(self.options)
    kernel_name_prefix = self.var_dict.get("kernel_name_prefix")
    if kernel_name_prefix is not None:
        kernel_name = kernel_name_prefix + kernel_name
    return getattr(prg, kernel_name)
# Main procedure (vector-add host code): kernelsource, LENGTH and the
# deviceinfo helper come from the surrounding exercise file.
import numpy
import pyopencl as cl
from time import time

# Create a compute context
# (asks the user to select a platform/device on the CLI).
context = cl.create_some_context()
# Print out device info.
deviceinfo.output_device_info(context.devices[0])
# Create a command queue.
queue = cl.CommandQueue(context)
# Create the compute program from the source buffer and build it.
program = cl.Program(context, kernelsource).build()

# Create a and b vectors and fill them with random float values.
h_a = numpy.random.rand(LENGTH).astype(numpy.float32)
h_b = numpy.random.rand(LENGTH).astype(numpy.float32)
# Create an empty c vector (a+b) to be returned from the compute device.
h_c = numpy.empty(LENGTH).astype(numpy.float32)

# Create the input (a, b) arrays in device memory and copy data from the host.
d_a = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=h_a)
d_b = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=h_b)
# Create the output (c) array in device memory.
d_c = cl.Buffer(context, cl.mem_flags.WRITE_ONLY, h_c.nbytes)

# Start the timer.
rtime = time()
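
# A minimal sketch of the typical continuation: launch the kernel and read back
# the result. The kernel name "vadd" and its (a, b, c) argument list are
# assumptions for illustration; they are not given in the snippet above.
program.vadd(queue, (LENGTH,), None, d_a, d_b, d_c)
queue.finish()
print("Kernel ran in %f seconds" % (time() - rtime))
cl.enqueue_copy(queue, h_c, d_c)
print("Max error:", numpy.abs(h_c - (h_a + h_b)).max())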
# Use OpenCL To Add Two Random Arrays (This Way Hides Details)
import pyopencl as cl # Import the OpenCL GPU computing API
import pyopencl.array as pycl_array # Import PyOpenCL Array (a Numpy array plus an OpenCL buffer object)
import numpy as np # Import Numpy number tools
context = cl.create_some_context() # Initialize the Context
queue = cl.CommandQueue(context) # Instantiate a Queue
# Create two random pyopencl arrays.
a = pycl_array.to_device(queue, np.random.rand(50000).astype(np.float32))
b = pycl_array.to_device(queue, np.random.rand(50000).astype(np.float32))
c = pycl_array.empty_like(a)  # Create an empty pyopencl destination array
program = cl.Program(context, """
__kernel void sum(__global const float *a, __global const float *b, __global float *c)
{
int i = get_global_id(0);
c[i] = a[i] + b[i];
}""").build() # Create the OpenCL program
program.sum(queue, a.shape, None, a.data, b.data, c.data) # Enqueue the program for execution and store the result in c
print("a: {}".format(a))
print("b: {}".format(b))
print("c: {}".format(c))
# Print all three arrays, to show sum() worked
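
# A quick host-side check (a sketch, not part of the original example):
# pyopencl arrays can be pulled back with .get() and compared with NumPy.
assert np.allclose(c.get(), a.get() + b.get())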
# Example: launching a kernel with a global_offset so that only elements from
# `start` onwards are computed.
import numpy
import numpy.linalg as la
import pyopencl as cl

n = 50000
a = numpy.random.rand(n).astype(numpy.float32)
b = numpy.random.rand(n).astype(numpy.float32)
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
mf = cl.mem_flags
a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a)
b_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b)
dest_buf = cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes)
prg = cl.Program(ctx, """
__kernel void sum(__global const float *a,
__global const float *b, __global float *c)
{
int gid = get_global_id(0);
c[gid] = a[gid] + b[gid];
}
""").build()
start = 2000
prg.sum(queue, (n-start,), None, a_buf, b_buf, dest_buf,
global_offset=(start,))
a_plus_b = numpy.empty_like(a)
cl.enqueue_copy(queue, a_plus_b, dest_buf)
print(la.norm((a_plus_b - (a+b))[start:]), la.norm(a_plus_b))
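
# Because of the global_offset, work-items only ran for indices start..n-1, so
# only that slice of dest_buf is meaningful; a sketch of the matching check:
assert numpy.allclose(a_plus_b[start:], (a + b)[start:])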
# Example (OpenGL-interop particle simulation fragment): allocate device buffers,
# share a GL VBO with OpenCL, and build one program per kernel source string.
self.clbufgrad = cl.Buffer(self.ctx,
                           cl.mem_flags.READ_WRITE,
                           size=D * D * 8)
# Create an interop object to access the GL VBO from OpenCL.
self.glclbuf = cl.GLBuffer(self.ctx, cl.mem_flags.READ_WRITE,
                           int(self.glbuf.buffers[0]))
# Build the OpenCL programs.
self.program = cl.Program(self.ctx, clkernel).build()
self.addvit = cl.Program(self.ctx, clkeraddvit).build()
self.setzero = cl.Program(self.ctx, clkersetzero).build()
self.density = cl.Program(self.ctx, clkerdensity).build()
self.tocomplex = cl.Program(self.ctx, clkertocomplex).build()
self.gravity = cl.Program(self.ctx, clkergravity).build()
self.potential = cl.Program(self.ctx, clkerpotential).build()
self.grad = cl.Program(self.ctx, clkergrad).build()
self.acceleration = cl.Program(self.ctx, clkeracceleration).build()
self.visu = cl.Program(self.ctx, clkervisu).build()
# FFT plan on the same queue (Plan is assumed to come from pyfft.cl).
self.plan = Plan((D, D), queue=self.queue)
self.enctime = 0
self.rdbufint = np.zeros((D * D), np.int32)
self.daint = np.zeros((D * D), np.int32)
self.rdgl = np.zeros((D, D, 3), np.uint8)
# Wait for all queued PyOpenCL commands to finish.
self.queue.finish()
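
# Per-frame usage sketch for the shared VBO (not from the code above; the kernel
# name "update_vbo" is a placeholder): OpenCL must acquire the GL buffer before
# writing to it and release it again before OpenGL draws from it.
cl.enqueue_acquire_gl_objects(self.queue, [self.glclbuf])
# self.program.update_vbo(self.queue, (D * D,), None, self.glclbuf, ...)
cl.enqueue_release_gl_objects(self.queue, [self.glclbuf])
self.queue.finish()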