def _test_constantone(dims, nsources, ntargets, dtype):
    import numpy as np
    from mpi4py import MPI

    # Get the current rank
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    # Initialization
    trav = None
    sources_weights = None

    # Configure PyOpenCL
    import pyopencl as cl
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)
    print(queue.context.devices)

    if rank == 0:
        # Generate random particles
        from boxtree.tools import make_normal_particle_array as p_normal
        sources = p_normal(queue, nsources, dims, dtype, seed=15)
        targets = (p_normal(queue, ntargets, dims, dtype, seed=18)
                   + np.array([2, 0, 0])[:dims])

        # Constant one source weights
        sources_weights = np.ones((nsources,), dtype=dtype)

        # Build the global tree
        from boxtree import TreeBuilder
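        # Hypothetical continuation -- the snippet is truncated here. With
        # boxtree's TreeBuilder the rank-0 build would look roughly like
        # this (max_particles_in_box is an arbitrary choice for the sketch):
        tb = TreeBuilder(ctx)
        tree, _ = tb(queue, sources, targets=targets,
                     max_particles_in_box=30)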
def run():
    # H, W, sample, numx, numy, x0, y0, xstep and ystep are expected to
    # come from the enclosing module scope.
    import numpy as np
    import pyopencl as cl
    import pyopencl.array as cla

    cx = cl.create_some_context()
    q = cl.CommandQueue(
        cx, properties=cl.command_queue_properties.PROFILING_ENABLE)

    img = np.zeros((H, W), np.float32)
    import moonshine
    data = moonshine.open('samples/sonata.png')[0].im
    img[:data.shape[0], :data.shape[1]] = data != 0

    # Integer division keeps slice bounds and kernel args ints on Python 3
    dimg = cla.to_device(q, img[:H // sample, :W // sample].copy())
    costs = cla.zeros(q, (numx, numy, numy), np.float32)

    prg = cl.Program(cx, open("boundary.cl").read()).build()
    prg.boundary_cost.set_scalar_arg_dtypes(
        [None, np.int32, np.int32, np.int32, np.int32,
         np.int32, np.int32, np.int32, None])
    return prg.boundary_cost(
        q, (numx, numy, numy), (1, 1, 1), dimg.data,
        np.int32(W // sample), np.int32(y0), np.int32(ystep), np.int32(numy),
        np.int32(x0), np.int32(xstep), np.int32(numx), costs.data), costs
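# A sketch of consuming run()'s return values -- an Event from the kernel
# launch and the pyopencl.array cost volume:
evt, costs = run()
evt.wait()            # block until boundary_cost has finished
result = costs.get()  # copy the cost volume back to the host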
import numpy
import pyopencl as cl
import pyopencl.array  # makes cl.array usable below
import fabio
import pyFAI

ai = pyFAI.load("testimages/halfccd.poni")
data = fabio.open("testimages/halfccd.edf").data
workgroup_size = 256
bins = 1000
pos_in = ai.array_from_unit(data.shape, "corner", unit="2th_deg", scale=False)
# Each pixel contributes 4 corners of 2 coordinates (8 values); integer
# division keeps the reshape argument an int on Python 3
pos = pos_in.reshape(pos_in.size // 8, 4, 2)
pos_size = pos.size
# size = data.size
size = pos_size // 8
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
mf = cl.mem_flags
d_pos = cl.array.to_device(queue, pos)
d_preresult = cl.array.empty(queue, (4 * workgroup_size,), dtype=numpy.float32)
d_minmax = cl.array.empty(queue, (4,), dtype=numpy.float32)
with open("../openCL/ocl_lut_pixelsplit.cl", "r") as kernelFile:
    kernel_src = kernelFile.read()
compile_options = "-D BINS=%i -D NIMAGE=%i -D WORKGROUP_SIZE=%i -D EPS=%e" % \
                  (bins, size, workgroup_size, numpy.finfo(numpy.float32).eps)
print(compile_options)
program = cl.Program(ctx, kernel_src).build(options=compile_options)
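# Optional sanity check (a sketch; CL_PROGRAM_KERNEL_NAMES requires an
# OpenCL 1.2+ platform): list the kernels compiled from the file.
print(program.get_info(cl.program_info.KERNEL_NAMES))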
# The simplest possible PyOpenCL program - Sums two arrays
import pyopencl as cl # Access to the OpenCL API
import numpy # Tools to create and manipulate numbers
context = cl.create_some_context() # Create a Context (one per computer)
queue = cl.CommandQueue(context) # Create a Command Queue (usually one per processor)
kernel = """__kernel void sum(__global float* a, __global float* b, __global float* c)
{
int i = get_global_id(0);
c[i] = a[i] + b[i];
}""" # The C-like code that will run on the GPU
program = cl.Program(context, kernel).build() # Compile the kernel into a Program
flags = cl.mem_flags # Create a shortcut to OpenCL's memory instructions
a = numpy.random.rand(5).astype(numpy.float32)
b = numpy.random.rand(5).astype(numpy.float32)  # Create two small float arrays
a_buffer = cl.Buffer(context, flags.COPY_HOST_PTR, hostbuf=a)
b_buffer = cl.Buffer(context, flags.COPY_HOST_PTR, hostbuf=b)
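# The snippet stops before the output buffer and the launch; a minimal
# completion using only standard PyOpenCL calls:
c = numpy.empty_like(a)
c_buffer = cl.Buffer(context, flags.WRITE_ONLY, c.nbytes)
program.sum(queue, a.shape, None, a_buffer, b_buffer, c_buffer)
cl.enqueue_copy(queue, c, c_buffer)  # Read the result back to the host
print(c - (a + b))  # Expect all zeros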
from numpy import diag, eye, float32
from compas.hpc import give_cl  # assumed: same module that provides get_cl below

def eye_cl(queue, n):  # signature inferred from the eye_cl(queue, 3) call below
    """Identity matrix (n x n) as GPUArray.
    """
    return give_cl(queue, eye(n, dtype=float32))
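# diag_cl and transpose_cl are called in "Main" below but fall outside this
# snippet; a hypothetical diag_cl in the same style as eye_cl:
def diag_cl(queue, elements):
    """Diagonal matrix with the given elements as GPUArray."""
    return give_cl(queue, diag(elements).astype(float32))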
# ==============================================================================
# Main
# ==============================================================================
if __name__ == "__main__":

    import pyopencl as cl
    from compas.hpc import get_cl

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    a_ = give_cl(queue, [[0, 1, 2]])

    print(get_cl(diag_cl(queue, [0, 1, 2])))
    print(get_cl(eye_cl(queue, 3)))
    print(get_cl(transpose_cl(a_)))
print "\n===== Sequential, matrix mult (dot prod), order", ORDER, "on host CPU ======\n"
for i in range(COUNT):
h_C.fill(0.0)
start_time = time()
print "Skipping as this takes a long time to run!"
#seq_mat_mul_sdot(N, h_A, h_B, h_C)
run_time = time() - start_time
#results(N, h_C, run_time)
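# Hypothetical sketch of the skipped reference routine (not shown in the
# snippet): a naive sequential matrix multiply over flat row-major arrays.
def seq_mat_mul_sdot(N, A, B, C):
    for i in range(N):
        for j in range(N):
            acc = 0.0
            for k in range(N):
                acc += A[i * N + k] * B[k * N + j]
            C[i * N + j] = acc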
# Set up OpenCL
context = cl.create_some_context()
queue = cl.CommandQueue(context)
# Reset host buffers - just to play it safe
h_A = numpy.empty(size).astype(numpy.float32)
h_A.fill(AVAL)
h_B = numpy.empty(size).astype(numpy.float32)
h_B.fill(BVAL)
h_C = numpy.empty(size).astype(numpy.float32)
# Create OpenCL buffers
d_a = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=h_A)
d_b = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=h_B)
d_c = cl.Buffer(context, cl.mem_flags.WRITE_ONLY, h_C.nbytes)
program = cl.Program(context, C_elem_KernelSource).build()
mmul = program.mmul
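# A plausible launch -- a sketch assuming the common one-work-item-per-
# element kernel signature mmul(const int N, A, B, C), with N the matrix
# order (ORDER) used above:
mmul.set_scalar_arg_dtypes([numpy.int32, None, None, None])
mmul(queue, (N, N), None, numpy.int32(N), d_a, d_b, d_c)
cl.enqueue_copy(queue, h_C, d_c)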
def __init__(self):
    self.ctx = cl.create_some_context()
    self.queue = cl.CommandQueue(self.ctx)
def run(double_precision=False):
    import numpy as np
    import pyopencl as cl
    import pyopencl.array as cla

    context = cl.create_some_context()
    queue = cl.CommandQueue(context)

    dtype = np.complex64 if not double_precision else np.complex128

    n_run = 100  # set to 1 for testing for correct result
    if n_run > 1:
        nd_dataC = np.random.normal(size=(1024, 1024)).astype(dtype)
    else:
        nd_dataC = np.ones((1024, 1024), dtype=dtype)  # set n_run to 1

    nd_dataF = np.asfortranarray(nd_dataC)
    dataC = cla.to_device(queue, nd_dataC)
    dataF = cla.to_device(queue, nd_dataF)
    nd_result = np.zeros_like(nd_dataC, dtype=dtype)
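    # The snippet ends before the transform under test is applied; as a
    # sketch (not part of the original), verify the host-to-device round
    # trip before benchmarking:
    assert np.allclose(dataC.get(), nd_dataC)
    assert np.allclose(dataF.get(), nd_dataF)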
import pyopencl as cl
import numpy
from mako.template import Template
local_size = 256
thread_strides = 32
macroblock_count = 33
dtype = numpy.float32
total_size = local_size*thread_strides*macroblock_count
context = cl.create_some_context()
queue = cl.CommandQueue(context)
a = numpy.random.randn(total_size).astype(dtype)
b = numpy.random.randn(total_size).astype(dtype)
c = numpy.empty_like(a)
a_buf = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=a)
b_buf = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=b)
c_buf = cl.Buffer(context, cl.mem_flags.WRITE_ONLY, b.nbytes)
template = Template("""
__kernel void add(
        __global ${ type_name } *tgt,
        __global const ${ type_name } *op1,
        __global const ${ type_name } *op2)
{
    /* Each work-item handles thread_strides elements, spaced local_size
       apart, so one work-group covers local_size*thread_strides entries. */
    int idx = get_local_id(0)
        + get_group_id(0) * ${ thread_strides } * ${ local_size };

    % for i in range(thread_strides):
        tgt[idx + ${ i * local_size }] =
            op1[idx + ${ i * local_size }]
            + op2[idx + ${ i * local_size }];
    % endfor
}""")
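# A sketch of rendering and running the templated kernel (modeled on
# PyOpenCL's demo_meta_template example; the original continuation is not
# shown in the snippet):
knl = cl.Program(context, str(template.render(
    type_name="float",
    local_size=local_size,
    thread_strides=thread_strides))).build().add
knl(queue, (local_size * macroblock_count,), (local_size,),
    c_buf, a_buf, b_buf)
cl.enqueue_copy(queue, c, c_buf)
print(numpy.abs(c - (a + b)).max())  # expect 0.0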
import pyopencl as cl
from time import time
import numpy
block_size = 32  # edge length of one work-group tile
context = cl.create_some_context()
queue = cl.CommandQueue(context, properties=cl.command_queue_properties.PROFILING_ENABLE)
a_width = 2048
a_height = 2048
b_width = 2048
b_height = 2048
c_width = b_width
c_height = a_height
h_a = numpy.random.rand(a_height, a_width).astype(numpy.float32)
h_b = numpy.random.rand(b_height, b_width).astype(numpy.float32)
h_c = numpy.empty((c_height, c_width)).astype(numpy.float32)
kernel_params = {"block_size": block_size, "w_a":a_width, "h_a":a_height, "w_b":b_width}
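# The kernel source itself falls outside the snippet; as a self-contained
# sketch (naive and untiled, so block_size and kernel_params go unused
# here), one work-item per output element:
kernel_src = """
__kernel void mmul(
    const int w_a,
    __global const float *a,
    __global const float *b,
    __global float *c)
{
    int col = get_global_id(0);
    int row = get_global_id(1);
    int w_c = get_global_size(0);
    float acc = 0.0f;
    for (int k = 0; k < w_a; k++)
        acc += a[row * w_a + k] * b[k * w_c + col];
    c[row * w_c + col] = acc;
}
"""
prg = cl.Program(context, kernel_src).build()
mf = cl.mem_flags
d_a = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=h_a)
d_b = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=h_b)
d_c = cl.Buffer(context, mf.WRITE_ONLY, h_c.nbytes)
prg.mmul.set_scalar_arg_dtypes([numpy.int32, None, None, None])
prg.mmul(queue, (c_width, c_height), None,
         numpy.int32(a_width), d_a, d_b, d_c)
cl.enqueue_copy(queue, h_c, d_c)
print("max error:", numpy.abs(h_c - h_a.dot(h_b)).max())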