How to use the pyopencl.array.empty_like function in pyopencl

To help you get started, we’ve selected a few pyopencl examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github inducer / pyopencl / test / test_algorithm.py View on Github external
def test_elwise_kernel(ctx_factory):
    """Check a custom ElementwiseKernel that evaluates ``z = a*x + b*y``.

    Builds the "linear_combination" kernel, applies it to two random
    50-element float32 device arrays, and compares the output with the same
    expression spelled out through device-array arithmetic.
    """
    ctx = ctx_factory()
    cmd_queue = cl.CommandQueue(ctx)

    from pyopencl.clrandom import rand as clrand
    from pyopencl.elementwise import ElementwiseKernel

    shape = (50,)
    x_dev = clrand(cmd_queue, shape, np.float32)
    y_dev = clrand(cmd_queue, shape, np.float32)

    arguments = "float a, float *x, float b, float *y, float *z"
    operation = "z[i] = a*x[i] + b*y[i]"
    linear_combination = ElementwiseKernel(
            ctx, arguments, operation, "linear_combination")

    # Output buffer modelled on the first operand; the kernel fills it.
    z_dev = cl_array.empty_like(x_dev)
    linear_combination(5, x_dev, 6, y_dev, z_dev)

    expected = 5 * x_dev + 6 * y_dev
    residual = (z_dev - expected).get()
    assert la.norm(residual) < 1e-5
github inducer / loopy / test / test_matmul.py View on Github external
knl = lp.split_dimension(knl, "k", 16, no_slabs=True)
    #knl = lp.split_dimension(knl, "k_inner", 8, outer_tag="unr")
    knl = lp.add_prefetch(knl, 'a', ["k_inner", ("i_inner_inner", "i_inner_outer")])
    knl = lp.add_prefetch(knl, 'b', ["k_inner", ("j_inner_inner", "j_inner_outer"),])
    assert knl.get_problems({})[0] <= 2

    kernel_gen = (lp.insert_register_prefetches(knl)
            for knl in lp.generate_loop_schedules(knl,
                hints=["k_outer", "k_inner_outer", "k_inner_inner"]
                ))

    a = make_well_conditioned_dev_matrix(queue, n, dtype=dtype, order=order)
    b = make_well_conditioned_dev_matrix(queue, n, dtype=dtype, order=order)
    a_img = cl.image_from_array(ctx, a.get(), 1)
    b_img = cl.image_from_array(ctx, b.get(), 1)
    c = cl_array.empty_like(a)
    refsol = np.dot(a.get(), b.get())

    def launcher(kernel, gsize, lsize, check):
        # Launch on the two image operands; c.data is passed as the
        # remaining buffer argument.
        event = kernel(
                queue, gsize(), lsize(),
                a_img, b_img, c.data,
                g_times_l=True)

        # When requested, compare the device result with the host-side
        # reference product computed via np.dot.
        if check:
            check_error(refsol, c.get())

        return event

    lp.drive_timing_run(kernel_gen, queue, launcher, 2*n**3)
github inducer / pyopencl / test / test_algorithm.py View on Github external
if i+1 < len(seg_boundaries):
                        seg_end = seg_boundaries[i+1]
                    else:
                        seg_end = None

                    if is_exclusive:
                        result_host[seg_start+1:seg_end] = np.cumsum(
                                a[seg_start:seg_end][:-1])
                        result_host[seg_start] = 0
                    else:
                        result_host[seg_start:seg_end] = np.cumsum(
                                a[seg_start:seg_end])

                #print "REF", result_host

                result_dev = cl_array.empty_like(a_dev)
                knl(a_dev, seg_boundary_flags_dev, result_dev)

                #print "RES", result_dev
                is_correct = (result_dev.get() == result_host).all()
                if not is_correct:
                    diff = result_dev.get() - result_host
                    print("RES-REF", diff)
                    print("ERRWHERE", np.where(diff))
                    print(n, list(seg_boundaries))

                assert is_correct
                from gc import collect
                collect()

            print("%d excl:%s done" % (n, is_exclusive))
github silx-kit / silx / silx / opencl / measurement.py View on Github external
shape = 4096
    # get the context
    ctx = pyopencl.Context()
    assert(not ctx is None)
    queue = pyopencl.CommandQueue(ctx)

    max_valid_wg = 1
    data = numpy.random.random(shape).astype(numpy.float32)
    d_data = pyopencl.array.to_device(queue, data)
    d_data_1 = pyopencl.array.zeros_like(d_data) + 1

    program = pyopencl.Program(ctx, get_opencl_code("addition")).build()

    maxi = int(round(numpy.log2(shape)))
    for i in range(maxi):
        d_res = pyopencl.array.empty_like(d_data)
        wg = 1 << i
        try:
            evt = program.addition(queue, (shape,), (wg,),
                   d_data.data, d_data_1.data, d_res.data, numpy.int32(shape))
            evt.wait()
        except Exception as error:
            print("Error on WG=%s: %s"%(wg, error))
            program = queue = d_res = d_data_1 = d_data = None
            break;
        else:
            res = d_res.get()
            good = numpy.allclose(res, data + 1 )
            if good and wg>max_valid_wg:
                max_valid_wg = wg
github silx-kit / silx / silx / opencl / common.py View on Github external
assert ctx is not None
    queue = pyopencl.CommandQueue(ctx)

    max_valid_wg = 1
    data = numpy.random.random(shape).astype(numpy.float32)
    d_data = pyopencl.array.to_device(queue, data)
    d_data_1 = pyopencl.array.empty_like(d_data)
    d_data_1.fill(numpy.float32(1.0))

    program = pyopencl.Program(ctx, get_opencl_code("addition")).build()
    if fast:
        max_valid_wg = program.addition.get_work_group_info(pyopencl.kernel_work_group_info.WORK_GROUP_SIZE, device)
    else:
        maxi = int(round(numpy.log2(shape)))
        for i in range(maxi + 1):
            d_res = pyopencl.array.empty_like(d_data)
            wg = 1 << i
            try:
                evt = program.addition(
                    queue, (shape,), (wg,),
                    d_data.data, d_data_1.data, d_res.data, numpy.int32(shape))
                evt.wait()
            except Exception as error:
                logger.info("%s on device %s for WG=%s/%s", error, device.name, wg, shape)
                program = queue = d_res = d_data_1 = d_data = None
                break
            else:
                res = d_res.get()
                good = numpy.allclose(res, data + 1)
                if good:
                    if wg > max_valid_wg:
                        max_valid_wg = wg
github silx-kit / pyFAI / pyFAI / opencl / common.py View on Github external
assert ctx is not None
    queue = pyopencl.CommandQueue(ctx)

    max_valid_wg = 1
    data = numpy.random.random(shape).astype(numpy.float32)
    d_data = pyopencl.array.to_device(queue, data)
    d_data_1 = pyopencl.array.zeros_like(d_data) + 1

    program = pyopencl.Program(ctx, get_opencl_code("addition")).build()
    if fast:
        max_valid_wg = program.addition.get_work_group_info(pyopencl.kernel_work_group_info.WORK_GROUP_SIZE, device)
    else:
        maxi = int(round(numpy.log2(shape)))
        for i in range(maxi + 1):
            d_res = pyopencl.array.empty_like(d_data)
            wg = 1 << i
            try:
                evt = program.addition(queue, (shape,), (wg,),
                                       d_data.data, d_data_1.data, d_res.data,
                                       numpy.int32(shape))

                evt.wait()
            except Exception as error:
                logger.info("%s on device %s for WG=%s/%s", error, device.name, wg, shape)
                program = queue = d_res = d_data_1 = d_data = None
                break
            else:
                res = d_res.get()
                good = numpy.allclose(res, data + 1)
                if good:
                    if wg > max_valid_wg:
github benshope / PyOpenCL-Tutorial / 020_array_sum.py View on Github external
# Use OpenCL To Add Two Random Arrays (This Way Hides Details)

import pyopencl as cl  # Import the OpenCL GPU computing API
import pyopencl.array as pycl_array  # Import PyOpenCL Array (a Numpy array plus an OpenCL buffer object)
import numpy as np  # Import Numpy number tools

# Set up the OpenCL context and a command queue bound to it.
context = cl.create_some_context()
queue = cl.CommandQueue(context)

# Upload two random float32 vectors to the device, then allocate an
# empty destination array modelled on the first one.
a = pycl_array.to_device(queue, np.random.rand(50000).astype(np.float32))
b = pycl_array.to_device(queue, np.random.rand(50000).astype(np.float32))
c = pycl_array.empty_like(a)

# OpenCL C source for the element-wise addition kernel.
kernel_source = """
__kernel void sum(__global const float *a, __global const float *b, __global float *c)
{
  int i = get_global_id(0);
  c[i] = a[i] + b[i];
}"""

# Compile the program.
program = cl.Program(context, kernel_source).build()

# Enqueue the kernel with one work-item per element of `a` (local size
# left as None); the result is written into c.
program.sum(
    queue, a.shape, None,
    a.data, b.data, c.data,
)

# Show all three arrays so the sum can be checked by eye.
print("a: {}".format(a))
print("b: {}".format(b))
print("c: {}".format(c))
github inducer / loopy / examples / matrix-mul.py View on Github external
knl = lp.split_iname(knl, "j", ilp*j_inner_split, outer_tag="g.1")
    knl = lp.split_iname(knl, "j_inner", j_inner_split, outer_tag="ilp", inner_tag="l.0")
    knl = lp.split_iname(knl, "k", 2)

    knl = lp.add_prefetch(knl, 'a', ["i_inner", "k_inner"])
    knl = lp.add_prefetch(knl, 'b', ["j_inner_outer", "j_inner_inner", "k_inner"])
    assert knl.get_problems({})[0] <= 2

    kernel_gen = (lp.insert_register_prefetches(knl)
            for knl in lp.generate_loop_schedules(knl))

    a = make_well_conditioned_dev_matrix(queue, n, dtype=dtype, order=order,
            ran_factor=1, id_factor=5)
    b = make_well_conditioned_dev_matrix(queue, n, dtype=dtype, order=order,
            ran_factor=1, id_factor=5, inc_factor=0)
    c = cl_array.empty_like(a)
    a_img = cl.image_from_array(ctx, a.get(), 1)
    b_img = cl.image_from_array(ctx, b.get(), 1)

    def launcher(kernel, gsize, lsize, check):
        # `check` is accepted but not used in this variant; the event
        # returned by the kernel launch is handed straight back.
        return kernel(
                queue, gsize(), lsize(),
                a_img, b_img, c.data,
                g_times_l=True)

    from pyopencl.characterize import get_fast_inaccurate_build_options
    lp.drive_timing_run(kernel_gen, queue, launcher, 2*n**3,
            options=get_fast_inaccurate_build_options(ctx.devices[0]))
github silx-kit / fabio / sandbox / cbf.py View on Github external
# Load the image through fabio, and separately read the file through
# cbf.read with only_raw=True (presumably the undecoded byte stream --
# confirm against cbf.read).
data = fabio.open(fname).data
raw = cbf.read(fname, only_raw=True)
properties = pyopencl.command_queue_properties.PROFILING_ENABLE
# properties = None
queue = pyopencl.CommandQueue(ctx, properties=properties)


# numpy.fromstring is deprecated for binary input (and rejected by recent
# NumPy releases). frombuffer gives a read-only view, so copy it to get
# the same owned, writable int8 array that fromstring used to return.
raw_n = numpy.frombuffer(raw, dtype="int8").copy()
size = raw_n.size

# Device-side buffers: the raw bytes, an int32 array with one slot per raw
# byte, and several int32 arrays sized like the fabio-loaded image.
raw_d = pyopencl.array.to_device(queue, raw_n)
int_d = pyopencl.array.empty(queue, (size,), dtype="int32")
data_d = pyopencl.array.empty(queue, (data.size,), dtype="int32")
tmp1_d = pyopencl.array.zeros_like(data_d)
tmp2_d = pyopencl.array.zeros_like(data_d)
tmp3_d = pyopencl.array.zeros_like(data_d)
lem_d = pyopencl.array.empty_like(data_d)
zero_d = pyopencl.array.zeros(queue, shape=1, dtype="int32")

# Read the kernel source inside a context manager so the file handle is
# closed deterministically instead of being left to the garbage collector.
with open("sandbox/cbf.cl") as kernel_file:
    src = kernel_file.read()
prg = pyopencl.Program(ctx, src).build()

for i in range(11):
    WG = 1 << i
    print("#" * 80)
    print("WG: %s" % WG)
    la = pyopencl.LocalMemory(4 * WG)
    lb = pyopencl.LocalMemory(4 * WG)
    lc = pyopencl.LocalMemory(4 * WG)
#     ld = pyopencl.LocalMemory(4)
    debug1_d = pyopencl.array.zeros(queue, shape=WG, dtype="int32")
    debug2_d = pyopencl.array.zeros(queue, shape=WG, dtype="int32")
    debug3_d = pyopencl.array.zeros(queue, shape=WG, dtype="int32")
github inducer / boxtree / boxtree / tree_build.py View on Github external
if debug:
                box_levels.finish()
                level_bl_chunk = box_levels.get()[
                        level_start_box_nrs[-2]:level_start_box_nrs[-1]]
                assert (level_bl_chunk == level).all()
                del level_bl_chunk

            if debug:
                assert (box_srcntgt_starts.get() < nsrcntgts).all()

            # }}}

            # {{{ renumber particles within split boxes

            new_user_srcntgt_ids = cl.array.empty_like(user_srcntgt_ids)
            new_srcntgt_box_ids = cl.array.empty_like(srcntgt_box_ids)

            particle_renumberer_args = (
                common_args
                + (box_has_children, force_split_box,
                   new_user_srcntgt_ids, new_srcntgt_box_ids))

            evt = knl_info.particle_renumberer_kernel(*particle_renumberer_args,
                    range=slice(nsrcntgts), wait_for=wait_for)

            wait_for = [evt]

            fin_debug("particle renumbering")

            user_srcntgt_ids = new_user_srcntgt_ids
            del new_user_srcntgt_ids
            srcntgt_box_ids = new_srcntgt_box_ids