How to use the pyopencl.create_some_context function in pyopencl

def _test_constantone(dims, nsources, ntargets, dtype):
    """Prepare inputs for a distributed test that uses all-ones source weights.

    Only the setup portion of this function is visible in this excerpt: it
    looks up the MPI rank, creates an OpenCL context and queue, and - on
    rank 0 only - generates source/target particles and the weight vector.

    :arg dims: number of coordinate components per particle; it also selects
        how many components of the ``[2, 0, 0]`` target shift are applied,
        so values above 3 are not supported by the shift.
    :arg nsources: number of source particles (and length of the weights).
    :arg ntargets: number of target particles.
    :arg dtype: dtype passed to the particle generator and used for weights.
    """
    from mpi4py import MPI

    # Get the current rank
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    # Initialization: these stay None on every rank other than 0.
    trav = None
    sources_weights = None

    # Configure PyOpenCL
    import pyopencl as cl
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    if rank == 0:

        # Generate random particles
        # NOTE(review): the module name was missing from this import
        # ("from import ..."), which is a syntax error.  boxtree.tools is
        # where make_normal_particle_array is expected to live - confirm.
        from boxtree.tools import make_normal_particle_array as p_normal
        sources = p_normal(queue, nsources, dims, dtype, seed=15)
        # Targets use a different seed and are shifted by 2 along the first
        # axis relative to the sources.
        targets = (p_normal(queue, ntargets, dims, dtype, seed=18) +
                   np.array([2, 0, 0])[:dims])

        # Constant one source weights
        sources_weights = np.ones((nsources,), dtype=dtype)

        # Build the global tree
        from boxtree import TreeBuilder
def run():
  # Loads an image into an H x W float32 mask, copies a downsampled slice of
  # it to the OpenCL device, allocates a (numx, numy, numy) float32 cost
  # array, builds an OpenCL program and launches its `boundary_cost` kernel.
  # H, W, sample, numx, numy, x0, xstep, y0, ystep and np are not defined in
  # this function and must come from the enclosing module.
  #
  # NOTE(review): three lines below are damaged (tokens are missing) and do
  # not parse as written; they are flagged individually and must be restored
  # from the upstream source before this function can run.
  import pyopencl as cl
  import pyopencl.array as cla
  cx = cl.create_some_context()
  # Profiling is enabled on the queue so that events carry timing data.
  q = cl.CommandQueue(cx, properties=cl.command_queue_properties.PROFILING_ENABLE)
  img = np.zeros((H, W), np.float32)
  import moonshine
  # NOTE(review): damaged line - the call that opens 'samples/sonata.png'
  # is missing before the string literal (presumably a moonshine loader;
  # confirm against upstream).
  data ='samples/sonata.png')[0].im
  # Copy the loaded page into the top-left corner of img as a 0/1 mask.
  img[:data.shape[0], :data.shape[1]] = data != 0
  # NOTE(review): `/` is true division on Python 3, so H/sample and W/sample
  # would be floats and cannot be used as slice bounds - confirm the target
  # Python version or switch to `//`.
  dimg = cla.to_device(q, img[:H/sample, :W/sample].copy())
  costs = cla.zeros(q, (numx, numy, numy), np.float32)

  # NOTE(review): damaged line - the kernel file name passed to open() is
  # empty; the real path is not recoverable from this excerpt.
  prg = cl.Program(cx, open("").read()).build()
  # Nine kernel arguments: the first and last are non-scalar (None), the
  # seven in between are 32-bit integers.
  prg.boundary_cost.set_scalar_arg_dtypes([None, np.int32, np.int32, np.int32, np.int32, np.int32, np.int32, np.int32, None])

  # One work-item per (x, y, y) combination, work-groups of a single item.
  # NOTE(review): damaged line - two arguments are missing at the ",," gaps
  # (presumably the device buffers for dimg and costs) and the call's
  # closing parenthesis is missing; confirm against upstream.
  return prg.boundary_cost(q, (numx, numy, numy), (1, 1, 1),, np.int32(W/sample), np.int32(y0), np.int32(ystep), np.int32(numy), np.int32(x0), np.int32(xstep), np.int32(numx),, costs
# Script fragment: loads a pyFAI geometry and an image, computes pixel-corner
# positions, uploads them to an OpenCL device and builds a kernel program
# with the problem sizes baked in as preprocessor macros.
# pyFAI, cl and numpy are not imported here and must come from the
# enclosing module.
ai = pyFAI.load("testimages/halfccd.poni")
# NOTE(review): damaged line - the call that opens the .edf image is missing
# before the string literal (presumably an image-reader open(); confirm
# against upstream).
data ="testimages/halfccd.edf").data

workgroup_size = 256
bins = 1000

# Corner positions for every pixel of an image with data's shape, in
# "2th_deg" units, unscaled.
pos_in = ai.array_from_unit(data.shape, "corner", unit="2th_deg", scale=False)

# Regroup the flat corner data into records of 4 corners x 2 coordinates.
# NOTE(review): `/` is true division on Python 3, so this passes a float as
# the first dimension to reshape - confirm the target Python version.
pos = pos_in.reshape(pos_in.size / 8, 4, 2)

pos_size = pos.size
# size = data.size
# Number of (4, 2) records in pos; passed to the kernel as NIMAGE below.
size = pos_size / 8

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
mf = cl.mem_flags

# Device-side arrays: the positions, a scratch array of 4 floats per
# work-item of one work-group, and a 4-element result array.
d_pos = cl.array.to_device(queue, pos)
d_preresult = cl.array.empty(queue, (4 * workgroup_size,), dtype=numpy.float32)
d_minmax = cl.array.empty(queue, (4,), dtype=numpy.float32)

# NOTE(review): damaged lines - the kernel file name is missing from the
# path (only the directory remains) and the right-hand side of the
# assignment (presumably kernelFile.read()) is missing; restore from
# upstream.
with open("../openCL/", "r") as kernelFile:
    kernel_src =

# Preprocessor definitions handed to the OpenCL compiler: bin count, record
# count, work-group size and float32 machine epsilon.
compile_options = "-D BINS=%i  -D NIMAGE=%i -D WORKGROUP_SIZE=%i -D EPS=%e" % \
                (bins, size, workgroup_size, numpy.finfo(numpy.float32).eps)


program = cl.Program(ctx, kernel_src).build(options=compile_options)
# The simplest possible PyOpenCL program - Sums two arrays

import pyopencl as cl # Access to the OpenCL API
import numpy # Tools to create and manipulate numbers

context = cl.create_some_context()  # Create a Context (one per computer)
queue = cl.CommandQueue(context)  # Create a Command Queue (usually one per processor)
# The C-like code that will run on the GPU.  Each work-item adds one pair of
# elements, selected by its global id.  The function body must be wrapped in
# braces; the opening "{" was missing, which made the kernel fail to compile.
kernel = """__kernel void sum(__global float* a, __global float* b, __global float* c)
{
    int i = get_global_id(0);
    c[i] = a[i] + b[i];
}"""

program = cl.Program(context, kernel).build() # Compile the kernel into a Program

flags = cl.mem_flags # Create a shortcut to OpenCL's memory instructions

# Two small float32 input arrays (5 elements each); float32 matches the
# kernel's `float` pointers.
a = numpy.random.rand(5).astype(numpy.float32)
b = numpy.random.rand(5).astype(numpy.float32)

# Device buffers initialised with a copy of the host arrays.
a_buffer = cl.Buffer(context, flags.COPY_HOST_PTR, hostbuf=a)
b_buffer = cl.Buffer(context, flags.COPY_HOST_PTR, hostbuf=b)
Identity matrix (n x n) as GPUArray.


    return give_cl(queue, eye(n, dtype=float32))

# ==============================================================================
# Main
# ==============================================================================

if __name__ == "__main__":

    from compas.hpc import get_cl

    # Context and command queue on whichever device PyOpenCL selects.
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    # Pass a nested list through give_cl; the result is not used below.
    a_ = give_cl(queue, [[0, 1, 2]])

    # Build each demo matrix, fetch it with get_cl and print it.
    diagonal = diag_cl(queue, [0, 1, 2])
    print(get_cl(diagonal))

    identity = eye_cl(queue, 3)
    print(get_cl(identity))
# Banner for the sequential (host CPU) reference run.  The text is formatted
# into one string so the statement is valid, and prints the same output, on
# both Python 2 and Python 3 (the bare `print "..."` statement form is a
# syntax error on Python 3).
print("\n===== Sequential, matrix mult (dot prod), order %s on host CPU ======\n" % (ORDER,))

for i in range(COUNT):
    start_time = time()

    # The sequential reference is deliberately disabled; only the timing
    # scaffolding around it is kept.
    print("Skipping as this takes a long time to run!")
    #seq_mat_mul_sdot(N, h_A, h_B, h_C)

    run_time = time() - start_time
    #results(N, h_C, run_time)

# Set up OpenCL
context = cl.create_some_context()
queue = cl.CommandQueue(context)

# Reset host buffers - just to play it safe
# NOTE(review): numpy.empty leaves the contents uninitialised, so h_A and
# h_B hold arbitrary values when they are copied to the device below -
# confirm whether a fill step is expected here.
h_A = numpy.empty(size).astype(numpy.float32)
h_B = numpy.empty(size).astype(numpy.float32)
h_C = numpy.empty(size).astype(numpy.float32)

# Create OpenCL buffers: the two inputs are read-only copies of the host
# arrays, the output is write-only and sized to match h_C.
d_a = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=h_A)
d_b = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=h_B)
d_c = cl.Buffer(context, cl.mem_flags.WRITE_ONLY, h_C.nbytes)

# Compile the element-wise kernel source and grab its `mmul` entry point.
program = cl.Program(context, C_elem_KernelSource).build()
mmul = program.mmul
def __init__(self):
        """Create an OpenCL context and a command queue on it.

        The context is stored as ``self.ctx`` and the queue as
        ``self.queue``.
        """
        context = cl.create_some_context()
        self.ctx = context
        self.queue = cl.CommandQueue(context)
def run(double_precision=False):
    """Set up host and device input arrays for a 1024 x 1024 complex run.

    Only the setup portion of this function is visible in this excerpt.

    :param double_precision: when true the data is ``complex128``,
        otherwise ``complex64``.
    """
    context = cl.create_some_context()
    queue = cl.CommandQueue(context)

    dtype = np.complex128 if double_precision else np.complex64
    n_run = 100  # set to 1 for testing for correct result

    # The `else:` was missing, so the random data was always overwritten by
    # ones and nd_dataC was undefined whenever n_run <= 1.
    if n_run > 1:
        # Multiple runs: random input.
        nd_dataC = np.random.normal(size=(1024, 1024)).astype(dtype)
    else:
        # Single run (n_run == 1): all-ones input for checking the result.
        nd_dataC = np.ones((1024, 1024), dtype=dtype)

    # Same values in C (row-major) and Fortran (column-major) memory order,
    # each uploaded to the device.
    nd_dataF = np.asfortranarray(nd_dataC)
    dataC = cla.to_device(queue, nd_dataC)
    dataF = cla.to_device(queue, nd_dataF)

    # Host array for the result, same shape as the input.
    nd_result = np.zeros_like(nd_dataC, dtype=dtype)
import pyopencl as cl
import numpy
from mako.template import Template

# Element type shared by all three host arrays.
dtype = numpy.float32

# Problem size: the total element count is the product of these three factors.
local_size, thread_strides, macroblock_count = 256, 32, 33
total_size = macroblock_count * thread_strides * local_size

context = cl.create_some_context()
queue = cl.CommandQueue(context)

# Host data: two random operands and an uninitialised result of equal shape.
a = numpy.random.randn(total_size).astype(dtype)
b = numpy.random.randn(total_size).astype(dtype)
c = numpy.empty_like(a)

# Device buffers: the operands are read-only copies of the host arrays, the
# result buffer is write-only and has the same byte size as an operand.
input_flags = cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR
a_buf = cl.Buffer(context, input_flags, hostbuf=a)
b_buf = cl.Buffer(context, input_flags, hostbuf=b)
c_buf = cl.Buffer(context, cl.mem_flags.WRITE_ONLY, b.nbytes)

template = Template("""
    __kernel void add(
            __global ${ type_name } *tgt, 
            __global const ${ type_name } *op1, 
            __global const ${ type_name } *op2)
import pyopencl as cl
from time import time
import numpy

# Block size stored in kernel_params below.
# NOTE(review): an earlier remark here asked "why sixteen?", which does not
# match the value 32 - confirm which one is intended.
block_size = 32

# Profiling is enabled on the queue so that events carry timing data.
context = cl.create_some_context()
queue = cl.CommandQueue(
    context,
    properties=cl.command_queue_properties.PROFILING_ENABLE,
)

# Dimensions of A and B; C takes its height from A and its width from B
# (presumably C = A x B - confirm against the kernel).
a_height, a_width = 2048, 2048
b_height, b_width = 2048, 2048
c_height, c_width = a_height, b_width

# Host arrays: random float32 inputs and an uninitialised float32 output.
h_a = numpy.random.rand(a_height, a_width).astype(numpy.float32)
h_b = numpy.random.rand(b_height, b_width).astype(numpy.float32)
h_c = numpy.empty((c_height, c_width)).astype(numpy.float32)

# Values keyed by the names block_size, w_a, h_a and w_b.
kernel_params = dict(
    block_size=block_size,
    w_a=a_width,
    h_a=a_height,
    w_b=b_width,
)