import numpy as np
import pyopencl as cl
import pystella as ps


def test_spectral_poisson(ctx_factory, grid_shape, proc_shape, h, dtype,
                          timing=False):
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape))
    mpi = ps.DomainDecomposition(proc_shape, h, rank_shape)
    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)

    L = (3, 5, 7)
    dx = tuple(Li / Ni for Li, Ni in zip(L, grid_shape))
    dk = tuple(2 * np.pi / Li for Li in L)

    if h == 0:
        def get_evals_2(k, dx):
            return - k**2
        derivs = ps.SpectralCollocator(fft, dk)
    else:
        from pystella.derivs import SecondCenteredDifference
        get_evals_2 = SecondCenteredDifference(h).get_eigenvalues
        # completing the truncated branch with the finite-difference counterpart
        derivs = ps.FiniteDifferencer(mpi, h, dx)
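# For context: the h == 0 branch above uses the exact spectral eigenvalue
# -k**2 of the Laplacian, while the finite-difference branch pulls the
# stencil's modified eigenvalues from SecondCenteredDifference. As a rough
# illustration (the textbook 3-point stencil, not necessarily pystella's
# exact convention), such an eigenvalue looks like:
def second_difference_eigenvalue(k, dx):
    """Eigenvalue of (f[i+1] - 2*f[i] + f[i-1]) / dx**2 acting on exp(1j*k*x).

    Equals (2*cos(k*dx) - 2) / dx**2, which tends to -k**2 as k*dx -> 0,
    matching the spectral branch above. Uses the numpy import at the top.
    """
    return (2 * np.cos(k * dx) - 2) / dx**2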
import pyopencl as cl
import pyopencl.array as cla
import pyopencl.clrandom as clr
import pystella as ps
# set parameters
grid_shape = (128, 128, 128)
proc_shape = (1, 1, 1)
rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape))
halo_shape = 1
dtype = 'float64'
dx = tuple(10 / Ni for Ni in grid_shape)
dt = min(dx) / 10
# create pyopencl context, queue, and halo-sharer
ctx = ps.choose_device_and_make_context()
queue = cl.CommandQueue(ctx)
decomp = ps.DomainDecomposition(proc_shape, halo_shape, rank_shape)
# initialize arrays with random data
f = clr.rand(queue, tuple(ni + 2 * halo_shape for ni in rank_shape), dtype)
dfdt = clr.rand(queue, tuple(ni + 2 * halo_shape for ni in rank_shape), dtype)
lap_f = cla.zeros(queue, rank_shape, dtype)
# define system of equations
f_ = ps.DynamicField('f', offset='h')  # don't overwrite f
rhs_dict = {
    f_: f_.dot,      # df/dt = \dot{f}
    f_.dot: f_.lap,  # d\dot{f}/dt = \nabla^2 f
}
# create time-stepping and derivative-computing kernels
stepper = ps.LowStorageRK54(rhs_dict, dt=dt, halo_shape=halo_shape)
derivs = ps.FiniteDifferencer(decomp, halo_shape, dx)
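# The example stops before the evolution loop itself. A minimal sketch of
# how these kernels are driven, following the stage-loop pattern in
# pystella's README (num_stages and the keyword names fx, lap, lap_f are
# assumptions taken from that example):
t = 0.
while t < 10.:
    for s in range(stepper.num_stages):
        derivs(queue, fx=f, lap=lap_f)
        stepper(s, queue=queue, f=f, dfdt=dfdt, lap_f=lap_f)
    t += dt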
def tanh_cl(a):
    # elementwise tanh of a PyOpenCL array (wraps pyopencl.clmath.tanh);
    # function name reconstructed from the usage examples below
    return pyopencl.clmath.tanh(a)
# ==============================================================================
# Main
# ==============================================================================
if __name__ == "__main__":

    from compas_hpc import give_cl
    from compas_hpc import get_cl
    from numpy import pi

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    # a = abs_cl(give_cl(queue, [-0.1, -1.7]))
    # a = acos_cl(give_cl(queue, [0.5, 1]))
    # a = asin_cl(give_cl(queue, [0.5, 1]))
    # a = atan_cl(give_cl(queue, [0.5, 1]))
    # a = cos_cl(give_cl(queue, [0, pi/4]))
    # a = cosh_cl(give_cl(queue, [0, pi/4]))
    # a = maximum_cl(give_cl(queue, [1, 2, 3]), give_cl(queue, [3, 2, 1]))
    # a = maximum_cl(give_cl(queue, [1, 2, 3]))
    # a = minimum_cl(give_cl(queue, [1, 2, 3]), give_cl(queue, [3, 2, 1]))
    # a = minimum_cl(give_cl(queue, [1, 2, 3]))
    # a = sin_cl(give_cl(queue, [0, pi/4]))
    # a = sinh_cl(give_cl(queue, [0, pi/4]))
    # a = sqrt_cl(give_cl(queue, [4, 9]))
    # a = tan_cl(give_cl(queue, [0, pi/4]))
    # a = tanh_cl(give_cl(queue, [0, pi/4]))
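    # One uncommented round trip might look like this (assuming get_cl is
    # compas_hpc's device-to-host counterpart of give_cl):
    a = tanh_cl(give_cl(queue, [0, pi / 4]))
    print(get_cl(a))  # expect values close to [0.0, 0.6558]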
print "\n===== Sequential, matrix mult (dot prod), order", ORDER, "on host CPU ======\n"
for i in range(COUNT):
h_C.fill(0.0)
start_time = time()
print "Skipping as this takes a long time to run!"
#seq_mat_mul_sdot(N, h_A, h_B, h_C)
run_time = time() - start_time
#results(N, h_C, run_time)
# Set up OpenCL
context = cl.create_some_context()
queue = cl.CommandQueue(context)
# Reset host buffers - just to play it safe
h_A = numpy.empty(size).astype(numpy.float32)
h_A.fill(AVAL)
h_B = numpy.empty(size).astype(numpy.float32)
h_B.fill(BVAL)
h_C = numpy.empty(size).astype(numpy.float32)
# Create OpenCL buffers
d_a = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=h_A)
d_b = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=h_B)
d_c = cl.Buffer(context, cl.mem_flags.WRITE_ONLY, h_C.nbytes)
program = cl.Program(context, C_elem_KernelSource).build()
mmul = program.mmul
mmul.set_scalar_arg_dtypes([numpy.int32, None, None, None])
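# A sketch of the launch-and-readback sequence that normally follows this
# setup (the (N,) global size assumes the one-work-item-per-row kernel
# variant; results() is the exercise's own reporting helper):
for i in range(COUNT):
    h_C.fill(0.0)
    start_time = time()
    # scalar N first (declared via set_scalar_arg_dtypes), then the buffers
    mmul(queue, (N,), None, numpy.int32(N), d_a, d_b, d_c)
    queue.finish()
    run_time = time() - start_time
    cl.enqueue_copy(queue, h_C, d_c)
    results(N, h_C, run_time)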
a[:, 0] = 75  # left column
a[:, a.shape[1] - 1] = 50  # right column (shape[1] is the column count)
ctx = cl.create_some_context()
"""
for Jacobian iteration we need two arrays, one to store the
values from timestep i and one for timestep values i+1
"""
u = np.zeros((size,size), dtype=np.float32)
initialize(u)
u_new=np.copy(u)
program = cl.Program(ctx, kernel_source).build()
queue = cl.CommandQueue(ctx)
mf = cl.mem_flags
#create the memory objects on the device
u_dev = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=u)
u_new_dev = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=u_new)
for iteration in range(1000):
    # ping-pong between the two device buffers on alternating iterations
    if iteration % 2 == 0:
        program.solve(queue, (size * size,), None, u_dev, u_new_dev)
    else:
        program.solve(queue, (size * size,), None, u_new_dev, u_dev)
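# The loop never transfers the solution back to the host. A minimal
# readback sketch (which buffer holds the newest values depends on the
# kernel's argument order, so both are copied here):
cl.enqueue_copy(queue, u, u_dev)
cl.enqueue_copy(queue, u_new, u_new_dev)
queue.finish()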
def __init__(self):
    self.ctx = cl.create_some_context()
    self.queue = cl.CommandQueue(self.ctx)
specific amplitude distribution in the target plane. It uses an
FFT to calculate the field propagation.
The wavefront at the DOE plane is assumed to be a plane wave.

**ARGUMENTS:**

    ========== ======================================================
    idata      numpy array containing the target amplitude distribution
    itera      maximum number of iterations
    ========== ======================================================
"""
pl = cl.get_platforms()[0]
devices = pl.get_devices(device_type=cl.device_type.GPU)
ctx = cl.Context(devices=[devices[0]])
queue = cl.CommandQueue(ctx)
plan = Plan(idata.shape, queue=queue, dtype=complex128)  # does not work with "complex128"
src = str(Template(KERNEL).render(
    double_support=all(
        has_double_support(dev) for dev in devices),
    amd_double_support=all(
        has_amd_double_support(dev) for dev in devices)
))
prg = cl.Program(ctx, src).build()

idata_gpu = cl_array.to_device(queue, ifftshift(idata).astype("complex128"))
fdata_gpu = cl_array.empty_like(idata_gpu)
rdata_gpu = cl_array.empty_like(idata_gpu)
plan.execute(idata_gpu.data, fdata_gpu.data)
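# rdata_gpu is allocated but unused in this fragment; in the full
# Gerchberg-Saxton-style loop it would hold the inverse transform, e.g.
# (assuming the pyfft-style Plan.execute with an inverse flag):
plan.execute(fdata_gpu.data, rdata_gpu.data, inverse=True)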
import time
import pyopencl as cl
import pyopencl.array as cla
import numpy as np
import gpyfft
#NOTE: better benchmark contained in high level interface gpyfft/fft.py
G = gpyfft.GpyFFT()
print "clAmdFft Version: %d.%d.%d"%(G.get_version())
context = cl.create_some_context()
queue = cl.CommandQueue(context)
nd_data = np.ones((512, 512), dtype = np.complex64)
cl_data = cla.to_device(queue, nd_data)
cl_data_transformed = cla.empty_like(cl_data)
print 'data shape:', cl_data.shape
plan = G.create_plan(context, cl_data.shape)
plan.inplace = True #False
plan.precision = 1
print 'plan.inplace:', plan.inplace
print 'plan.precision:', plan.precision
plan.bake(queue)
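# The fragment ends after baking the plan; executing it would look roughly
# like this (enqueue_transform's tuple-of-queues/buffers signature is an
# assumption based on gpyfft's bundled benchmark script):
t0 = time.time()
events = plan.enqueue_transform((queue,), (cl_data.data,))  # in-place, see above
queue.finish()
print('transform took %.3f ms' % (1e3 * (time.time() - t0)))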
        int3 diff;
        for(int i = 0; i < ${cols}; i++) {
            diff = p - lut[i];
            diff = diff * diff;
            d = diff.x + diff.y + diff.z;
            if(d < dmin) {
                dmin = d;
                h = i;
            }
        }
        out[y*width + x] = lut_idx[h];
    }
""".replace('${cols}', str(cols))
prog = cl.Program(ctx, krnl).build()
func = prog.closest
queue = cl.CommandQueue(ctx)
return cl, func, queue, ctx, g_lut, g_lut_idx
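# A hypothetical call site for the returned objects; the kernel's image
# argument and its position are not visible in this fragment, so img_buf,
# out_buf, width, and height below are placeholders only:
out_buf = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY, width * height)
func(queue, (width, height), None, img_buf, g_lut, g_lut_idx, out_buf)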
self.FatalError = pyopencl.RuntimeError
if options.opencl_interactive:
    self.ctx = cl.create_some_context(True)
else:
    if 'OPENCL_PLATFORM' in os.environ:
        platform_num = int(os.environ['OPENCL_PLATFORM'])
    else:
        platform_num = 0
    platform = cl.get_platforms()[platform_num]
    devices = platform.get_devices(device_type=cl.device_type.GPU)
    devices = [devices[gpu_id]]
    self.ctx = cl.Context(devices=devices,
                          properties=[(cl.context_properties.PLATFORM, platform)])
self.default_queue = cl.CommandQueue(self.ctx)
self.buffers = {}
self.arrays = {}
self._iteration_kernels = []
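# The empty buffers/arrays dicts suggest per-name caching of device
# allocations; a minimal sketch of such a helper (the method name and
# semantics are assumptions, not the original class's API):
def get_buffer(self, name, nbytes, flags=cl.mem_flags.READ_WRITE):
    if name not in self.buffers:
        self.buffers[name] = cl.Buffer(self.ctx, flags, nbytes)
    return self.buffers[name]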