Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
raise
a_dest = cl.Buffer(context, cl.mem_flags.READ_WRITE, a.nbytes)
samp = cl.Sampler(context, False,
cl.addressing_mode.CLAMP,
cl.filter_mode.NEAREST)
prg.copy_image(queue, a.shape, None, a_dest, a_img, samp,
np.int32(a.strides[0]/a.dtype.itemsize))
a_result = np.empty_like(a)
cl.enqueue_copy(queue, a_result, a_dest)
good = la.norm(a_result - a) == 0
if not good:
if queue.device.type & cl.device_type.CPU:
assert good, ("The image implementation on your CPU CL platform '%s' "
"returned bad values. This is bad, but common."
% queue.device.platform)
else:
assert good
from pytools import all
from pyopencl.characterize import has_double_support
self.code_variables = dict(
np=np,
dtype_to_ctype=dtype_to_ctype,
preamble=preamble,
name_prefix=name_prefix,
index_dtype=self.index_dtype,
scan_dtype=dtype,
is_segmented=self.is_segmented,
arg_dtypes=arg_dtypes,
arg_ctypes=arg_ctypes,
scan_expr=_process_code_for_macro(scan_expr),
neutral=_process_code_for_macro(neutral),
is_gpu=bool(self.devices[0].type & cl.device_type.GPU),
double_support=all(
has_double_support(dev) for dev in devices),
)
# }}}
self.finish_setup()
def __init__(self, record_best, cl_mode, cl_device=None):
    """Initialize the class.

    When ``cl_mode`` is truthy, pyopencl is imported and an OpenCL context,
    command queue, memory pool, memory-flag shortcut and device handle are
    stored on the instance. When it is falsy, the part of the constructor
    shown here sets nothing up.

    Args:
        record_best: Not used in the visible part of this constructor.
        cl_mode: Enables the OpenCL setup when truthy.
        cl_device: ``'gpu'`` builds the context from every GPU device found
            on any platform; ``'cpu'`` builds it from the first CPU device
            found; any other value defers to ``cl.create_some_context()``.

    Raises:
        IndexError: If ``cl_device == 'cpu'`` and no CPU device was found.
    """
    if cl_mode:
        import pyopencl as cl
        import pyopencl.array, pyopencl.tools, pyopencl.clrandom

        def devices_of(kind):
            # Collect devices of the given type across all platforms.
            found = []
            for platform in cl.get_platforms():
                try:
                    found += platform.get_devices(device_type=kind)
                except cl.Error:
                    # Best effort: a platform that cannot report devices of
                    # this type is skipped. Only OpenCL errors are ignored,
                    # so KeyboardInterrupt and programming errors propagate.
                    pass
            return found

        if cl_device == 'gpu':
            # An empty device list is passed through unchanged; cl.Context
            # decides how to report it.
            self.ctx = cl.Context(devices_of(cl.device_type.GPU))
        elif cl_device == 'cpu':
            cpu_devices = devices_of(cl.device_type.CPU)
            # Only the first CPU device is used.
            self.ctx = cl.Context([cpu_devices[0]])
        else:
            self.ctx = cl.create_some_context()

        # These lines need the locally imported ``cl``, so they belong to the
        # cl_mode branch.
        self.queue = cl.CommandQueue(self.ctx)
        self.mem_pool = cl.tools.MemoryPool(cl.tools.ImmediateAllocator(self.queue))
        self.mf = cl.mem_flags
        self.device = self.ctx.get_info(cl.context_info.DEVICES)[0]
        self.device_type = self.device.type
def _set_device_type(self, device_type):
    '''Store the OpenCL device type constant named by ``device_type``.

    The name is compared case-insensitively against ``ACCELERATOR``,
    ``GPU`` and ``CPU``. Any other name logs a warning through
    ``self.logger`` and falls back to ``cl.device_type.GPU``.
    '''
    requested = device_type.upper()
    if requested in ('ACCELERATOR', 'GPU', 'CPU'):
        # The recognised names match the attribute names on cl.device_type.
        self.device_type = getattr(cl.device_type, requested)
        return
    self.logger.warning("Warning: device type is set to default: GPU")
    self.device_type = cl.device_type.GPU
if not found_pyopencl:
return None
if cls.ctx is None:
csetup = PyOpenCLHostTimer("ctx_setup",0)
csetup .start()
platform = cl.get_platforms()[cls.plat_id]
device = platform.get_devices()[cls.dev_id]
cinfo = "OpenCL Context Info\n"
cinfo += " Using platform id = %d\n" % cls.plat_id
cinfo += " Platform name: %s\n" % platform.name
cinfo += " Platform profile: %s\n" % platform.profile
cinfo += " Platform vendor: %s\n" % platform.vendor
cinfo += " Platform version: %s\n" % platform.version
cinfo += " Using device id = %d\n" % cls.dev_id
cinfo += " Device name: %s\n" % device.name
cinfo += " Device type: %s\n" % cl.device_type.to_string(device.type)
cinfo += " Device memory: %s\n" % device.global_mem_size
cinfo += " Device max clock speed: %s MHz\n" % device.max_clock_frequency
cinfo += " Device compute units: %s\n" % device.max_compute_units
info(cinfo)
cls.device = device
cls.ctx = cl.Context([device])
cls.ctx_info = cinfo
csetup.stop()
PyOpenCLContextManager.add_host_event(csetup)
return cls.ctx
@classmethod
"""
nv_compute_cap = nv_compute_capability(dev)
if nv_compute_cap is not None:
if nv_compute_cap < (2, 0):
return 16
else:
if nv_compute_cap >= (3, 0):
from warnings import warn
warn("wildly guessing conflicting local access size on '%s'"
% dev,
CLCharacterizationWarning)
return 32
if dev.type & cl.device_type.GPU:
from warnings import warn
warn("wildly guessing conflicting local access size on '%s'"
% dev,
CLCharacterizationWarning)
return 16
elif dev.type & cl.device_type.CPU:
return 1
else:
from warnings import warn
warn("wildly guessing conflicting local access size on '%s'"
% dev,
CLCharacterizationWarning)
return 16
self.OCLModel = QStandardItemModel()
oclNoneItem, oclNoneItemName = self.addParam(self.OCLModel,
"None",
"None")
oclNoneItem, oclNoneItemName = self.addParam(self.OCLModel,
"auto",
"auto")
if isOpenCL:
iDeviceCPU = []
iDeviceGPU = []
for platform in cl.get_platforms():
try: # at old pyopencl versions:
CPUdevices =\
platform.get_devices(
device_type=cl.device_type.CPU)
GPUdevices =\
platform.get_devices(
device_type=cl.device_type.GPU)
except cl.RuntimeError:
pass
if len(CPUdevices) > 0:
if len(iDeviceCPU) > 0:
if CPUdevices[0].vendor == \
CPUdevices[0].platform.vendor:
iDeviceCPU = CPUdevices
else:
iDeviceCPU.extend(CPUdevices)
iDeviceGPU.extend(GPUdevices)
if len(iDeviceCPU) > 0:
oclNoneItem, oclNoneItemName = self.addParam(self.OCLModel,
import pyopencl as cl
import pyopencl.array as clarray
from time import time
import numpy as np
import os
from numpy import uint32, int32
from kernel_util import *
# Module-level OpenCL setup: picks the GPU devices of the first platform,
# creates one shared context with a command queue per device, and derives
# work-group sizes and vendor flags from those devices.
# Relative include directory, and the directory that contains this file.
common_lib_path = 'clcommons/include'
base_path = os.path.dirname(os.path.realpath(__file__))
#ctx = cl.create_some_context()
# Only the first reported platform is considered.
platform = cl.get_platforms()[0]
# Keep devices whose type compares equal to GPU (exact equality, not a bitmask test).
devices = [device for device in platform.get_devices() if device.type == cl.device_type.GPU]
# NOTE(review): devices[0] raises IndexError when the platform has no GPU device.
# `device` is a one-element list; the comprehensions below reuse the name
# `device` as their loop variable.
device = [devices[0]]
# Queues are created with both profiling and out-of-order execution requested.
queue_properties = cl.command_queue_properties.PROFILING_ENABLE | cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE
# One context shared by all selected devices, and one queue per device.
ctx = cl.Context(devices)
queues = [cl.CommandQueue(ctx, device, properties=queue_properties) for device in devices]
#multicontext
#ctxs = [cl.Context(device) for device in devices]
#queues = [cl.CommandQueue(ctx, device, properties=queue_properties) for ctx, device in zip(ctxs, devices)]
# Default queue: the one belonging to the first selected device.
queue = queues[0]
# Largest compute-unit count among the selected devices.
compute_units = max([device.max_compute_units for device in devices])
# Smallest value returned by wavefront_wg_size over the devices.
# NOTE(review): wavefront_wg_size, is_device_amd and is_device_nvidia are not
# defined in the visible part of this file — presumably from kernel_util; confirm.
device_wg_size = min([wavefront_wg_size(device) for device in devices])
# Largest max_work_group_size among the selected devices.
default_max_wg_size = max([device.max_work_group_size for device in devices])
default_wg_size = device_wg_size
# Each flag is True only when the predicate holds for every selected device.
is_amd_platform = all([is_device_amd(device) for device in devices])
is_gpu_platform = all([device.type == cl.device_type.GPU for device in devices])
is_amd_gpu_platform = all([(is_device_amd(device) and device.type == cl.device_type.GPU) for device in devices])
is_nvidia_platform = all([is_device_nvidia(device) for device in devices])
def device_type_from_string(cl_device_type_str):
    """Look up a pyopencl device type by its case-insensitive name.

    The given string is upper-cased and resolved as an attribute of
    ``cl.device_type`` (for example ``gpu`` resolves ``cl.device_type.GPU``).

    NOTE(review): an earlier description said ``all`` yields None. That only
    holds if ``cl.device_type`` has no ``ALL`` attribute — confirm against
    the installed pyopencl.

    Args:
        cl_device_type_str (str): The name to convert, e.g. ``accelerator``,
            ``cpu``, ``custom`` or ``gpu``.

    Returns:
        The attribute of ``cl.device_type`` with that upper-cased name, or
        None when no such attribute exists.
    """
    return getattr(cl.device_type, cl_device_type_str.upper(), None)
# Module-level OpenCL setup: selects GPU devices from `platform`, creates a
# shared context with one command queue per device, and derives work-group
# sizes and vendor flags.
# NOTE(review): these statements repeat device/queue setup that appears earlier
# in this file and rely on `platform` being bound there — confirm the
# repetition is intentional.
# Keep devices whose type compares equal to GPU (exact equality, not a bitmask test).
devices = [device for device in platform.get_devices() if device.type == cl.device_type.GPU]
# NOTE(review): devices[0] raises IndexError when no GPU device was selected.
device = [devices[0]]
# Queues are created with both profiling and out-of-order execution requested.
queue_properties = cl.command_queue_properties.PROFILING_ENABLE | cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE
# One context shared by all selected devices, and one queue per device.
ctx = cl.Context(devices)
queues = [cl.CommandQueue(ctx, device, properties=queue_properties) for device in devices]
#multicontext
#ctxs = [cl.Context(device) for device in devices]
#queues = [cl.CommandQueue(ctx, device, properties=queue_properties) for ctx, device in zip(ctxs, devices)]
# Default queue: the one belonging to the first selected device.
queue = queues[0]
# Largest compute-unit count among the selected devices.
compute_units = max([device.max_compute_units for device in devices])
# Smallest value returned by wavefront_wg_size over the devices.
# NOTE(review): wavefront_wg_size, is_device_amd and is_device_nvidia are not
# defined in the visible part of this file — confirm where they come from.
device_wg_size = min([wavefront_wg_size(device) for device in devices])
# Largest max_work_group_size among the selected devices.
default_max_wg_size = max([device.max_work_group_size for device in devices])
default_wg_size = device_wg_size
# Each flag is True only when the predicate holds for every selected device.
is_amd_platform = all([is_device_amd(device) for device in devices])
is_gpu_platform = all([device.type == cl.device_type.GPU for device in devices])
is_amd_gpu_platform = all([(is_device_amd(device) and device.type == cl.device_type.GPU) for device in devices])
is_nvidia_platform = all([is_device_nvidia(device) for device in devices])
def cl_opt_decorate(kop, CL_FLAGS, max_wg_size_used = None, max_wg_size = None):
if is_amd_gpu_platform:
CL_FLAGS2 = '-D AMD_GPU_ARCH -D DEVICE_WAVEFRONT_SIZE={wavefront_size} '.format(wavefront_size=device_wg_size)
if max_wg_size_used is not None and np.prod(max_wg_size_used, dtype=np.uint32) <= device_wg_size:
CL_FLAGS2 = CL_FLAGS2 + '-D PROMISE_WG_IS_WAVEFRONT '
CL_FLAGS = CL_FLAGS2 + CL_FLAGS
elif is_nvidia_platform:
CL_FLAGS2 = '-D NVIDIA_ARCH -D DEVICE_WAVEFRONT_SIZE={wavefront_size} '.format(wavefront_size=device_wg_size)
#if max_wg_size_used is not None and np.prod(max_wg_size_used, dtype=np.uint32) <= device_wg_size:
# CL_FLAGS2 = CL_FLAGS2 + '-D PROMISE_WG_IS_WAVEFRONT '
#causes segfault in NvCliCompileBitcode - seems like internal compiler error
CL_FLAGS = CL_FLAGS2 + CL_FLAGS
if max_wg_size is None: