Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
if not(USE_CPP_SIFT) and (100 < keypoints_end-keypoints_start): print "NOTE: Python implementation of descriptors is slow. Do not handle more than 100 keypoints, or grab a coffee..."
if (USE_CPU):
print "Using CPU-optimized kernels"
wg = 1,
shape = keypoints.shape[0]*wg[0],
else:
# wg = (8, 8, 8)
# shape = int(keypoints.shape[0]*wg[0]), 8, 8
wg = (8, 4, 4)
shape = int(keypoints.shape[0]*wg[0]), 4, 4
gpu_keypoints = pyopencl.array.to_device(queue, keypoints)
#NOTE: for the following line, use pyopencl.array.empty instead of pyopencl.array.zeros if the keypoints are compacted
gpu_descriptors = pyopencl.array.zeros(queue, (keypoints_end - keypoints_start, 128), dtype=numpy.uint8, order="C")
gpu_grad = pyopencl.array.to_device(queue, grad)
gpu_ori = pyopencl.array.to_device(queue, ori)
keypoints_start, keypoints_end = numpy.int32(keypoints_start), numpy.int32(keypoints_end)
grad_height, grad_width = numpy.int32(grad.shape)
counter = pyopencl.array.to_device(queue, keypoints_end)
t0 = time.time()
k1 = self.program.descriptor(queue, shape, wg,
gpu_keypoints.data, gpu_descriptors.data, gpu_grad.data, gpu_ori.data, numpy.int32(octsize),
keypoints_start, counter.data, grad_width, grad_height)
res = gpu_descriptors.get()
t1 = time.time()
if (USE_CPP_SIFT):
import feature
sc = feature.SiftAlignment()
"""
self.queue = queue
self.traversal = traversal
self.tree = traversal.tree
# {{{ fetch tree structure and interaction lists to device memory
self.box_parent_ids_dev = cl.array.to_device(queue, self.tree.box_parent_ids)
self.target_boxes_dev = cl.array.to_device(queue, traversal.target_boxes)
self.target_or_target_parent_boxes_dev = cl.array.to_device(
queue, traversal.target_or_target_parent_boxes)
# list 1
self.neighbor_source_boxes_starts_dev = cl.array.to_device(
queue, traversal.neighbor_source_boxes_starts)
self.neighbor_source_boxes_lists_dev = cl.array.to_device(
queue, traversal.neighbor_source_boxes_lists)
# list 2
self.from_sep_siblings_starts_dev = cl.array.to_device(
queue, traversal.from_sep_siblings_starts)
self.from_sep_siblings_lists_dev = cl.array.to_device(
queue, traversal.from_sep_siblings_lists)
# list 3
self.target_boxes_sep_smaller_by_source_level_dev = np.empty(
(self.tree.nlevels,), dtype=object)
for ilevel in range(self.tree.nlevels):
self.target_boxes_sep_smaller_by_source_level_dev[ilevel] = \
cl.array.to_device(
queue,
traversal.target_boxes_sep_smaller_by_source_level[ilevel]
self.from_sep_bigger_lists_dev = cl.array.to_device(
queue, traversal.from_sep_bigger_lists)
# }}}
if self.tree.targets_have_extent:
# list 3 close
if traversal.from_sep_close_smaller_starts is not None:
self.from_sep_close_smaller_starts_dev = cl.array.to_device(
queue, traversal.from_sep_close_smaller_starts)
self.from_sep_close_smaller_lists_dev = cl.array.to_device(
queue, traversal.from_sep_close_smaller_lists)
# list 4 close
if traversal.from_sep_close_bigger_starts is not None:
self.from_sep_close_bigger_starts_dev = cl.array.to_device(
queue, traversal.from_sep_close_bigger_starts)
self.from_sep_close_bigger_lists_dev = cl.array.to_device(
queue, traversal.from_sep_close_bigger_lists)
# helper kernel for ancestor box query
self.mark_parent_knl = cl.elementwise.ElementwiseKernel(
queue.context,
"__global char *current, __global char *parent, "
"__global %s *box_parent_ids" % dtype_to_ctype(self.tree.box_id_dtype),
"if(i != 0 && current[i]) parent[box_parent_ids[i]] = 1"
)
# helper kernel for adding boxes from interaction list 1 and 4
self.add_interaction_list_boxes = cl.elementwise.ElementwiseKernel(
queue.context,
Template("""
pimcKernel.kernel = pimcKernel.prg.metropolis
#Initial paths are created (the initial path vector is filled with zeros,
#meaning no movement of the particles)
try:
pimcKernel.paths = cl.array.zeros(pimcKernel.queue,
(RP.nbrOfWalkers, RP.N * system.DOF),
np.float32)
#Buffer for storing number of accepted values and
#seeds for the xorshift PRNG
pimcKernel.accepts = cl.array.zeros(pimcKernel.queue,
(pimcKernel.nbrOfThreads, ), np.uint32)
#np.random.seed(0)
pimcKernel.seeds = cl.array.to_device(pimcKernel.queue,
(np.random.randint(0, high = 2 ** 31 - 1,
size = (pimcKernel.nbrOfThreads + 1, 4))
).astype(np.uint32))
if RP.enableOperator:
#pyopencl.array objects are created for storing
#the calculated operator means from each thread
pimcKernel.operatorValues = cl.array.zeros(pimcKernel.queue,
pimcKernel.nbrOfThreads * len(RP.operators), np.float32)
if RP.enableCorrelator:
#pyopencl.array objects are created for storing
#the calculated operator means from each thread
pimcKernel.correlatorValues = cl.array.zeros(pimcKernel.queue,
(RP.nbrOfWalkers, len(RP.correlators), RP.N / 2),
np.float32)
# Use OpenCL to add two random arrays (using PyOpenCL arrays and an
# elementwise kernel).
import pyopencl as cl  # Import the OpenCL GPU computing API
import pyopencl.array as cl_array  # Import PyOpenCL Array (a Numpy array plus an OpenCL buffer object)
import pyopencl.elementwise  # Imported explicitly so that cl.elementwise below does not depend on a transitive import
import numpy  # Import Numpy number tools

context = cl.create_some_context()  # Initialize the Context
queue = cl.CommandQueue(context)  # Instantiate a Queue

# Two random float32 vectors of length 10, copied to the device.
a = cl_array.to_device(queue, numpy.random.randn(10).astype(numpy.float32))
b = cl_array.to_device(queue, numpy.random.randn(10).astype(numpy.float32))
c = cl_array.empty_like(a)  # Create an empty pyopencl destination array

# Create an elementwise kernel object
#  - Arguments: a string formatted as a C argument list
#  - Operation: a snippet of C that carries out the desired map operation
#  - Name: the function name as which the kernel is compiled
# The Python-side handle is deliberately NOT called `sum`, so the builtin
# sum() stays usable in the rest of the script.
add_kernel = cl.elementwise.ElementwiseKernel(
    context,
    "float *a, float *b, float *c",
    "c[i] = a[i] + b[i]",
    "sum",
)

add_kernel(a, b, c)  # Call the elementwise kernel

# Print all three arrays, to show the kernel worked
print("a: {}".format(a))
print("b: {}".format(b))
print("c: {}".format(c))
def make_well_conditioned_dev_matrix(queue, shape, dtype=np.float32,
        order="C", ran_factor=1, id_factor=5, inc_factor=0, od=0):
    """Build a random matrix with a weighted diagonal term and copy it to the device.

    The host matrix is ``ran_factor * randn(*shape)`` plus the leading
    ``shape[0] x shape[1]`` corner of ``id_factor * eye(max(shape), k=od)``.
    When *inc_factor* is truthy, the main diagonal of that square term is
    overwritten with ``inc_factor * arange(max(shape))`` before the sum.

    :arg queue: forwarded unchanged to ``cl_array.to_device``.
    :arg shape: an ``int`` (expanded to the square ``(shape, shape)``) or a
        sequence whose first two entries are used as the matrix extents.
    :arg dtype: dtype of the resulting host array.
    :arg order: memory order passed to ``np.asarray``.
    :arg ran_factor: multiplier applied to the random part.
    :arg id_factor: multiplier applied to the ``np.eye`` part.
    :arg inc_factor: if truthy, slope of the increasing main diagonal.
    :arg od: diagonal offset ``k`` handed to ``np.eye``.
    :returns: whatever ``cl_array.to_device(queue, ary)`` returns.
    """
    dims = (shape, shape) if isinstance(shape, int) else shape
    side = max(dims)

    # Square (side x side) diagonal term; cropped to `dims` further down.
    diag_term = id_factor * np.eye(side, k=od)
    if inc_factor:
        idx = np.arange(side)
        diag_term[idx, idx] = inc_factor * idx

    noise = ran_factor * np.random.randn(*dims)
    host_matrix = np.asarray(
        noise + diag_term[:dims[0], :dims[1]],
        dtype=dtype,
        order=order,
    )
    return cl_array.to_device(queue, host_matrix)
# Set up device buffers and build the OpenCL program for the CBF test image,
# then iterate over work-group sizes 1, 2, 4, ..., 1024.
ctx = pyopencl.create_some_context(interactive=True)
fname = "testimages/run2_1_00148.cbf"
cbf = fabio.cbfimage.CbfImage()
data = fabio.open(fname).data
raw = cbf.read(fname, only_raw=True)
properties = pyopencl.command_queue_properties.PROFILING_ENABLE
# properties = None
queue = pyopencl.CommandQueue(ctx, properties=properties)

# Binary-mode numpy.fromstring is deprecated; frombuffer + copy() yields the
# same writable int8 array (assumes `raw` is a bytes-like object - TODO confirm).
raw_n = numpy.frombuffer(raw, dtype="int8").copy()
size = raw_n.size

raw_d = pyopencl.array.to_device(queue, raw_n)
int_d = pyopencl.array.empty(queue, (size,), dtype="int32")
data_d = pyopencl.array.empty(queue, (data.size,), dtype="int32")
tmp1_d = pyopencl.array.zeros_like(data_d)
tmp2_d = pyopencl.array.zeros_like(data_d)
tmp3_d = pyopencl.array.zeros_like(data_d)
lem_d = pyopencl.array.empty_like(data_d)
zero_d = pyopencl.array.zeros(queue, shape=1, dtype="int32")

# Read the kernel source through a context manager so the file handle is
# closed deterministically instead of being left to the garbage collector.
with open("sandbox/cbf.cl") as kernel_file:
    src = kernel_file.read()
prg = pyopencl.Program(ctx, src).build()

for i in range(11):
    WG = 1 << i  # work-group size: successive powers of two
    print("#" * 80)
    print("WG: %s" % WG)
    # Local-memory argument of 4 * WG bytes
    # (presumably one 32-bit value per work-item - TODO confirm against cbf.cl).
    la = pyopencl.LocalMemory(4 * WG)
def probabilities(self):
    """Return the squared absolute value of each amplitude as a host array.

    A zero-filled float32 device array with ``2**self.num_qubits`` entries
    is created, ``program.calculate_probabilities`` is launched over it with
    ``self.buffer`` as input, and the result is copied back with ``.get()``.
    """
    n_states = 2**self.num_qubits
    host_zeros = np.zeros(n_states, dtype=np.float32)
    result_dev = pycl_array.to_device(self.queue, host_zeros)

    # Global size is the output shape; the local size is left as None.
    program.calculate_probabilities(
        self.queue,
        result_dev.shape,
        None,
        self.buffer.data,
        result_dev.data,
    )

    return result_dev.get()
# Demonstrate pyopencl.array.subset_dot and compare it with a NumPy reference.
import numpy as np
import pyopencl as cl
import pyopencl.array

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

n = 3
a = np.arange(n**2)
print(a)
a = a.astype(np.float32)
print(a)
g_a = cl.array.to_device(queue, a)

start = 2
end = 7
# Indices start..end-1, uploaded to the device and passed as the `subset`
# argument of subset_dot.
subset = cl.array.to_device(queue, np.arange(start, end))

# print() is used in single-argument call form so the script runs on
# Python 3 and produces the same text on Python 2.
print(subset.dtype)
print("Subset Array {}".format(subset))

# Device result first, then the host-side dot product over a[start:end]
# for comparison.
print(cl.array.subset_dot(subset, g_a, g_a))
print(np.dot(a[start:end], a[start:end]))