Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
numpy.uint32(integ.lut_size),
#lut_idx_buf,
#lut_coef_buf,
lut_bufT,
numpy.int32(0),
numpy.float32(0),
numpy.float32(0),
outData_buf,
outCount_buf,
outMerge_buf)
# Run the transposed-LUT kernel once with a work-group size of 16 and time it.
# t3 (taken before this point) marks the start of the setup phase.
t4 = time.time()
program.lut_integrate_lutT(q, (bins,), (16,), *args_bufT)

# Host-side destinations for the three kernel outputs.
b = numpy.empty(bins, dtype=numpy.float32)
c = numpy.empty(bins, dtype=numpy.float32)
d = numpy.empty(bins, dtype=numpy.float32)

# enqueue_copy takes (queue, dest, src): read the device buffers back.
pyopencl.enqueue_copy(q, c, outData_buf)
pyopencl.enqueue_copy(q, d, outCount_buf)
pyopencl.enqueue_copy(q, b, outMerge_buf).wait()
t5 = time.time()
pylab.plot(a, b, label="OpenCL_imageT")

# print() with a single pre-formatted string gives the same output on
# Python 2 and Python 3; the bare `print x, y` statement form only parses on
# Python 2.
print("OpenCL speed-up: %s setup: %.2fms \texec: %.2fms"
      % (0.001 * ref_time / (t5 - t3), 1000 * (t4 - t3), 1000 * (t5 - t4)))
# Largest absolute deviation of each output from its reference array.
print("%s %s %s %s" % (abs(ra - a).max(), abs(rb - b).max(),
                       abs(rc - c).max(), abs(rd - d).max()))

# Repeat the kernel for every candidate work-group size and report how long
# the launch plus the read-back of the merged output takes.
for j in list_size:
    st = time.time()
    program.lut_integrate_lutT(q, (bins,), (j,), *args_bufT)
    pyopencl.enqueue_copy(q, b, outMerge_buf).wait()
    print("Size: %s \ttime: %.2fms" % (j, 1000 * (time.time() - st)))
#plot(ee)
#pylab.plot(a, b, label="OpenCL")
pylab.legend()
# Kernel scalar arguments must be explicitly sized numpy scalars; a call with
# bare Python numbers is expected to fail with cl.LogicError.  The failure is
# raised from the `else` branch rather than with `assert`, so the check is
# still performed when Python runs with -O.
try:
    knl(queue, a.shape, None, a_buf, 2, 3)
except cl.LogicError:
    pass
else:
    raise AssertionError(
        "PyOpenCL should not accept bare Python types as arguments")

try:
    prg.mult(queue, a.shape, None, a_buf, float(2), 3)
except cl.LogicError:
    pass
else:
    raise AssertionError(
        "PyOpenCL should not accept bare Python types as arguments")

# Correctly typed scalars are accepted.
prg.mult(queue, a.shape, None, a_buf, np.float32(2), np.int32(3))

# enqueue_copy takes (queue, dest, src): the host array is the destination
# and the device buffer the source, so the kernel output is read back.
a_result = np.empty_like(a)
cl.enqueue_copy(queue, a_result, a_buf).wait()
# Run the 'backward_filter' OpenCL kernel over one chunk.
# NOTE(review): this excerpt stops at the final `if` below; its body and the
# method's return value are not visible here.
def compute_backward(self, chunk):
# Replace a non-C-contiguous input with a contiguous copy before uploading.
if not chunk.flags['C_CONTIGUOUS']:
chunk = chunk.copy()
# Zero the host array zi2 and upload it to zi2_cl
# (presumably the filter state -- TODO confirm against the kernel source).
self.zi2[:]=0
pyopencl.enqueue_copy(self.queue, self.zi2_cl, self.zi2)
if chunk.shape[0]==self.backward_chunksize:
# The chunk already has backward_chunksize rows: upload it unchanged.
pyopencl.enqueue_copy(self.queue, self.input2_cl, chunk)
else:
# Side effect at the beginning: a shorter chunk is written into the LAST
# rows of a zero-filled (backward_chunksize, nb_channel) array, so the
# leading rows stay zero.
chunk2 = np.zeros((self.backward_chunksize, self.nb_channel), dtype=self.dtype)
chunk2[-chunk.shape[0]:, :] = chunk
pyopencl.enqueue_copy(self.queue, self.input2_cl, chunk2)
# Look the kernel up by name, launch it and block until it has finished.
kern_call = getattr(self.opencl_prg, 'backward_filter')
event = kern_call(self.queue, self.global_size, self.local_size,
self.input2_cl, self.output2_cl, self.coefficients_cl, self.zi2_cl)
event.wait()
# Download the kernel output into the host array output2.
pyopencl.enqueue_copy(self.queue, self.output2, self.output2_cl)
if chunk.shape[0]==self.backward_chunksize:
for(int curiter = 0; curiter < maxiter; curiter++) {
nreal = real*real - imag*imag + q[gid].x;
imag = 2* real*imag + q[gid].y;
real = nreal;
if (real*real + imag*imag > 4.0f)
output[gid] = curiter;
}
}
""").build()
program.mandelbrot(queue, output.shape, None, q_opencl,
output_opencl, numpy.uint16(maxiter))
cl.enqueue_copy(queue, output, output_opencl).wait()
return output
# Device array of `size` float32 values that receives the converted image.
d_image_float = cl.array.empty(queue, (size,), dtype=numpy.float32)
# program.s32_to_float(queue, global_size, (workgroup_size,), d_image.data, d_image_float) # Pilatus1M
# Convert the raw image to float on the device (the s32 variant above is disabled).
program.u16_to_float(queue, global_size, (workgroup_size,), d_image.data, d_image_float.data) # halfccd
# Launch csr_integrate with `bins` work-groups of `workgroup_size` work-items
# each (global size = bins * workgroup_size).
program.csr_integrate(queue, (bins * workgroup_size,), (workgroup_size,), d_image_float.data, d_data.data, d_indices.data, d_idx_ptr.data, d_outData.data, d_outCount.data, d_outMerge.data)
# outData = numpy.ndarray(bins, dtype=numpy.float32)
# outCount = numpy.ndarray(bins, dtype=numpy.float32)
# numpy.ndarray(...) allocates without initialising; the copy below is what
# fills the array.
outMerge = numpy.ndarray(bins, dtype=numpy.float32)
# cl.enqueue_copy(queue,outData, d_outData)
# cl.enqueue_copy(queue,outCount, d_outCount)
# enqueue_copy takes (queue, dest, src): read the merged output back to the host.
cl.enqueue_copy(queue, outMerge, d_outMerge.data)
# program.integrate2(queue, (1024,), (workgroup_size,), d_outData, d_outCount, d_outMerge)
# cl.enqueue_copy(queue,outData, d_outData)
# cl.enqueue_copy(queue,outCount, d_outCount)
# cl.enqueue_copy(queue,outMerge, d_outMerge)
# ref = ai.integrate1d(data,bins,unit="2th_deg", correctSolidAngle=False, method="splitpixelfull")
# Reference built from the same pos/bins inputs
# (presumably used to validate outMerge -- the comparison is not visible here).
ref = splitPixelFullLUT.HistoLUT1dFullSplit(pos, bins, unit="2th_deg")
# Upload the host distance arrays to the device; distance_shifts is zeroed
# first. enqueue_copy takes (queue, dest, src).
event = pyopencl.enqueue_copy(self.queue, self.distance_templates_cl, self.distance_templates)
self.distance_shifts[:] = 0
event = pyopencl.enqueue_copy(self.queue, self.distance_shifts_cl, self.distance_shifts)
#~ rms_waveform_channel = np.sum(waveform**2, axis=0).astype('float32')
#~ pyopencl.enqueue_copy(self.queue, self.rms_waveform_channel_cl, rms_waveform_channel)
#~ event = self.kern_explore_templates(self.queue, self.cl_global_size, self.cl_local_size,
#~ self.one_waveform_cl, self.catalogue_center_cl,
#~ self.sparse_mask_level3_cl,
#~ self.rms_waveform_channel_cl, self.distance_templates_cl, self.channel_distances_cl,
#~ self.adjacency_radius_um_cl, np.int32(chan_ind))
# Only argument 7 (chan_ind, the last argument of the disabled call above) is
# set here before enqueueing the kernel directly; the other arguments are
# presumably bound once elsewhere -- TODO confirm.
self.kern_explore_templates.set_arg(7, np.int32(chan_ind))
event = pyopencl.enqueue_nd_range_kernel(self.queue, self.kern_explore_templates, self.cl_global_size, self.cl_local_size,)
# Read the per-template distances back and keep the index of the smallest one.
pyopencl.enqueue_copy(self.queue, self.distance_templates, self.distance_templates_cl)
cluster_idx = np.argmin(self.distance_templates)
shift = None
# TODO avoid double enqueue
# Slice of fifo_residuals that extends the peak window by
# maximum_jitter_shift rows on both sides, uploaded for the shift search.
long_waveform = self.fifo_residuals[left_ind-self.maximum_jitter_shift:left_ind+self.peak_width+self.maximum_jitter_shift+1,:]
pyopencl.enqueue_copy(self.queue, self.long_waveform_cl, long_waveform)
#~ event = self.kern_explore_shifts(
#~ self.queue, self.cl_global_size2, self.cl_local_size2,
#~ self.long_waveform_cl,
#~ self.catalogue_center_cl,
#~ self.sparse_mask_level2_cl,
#~ self.distance_shifts_cl,
#~ np.int32(cluster_idx))
# Same pattern as above: only argument 4 (cluster_idx) is updated per call.
self.kern_explore_shifts.set_arg(4, np.int32(cluster_idx))
event = pyopencl.enqueue_nd_range_kernel(self.queue, self.kern_explore_shifts, self.cl_global_size2, self.cl_local_size2,)
def __save_state(self, data):
    """Store a snapshot of the current run in ``data``.

    ``data`` is filled in place through item assignment: first the
    bookkeeping values kept on the instance, then the arrays read back from
    the device buffers together with the fitness summary values, then the
    mutation and crossover probabilities.  Finally the sample chromosome is
    asked to add its own entries.
    """
    # Values that already live on the host side.
    for key, value in (
            ('generation_idx', self.__generation_index),
            ('statistics', self.__dictStatistics),
            ('generation_time_diff', self.__generation_time_diff),
            ('population', self.__population)):
        data[key] = value

    # Read the device buffers back: ``rnum`` gets a fresh host array, while
    # fitnesses and chromosomes reuse the arrays held by the instance.
    host_rnum = numpy.zeros(self.__population, dtype=numpy.uint32)
    for host_array, device_buffer in (
            (host_rnum, self.__dev_rnum),
            (self.__fitnesses, self.__dev_fitnesses),
            (self.__np_chromosomes, self.__dev_chromosomes)):
        cl.enqueue_copy(self.__queue, host_array, device_buffer)

    # Downloaded arrays, fitness summary values and algorithm settings.
    for key, value in (
            ('rnum', host_rnum),
            ('fitnesses', self.__fitnesses),
            ('chromosomes', self.__np_chromosomes),
            ('best', self.__best_fitnesses[0]),
            ('worst', self.__worst_fitnesses[0]),
            ('avg', self.__avg),
            ('prob_mutation', self.__prob_mutation),
            ('prob_crossover', self.__prob_crossover)):
        data[key] = value

    # Let the sample chromosome append whatever it needs to persist.
    self.__sample_chromosome.save(
        data, self.__ctx, self.__queue, self.__population)
# enqueue_copy takes (queue, dest, src): read device buffers into host arrays.
cl.enqueue_copy(queue, outCount, d_outCount)
cl.enqueue_copy(queue, outMerge, d_outMerge)
# Round data.size up to a multiple of workgroup_size. The mask trick is only
# exact when workgroup_size is a power of two. The trailing comma makes this a
# 1-tuple, the form passed as the kernels' global size below.
global_size = (data.size + workgroup_size - 1) & ~(workgroup_size - 1),
d_image = cl.array.to_device(queue, data)
# Raw device buffer of 4 * size bytes (presumably `size` float32 values -- TODO confirm).
d_image_float = cl.Buffer(ctx, mf.READ_WRITE, 4 * size)
# program.s32_to_float(queue, global_size, (workgroup_size,), d_image.data, d_image_float) # Pilatus1M
program.u16_to_float(queue, global_size, (workgroup_size,), d_image.data, d_image_float) # halfccd
# First pass: integrate1 receives d_outData and d_outCount.
program.integrate1(queue, global_size, (workgroup_size,), d_pos.data, d_image_float, d_minmax, numpy.int32(data.size), d_outData, d_outCount)
# Host snapshot after the first pass.
# NOTE(review): in this excerpt d_outMerge is only passed to integrate2
# below, so the outMerge copy here precedes that kernel -- confirm intent.
cl.enqueue_copy(queue, outData, d_outData)
cl.enqueue_copy(queue, outCount, d_outCount)
cl.enqueue_copy(queue, outMerge, d_outMerge)
# Second pass with a fixed global size of 1024
# (presumably assumes bins <= 1024 -- TODO confirm).
program.integrate2(queue, (1024,), (workgroup_size,), d_outData, d_outCount, d_outMerge)
# Host snapshot after the second pass.
cl.enqueue_copy(queue, outData, d_outData)
cl.enqueue_copy(queue, outCount, d_outCount)
cl.enqueue_copy(queue, outMerge, d_outMerge)
# Two non-OpenCL results computed from the same input data.
ref = ai.xrpd_LUT(data, bins, correctSolidAngle=False)
test = splitPixelFull.fullSplit1D(pos, data, bins)
# assert(numpy.allclose(ref,outMerge))
# plot(outMerge, label="ocl_hist")
# Plots element 1 of `test` against element 0 of `ref`.
plot(ref[0], test[1], label="splitPixelFull")
* h1_norm2: error at order1
* h2_norm2: error at order2
"""
# This line is the slower part !!!!!!
# cluster_idx = np.argmin(np.sum(np.sum((catalogue['centers0']-waveform)**2, axis = 1), axis = 1))
catalogue = self.catalogue
# With no label supplied, choose cluster_idx as the argmin of a distance
# computed by the backend named in self.argmin_method.
# NOTE(review): this excerpt ends inside the 'numba' branch; the remaining
# branches and the handling of a given label are not visible here.
if label is None:
#~ if self.use_opencl_with_sparse:
if self.argmin_method == 'opencl':
# t1/t2 only feed the disabled timing print below.
t1 = time.perf_counter()
# Per-channel sum of squares of the waveform (summed over axis 0), as float32.
rms_waveform_channel = np.sum(waveform**2, axis=0).astype('float32')
# Upload both host arrays; enqueue_copy takes (queue, dest, src).
pyopencl.enqueue_copy(self.queue, self.one_waveform_cl, waveform)
pyopencl.enqueue_copy(self.queue, self.rms_waveform_channel_cl, rms_waveform_channel)
event = self.kern_waveform_distance(self.queue, self.cl_global_size, self.cl_local_size,
self.one_waveform_cl, self.catalogue_center_cl, self.sparse_mask_cl,
self.rms_waveform_channel_cl, self.waveform_distance_cl)
# Read the distances back to the host and take the index of the smallest.
pyopencl.enqueue_copy(self.queue, self.waveform_distance, self.waveform_distance_cl)
cluster_idx = np.argmin(self.waveform_distance)
t2 = time.perf_counter()
#~ print(' np.argmin opencl_with_sparse', (t2-t1)*1000., cluster_idx)
#~ elif self.use_pythran_with_sparse:
elif self.argmin_method == 'pythran':
# The pythran helper returns the values that argmin is taken over.
s = pythran_tools.pythran_loop_sparse_dist(waveform,
catalogue['centers0'], self.sparse_mask)
cluster_idx = np.argmin(s)
elif self.argmin_method == 'numba':
# Same inputs as the pythran branch, computed by the numba helper.
s = numba_loop_sparse_dist(waveform, catalogue['centers0'], self.sparse_mask)
kernel = program.kernel_sha512
worksize = kernel.get_work_group_info(cl.kernel_work_group_info.WORK_GROUP_SIZE, enabledGpus[0])
kernel.set_arg(0, hash_buf)
kernel.set_arg(1, dest_buf)
progress = 0
globamt = worksize * 2000
while output[0][0] == 0 and shutdown == 0:
kernel.set_arg(2, pack("