How to use the pyopencl.enqueue_copy function in pyopencl

To help you get started, we’ve selected a few examples of pyopencl.enqueue_copy, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github silx-kit / pyFAI / src / test_splitBBox.py View on Github external
numpy.uint32(integ.lut_size),
                       #lut_idx_buf,
                       #lut_coef_buf,
                       lut_bufT,
                       numpy.int32(0),
                       numpy.float32(0),
                       numpy.float32(0),
                       outData_buf,
                       outCount_buf,
                       outMerge_buf)
# Benchmark of the lut_integrate_lutT kernel: run it once with a work-group
# size of 16, read the three result buffers back, report timings and the
# deviation from the reference arrays, then time other work-group sizes.
t4 = time.time()  # everything between t3 and t4 is reported as "setup"
program.lut_integrate_lutT(q, (bins,), (16,), *args_bufT)
# Host-side destinations for the three output buffers (one float32 per bin).
b = numpy.empty(bins, dtype=numpy.float32)
c = numpy.empty(bins, dtype=numpy.float32)
d = numpy.empty(bins, dtype=numpy.float32)
# enqueue_copy takes (queue, dest, src): device buffer -> host array.
pyopencl.enqueue_copy(q, c, outData_buf)
pyopencl.enqueue_copy(q, d, outCount_buf)
pyopencl.enqueue_copy(q, b, outMerge_buf).wait()
t5 = time.time()  # everything between t4 and t5 is reported as "exec"
pylab.plot(a, b, label="OpenCL_imageT")
# print() with a single pre-formatted string behaves identically on Python 2
# and Python 3 (the former print statements were Python 2 only).
print("OpenCL speed-up: %s setup: %.2fms \texec: %.2fms" % (0.001 * ref_time / (t5 - t3), 1000 * (t4 - t3), 1000 * (t5 - t4)))
# Largest absolute deviation of each output from its reference array.
print("%s %s %s %s" % (abs(ra - a).max(), abs(rb - b).max(), abs(rc - c).max(), abs(rd - d).max()))
# Re-run the kernel with each candidate work-group size and time it,
# including the read-back of the merged result.
for j in list_size:
    st = time.time()
    program.lut_integrate_lutT(q, (bins,), (j,), * args_bufT)
    pyopencl.enqueue_copy(q, b, outMerge_buf).wait()
    print("Size: %s \ttime: %.2fms" % (j, 1000 * (time.time() - st)))


#plot(ee)
#pylab.plot(a, b, label="OpenCL")
pylab.legend()
github inducer / pyopencl / test / test_wrapper.py View on Github external
try:
        knl(queue, a.shape, None, a_buf, 2, 3)
        assert False, "PyOpenCL should not accept bare Python types as arguments"
    except cl.LogicError:
        pass

    try:
        prg.mult(queue, a.shape, None, a_buf, float(2), 3)
        assert False, "PyOpenCL should not accept bare Python types as arguments"
    except cl.LogicError:
        pass

    prg.mult(queue, a.shape, None, a_buf, np.float32(2), np.int32(3))

    a_result = np.empty_like(a)
    cl.enqueue_copy(queue, a_buf, a_result).wait()
github pyacq / pyacq / pyacq / dsp / overlapfiltfilt.py View on Github external
def compute_backward(self, chunk):
        if not chunk.flags['C_CONTIGUOUS']:
            chunk = chunk.copy()
        
        self.zi2[:]=0
        pyopencl.enqueue_copy(self.queue,  self.zi2_cl, self.zi2)
        
        if chunk.shape[0]==self.backward_chunksize:
            pyopencl.enqueue_copy(self.queue,  self.input2_cl, chunk)
        else:
            #side effect at the begining
            chunk2 = np.zeros((self.backward_chunksize, self.nb_channel), dtype=self.dtype)
            chunk2[-chunk.shape[0]:, :] = chunk
            pyopencl.enqueue_copy(self.queue,  self.input2_cl, chunk2)
            
        kern_call = getattr(self.opencl_prg, 'backward_filter')
        event = kern_call(self.queue, self.global_size, self.local_size,
                                self.input2_cl, self.output2_cl, self.coefficients_cl, self.zi2_cl)
        event.wait()
        
        pyopencl.enqueue_copy(self.queue,  self.output2, self.output2_cl)
        if chunk.shape[0]==self.backward_chunksize:
github benshope / PyOpenCL-Tutorial / 130_game_of_life.py View on Github external
for(int curiter = 0; curiter < maxiter; curiter++) {
            nreal = real*real - imag*imag + q[gid].x;
            imag = 2* real*imag + q[gid].y;
            real = nreal;

            if (real*real + imag*imag > 4.0f)
                 output[gid] = curiter;
        }
    }
    """).build()

    program.mandelbrot(queue, output.shape, None, q_opencl,
            output_opencl, numpy.uint16(maxiter))

    cl.enqueue_copy(queue, output, output_opencl).wait()

    return output
github silx-kit / pyFAI / sandbox / profile_ocl_lut_pixelsplit3.py View on Github external
# Profiling script section: convert the raw image to float on the device, run
# the csr_integrate kernel and read the merged result back to the host.

# Device-side float32 array with `size` elements that receives the converted image.
d_image_float = cl.array.empty(queue, (size,), dtype=numpy.float32)

# program.s32_to_float(queue, global_size, (workgroup_size,), d_image.data, d_image_float)  # Pilatus1M
# Kernel arguments are the underlying buffers (.data) of the pyopencl arrays.
program.u16_to_float(queue, global_size, (workgroup_size,), d_image.data, d_image_float.data)  # halfccd

# Global size bins * workgroup_size with local size workgroup_size, i.e. `bins`
# work-groups of `workgroup_size` work-items each.
program.csr_integrate(queue, (bins * workgroup_size,), (workgroup_size,), d_image_float.data, d_data.data, d_indices.data, d_idx_ptr.data, d_outData.data, d_outCount.data, d_outMerge.data)


# outData  = numpy.ndarray(bins, dtype=numpy.float32)
# outCount = numpy.ndarray(bins, dtype=numpy.float32)
# Uninitialised host array of `bins` float32 values; filled by the copy below.
outMerge = numpy.ndarray(bins, dtype=numpy.float32)


# cl.enqueue_copy(queue,outData, d_outData)
# cl.enqueue_copy(queue,outCount, d_outCount)
# enqueue_copy takes (queue, dest, src): device buffer -> host array.
cl.enqueue_copy(queue, outMerge, d_outMerge.data)

# program.integrate2(queue, (1024,), (workgroup_size,), d_outData, d_outCount, d_outMerge)

# cl.enqueue_copy(queue,outData, d_outData)
# cl.enqueue_copy(queue,outCount, d_outCount)
# cl.enqueue_copy(queue,outMerge, d_outMerge)



# ref = ai.integrate1d(data,bins,unit="2th_deg", correctSolidAngle=False, method="splitpixelfull")

# NOTE(review): presumably the reference result to compare outMerge against - confirm.
ref = splitPixelFullLUT.HistoLUT1dFullSplit(pos, bins, unit="2th_deg")
github tridesclous / tridesclous / tridesclous / peeler_engine_geometry.py View on Github external
event = pyopencl.enqueue_copy(self.queue,  self.distance_templates_cl, self.distance_templates)
            self.distance_shifts[:] = 0
            event = pyopencl.enqueue_copy(self.queue,  self.distance_shifts_cl, self.distance_shifts)
            
            #~ rms_waveform_channel = np.sum(waveform**2, axis=0).astype('float32')
            #~ pyopencl.enqueue_copy(self.queue,  self.rms_waveform_channel_cl, rms_waveform_channel)
            
            
            #~ event = self.kern_explore_templates(self.queue,  self.cl_global_size, self.cl_local_size,
                        #~ self.one_waveform_cl, self.catalogue_center_cl,
                        #~ self.sparse_mask_level3_cl,
                        #~ self.rms_waveform_channel_cl, self.distance_templates_cl,  self.channel_distances_cl, 
                        #~ self.adjacency_radius_um_cl, np.int32(chan_ind))
            self.kern_explore_templates.set_arg(7, np.int32(chan_ind))
            event = pyopencl.enqueue_nd_range_kernel(self.queue,  self.kern_explore_templates, self.cl_global_size, self.cl_local_size,)
            pyopencl.enqueue_copy(self.queue,  self.distance_templates, self.distance_templates_cl)
            
            cluster_idx = np.argmin(self.distance_templates)
            shift = None
            
            # TODO avoid double enqueue
            long_waveform = self.fifo_residuals[left_ind-self.maximum_jitter_shift:left_ind+self.peak_width+self.maximum_jitter_shift+1,:]
            pyopencl.enqueue_copy(self.queue,  self.long_waveform_cl, long_waveform)
            #~ event = self.kern_explore_shifts(
                                        #~ self.queue,  self.cl_global_size2, self.cl_local_size2,
                                        #~ self.long_waveform_cl,
                                        #~ self.catalogue_center_cl,
                                        #~ self.sparse_mask_level2_cl, 
                                        #~ self.distance_shifts_cl,
                                        #~ np.int32(cluster_idx))
            self.kern_explore_shifts.set_arg(4, np.int32(cluster_idx))
            event = pyopencl.enqueue_nd_range_kernel(self.queue,  self.kern_explore_shifts, self.cl_global_size2, self.cl_local_size2,)
github PyOCL / OpenCLGA / OpenCLGA / ocl_ga.py View on Github external
def __save_state(self, data):
        """Fill ``data`` in place with a snapshot of the current state.

        The entries are written through item assignment, in this order:
        generation bookkeeping, the arrays read back with
        ``cl.enqueue_copy``, the fitness summary and the mutation/crossover
        probabilities. ``data`` is finally handed to the sample chromosome's
        ``save`` together with the context, queue and population size.
        """
        # Values held in the internal structures.
        data['generation_idx'] = self.__generation_index
        data['statistics'] = self.__dictStatistics
        data['generation_time_diff'] = self.__generation_time_diff
        data['population'] = self.__population

        # Read the kernel-side memory back: enqueue_copy takes
        # (queue, dest, src), so each pair below is (host array, source).
        host_rnum = numpy.zeros(self.__population, dtype=numpy.uint32)
        transfers = (
            (host_rnum, self.__dev_rnum),
            (self.__fitnesses, self.__dev_fitnesses),
            (self.__np_chromosomes, self.__dev_chromosomes),
        )
        for destination, source in transfers:
            cl.enqueue_copy(self.__queue, destination, source)

        # Store the arrays that were just refreshed.
        data['rnum'] = host_rnum
        data['fitnesses'] = self.__fitnesses
        data['chromosomes'] = self.__np_chromosomes

        # Fitness summary: first element of the best/worst arrays and the average.
        data['best'] = self.__best_fitnesses[0]
        data['worst'] = self.__worst_fitnesses[0]
        data['avg'] = self.__avg

        # Algorithm parameters.
        data['prob_mutation'] = self.__prob_mutation
        data['prob_crossover'] = self.__prob_crossover

        self.__sample_chromosome.save(data, self.__ctx, self.__queue, self.__population)
github silx-kit / pyFAI / sandbox / profile_ocl_hist_pixelsplit.py View on Github external
# Profiling script section: read back the current device outputs, run the
# u16_to_float / integrate1 / integrate2 kernels, and read the outputs back
# after each integration step.
# enqueue_copy takes (queue, dest, src): device buffer -> host array.
cl.enqueue_copy(queue, outCount, d_outCount)
cl.enqueue_copy(queue, outMerge, d_outMerge)

# Round data.size up to a multiple of workgroup_size; the bit-mask form is only
# valid when workgroup_size is a power of two. The trailing comma makes this a
# 1-tuple, as the kernel calls below expect a tuple for the global size.
global_size = (data.size + workgroup_size - 1) & ~(workgroup_size - 1),

# Upload the image and allocate a 4 * size byte buffer for its converted copy
# (4 bytes per element, presumably float32 - confirm).
d_image = cl.array.to_device(queue, data)
d_image_float = cl.Buffer(ctx, mf.READ_WRITE, 4 * size)

# program.s32_to_float(queue, global_size, (workgroup_size,), d_image.data, d_image_float)  # Pilatus1M
program.u16_to_float(queue, global_size, (workgroup_size,), d_image.data, d_image_float)  # halfccd

# First integration kernel; it is passed d_outData and d_outCount but not d_outMerge.
program.integrate1(queue, global_size, (workgroup_size,), d_pos.data, d_image_float, d_minmax, numpy.int32(data.size), d_outData, d_outCount)

# Read the outputs back after integrate1.
cl.enqueue_copy(queue, outData, d_outData)
cl.enqueue_copy(queue, outCount, d_outCount)
cl.enqueue_copy(queue, outMerge, d_outMerge)

# Second kernel, launched with a fixed global size of 1024 work-items.
program.integrate2(queue, (1024,), (workgroup_size,), d_outData, d_outCount, d_outMerge)

# Read the outputs back again after integrate2.
cl.enqueue_copy(queue, outData, d_outData)
cl.enqueue_copy(queue, outCount, d_outCount)
cl.enqueue_copy(queue, outMerge, d_outMerge)



# NOTE(review): presumably reference results for comparison with the OpenCL output - confirm.
ref = ai.xrpd_LUT(data, bins, correctSolidAngle=False)
test = splitPixelFull.fullSplit1D(pos, data, bins)

# assert(numpy.allclose(ref,outMerge))

# plot(outMerge, label="ocl_hist")
# Plots element 0 of `ref` against element 1 of `test`.
plot(ref[0], test[1], label="splitPixelFull")
github tridesclous / tridesclous / tridesclous / peeler_engine_oldclassic.py View on Github external
* h1_norm2: error at order1
          * h2_norm2: error at order2
        """
        # This line is the slower part !!!!!!
        # cluster_idx = np.argmin(np.sum(np.sum((catalogue['centers0']-waveform)**2, axis = 1), axis = 1))
        
        catalogue = self.catalogue
        
        if label is None:
            #~ if self.use_opencl_with_sparse:
            if self.argmin_method == 'opencl':
                t1 = time.perf_counter()
                rms_waveform_channel = np.sum(waveform**2, axis=0).astype('float32')
                
                pyopencl.enqueue_copy(self.queue,  self.one_waveform_cl, waveform)
                pyopencl.enqueue_copy(self.queue,  self.rms_waveform_channel_cl, rms_waveform_channel)
                event = self.kern_waveform_distance(self.queue,  self.cl_global_size, self.cl_local_size,
                            self.one_waveform_cl, self.catalogue_center_cl, self.sparse_mask_cl, 
                            self.rms_waveform_channel_cl, self.waveform_distance_cl)
                pyopencl.enqueue_copy(self.queue,  self.waveform_distance, self.waveform_distance_cl)
                cluster_idx = np.argmin(self.waveform_distance)
                t2 = time.perf_counter()
                #~ print('       np.argmin opencl_with_sparse', (t2-t1)*1000., cluster_idx)


            #~ elif self.use_pythran_with_sparse:
            elif self.argmin_method == 'pythran':
                s = pythran_tools.pythran_loop_sparse_dist(waveform, 
                                    catalogue['centers0'],  self.sparse_mask)
                cluster_idx = np.argmin(s)
            elif self.argmin_method == 'numba':
                s = numba_loop_sparse_dist(waveform, catalogue['centers0'],  self.sparse_mask)
github Bitmessage / PyBitmessage / src / openclpow.py View on Github external
kernel = program.kernel_sha512
    worksize = kernel.get_work_group_info(cl.kernel_work_group_info.WORK_GROUP_SIZE, enabledGpus[0])

    kernel.set_arg(0, hash_buf)
    kernel.set_arg(1, dest_buf)

    progress = 0
    globamt = worksize * 2000

    while output[0][0] == 0 and shutdown == 0:
        kernel.set_arg(2, pack("