Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def execute(self):
    """Run the ``part1`` kernel over ``self.a`` and print the result.

    The kernel is launched with a global size of ``self.a.shape`` and
    ``None`` as the local size. ``self.dest_buf`` is then copied into a new
    host array shaped like ``self.a`` and that array is printed.

    Returns:
        None. The result is only printed, not returned.
    """
    self.program.part1(self.queue, self.a.shape, None, self.a_buf, self.dest_buf)
    result = numpy.empty_like(self.a)
    # enqueue_copy(queue, dest, src) replaces the deprecated
    # enqueue_read_buffer(queue, src, dest); note the swapped operand order.
    cl.enqueue_copy(self.queue, result, self.dest_buf).wait()
    # The parenthesised single-argument form prints identically on
    # Python 2 and Python 3.
    print(result)
# NOTE(review): this is the tail of a function whose header is outside this
# view; ctx, mf, queue, prg, a, a_buf and b are not defined in the visible lines.
# Device-side copy of b (read-only for the kernel) and an output buffer with
# the same byte size as b.
b_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b)
dest_buf = cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes)
# Pack four floats followed by one int ('ffffi') into a byte string that is
# uploaded as the kernel's parameter buffer.
params = struct.pack('ffffi', .5, 10., 0., 0., 3)
# Debug output: packed length next to the size struct reports for the format.
print len(params), struct.calcsize('ffffi')
# Allocate a device buffer of exactly that length and upload the packed bytes.
params_buf = cl.Buffer(ctx, mf.READ_ONLY, len(params))
cl.enqueue_write_buffer(queue, params_buf, params).wait()
# Launch part3 with a global size equal to a's shape and no explicit local size.
global_size = a.shape
local_size = None
prg.part3(queue, global_size, local_size, a_buf, b_buf, dest_buf, params_buf)
queue.finish()
# Read the kernel output back into a host array shaped like a.
# NOTE(review): dest_buf is sized from b.nbytes but read into an array shaped
# like a - confirm a and b always have the same byte size.
c = np.empty_like(a)
cl.enqueue_read_buffer(queue, dest_buf, c).wait()
return c
def execute(self):
    """Run the ``part1`` kernel over ``self.a`` and print the labelled result.

    The kernel is launched with a global size of ``self.a.shape`` and
    ``None`` as the local size. ``self.dest_buf`` is then copied into a new
    host array shaped like ``self.a``, which is printed prefixed with ``c``.

    Returns:
        None. The result is only printed, not returned.
    """
    self.program.part1(self.queue, self.a.shape, None, self.a_buf, self.dest_buf)
    result = numpy.empty_like(self.a)
    # enqueue_copy(queue, dest, src) replaces the deprecated
    # enqueue_read_buffer(queue, src, dest); note the swapped operand order.
    cl.enqueue_copy(self.queue, result, self.dest_buf).wait()
    # Formatting into a single string keeps the output identical to the old
    # `print "c", c` statement on both Python 2 and Python 3.
    print("c %s" % (result,))
def execute(self):
    """Advance the grid by one ``Conway`` kernel launch and print it.

    The kernel runs over ``self.a.shape`` with ``None`` as the local size.
    Its output is read from ``self.dest_buf`` into ``self.c``, which then
    becomes the new ``self.a``. Both device buffers are recreated from the
    new state so the next call starts from it.

    Returns:
        None. The new state is stored on ``self`` and printed.
    """
    self.program.Conway(self.queue, self.a.shape, None, self.ar_ySize, self.a_buf, self.dest_buf)
    # enqueue_copy(queue, dest, src) replaces the deprecated
    # enqueue_read_buffer(queue, src, dest); note the swapped operand order.
    cl.enqueue_copy(self.queue, self.c, self.dest_buf).wait()
    # No copy is made here: self.a and self.c refer to the same array afterwards.
    self.a = self.c
    print(self.a)

    # Refresh buffers: the input buffer is re-created from the new host state
    # (COPY_HOST_PTR), and the output buffer is re-allocated at the same size.
    mf = cl.mem_flags
    self.a_buf = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=self.a)
    self.dest_buf = cl.Buffer(self.ctx, mf.WRITE_ONLY, self.a.nbytes)
# NOTE(review): the opening of this expression (the construction of `mod`) lies
# outside this view; only its tail is visible. The visible part appends one
# Assign per stride o, writing op1 + op2 into tgt at offset o*local_size.
% (local_size*thread_strides))
]+[
Assign(
"tgt[idx+%d]" % (o*local_size),
"op1[idx+%d] + op2[idx+%d]" % (
o*local_size,
o*local_size))
for o in range(thread_strides)]))])
# Build the generated source and fetch its `add` kernel.
knl = cl.Program(ctx, str(mod)).build().add
# Launch with local_size work-items per group and macroblock_count groups.
knl(queue, (local_size*macroblock_count,), (local_size,),
c_buf, a_buf, b_buf)
# Read the result back and check that it equals a + b exactly.
c = numpy.empty_like(a)
cl.enqueue_read_buffer(queue, c_buf, c).wait()
assert la.norm(c-(a+b)) == 0
# NOTE(review): this is a fragment of a method whose header is outside this
# view; chromosomesArray, distances, num_of_chromosomes, chromosomes and mf are
# not defined in the visible lines.
# Upload the chromosomes to the device as an int32 array.
dev_chromosomes = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR,
hostbuf=numpy.array(chromosomesArray, dtype=numpy.int32))
# Device buffer with the same byte size as the host `distances` array.
dev_distances = cl.Buffer(self.ctx, mf.WRITE_ONLY,
distances.nbytes)
# Copy the current host `distances` contents into the device buffer.
cl.enqueue_copy(self.queue, dev_distances, distances)
# Launch tsp_fitness with a global size of num_of_chromosomes and a local
# size of 1, passing len(self.city_points)+1 and the chromosome count as int32.
exec_evt = self.prg.tsp_fitness(self.queue,
(num_of_chromosomes,),
(1,),
self.dev_points.data,
dev_chromosomes,
dev_distances,
numpy.int32(len(self.city_points)+1),
numpy.int32(num_of_chromosomes))
exec_evt.wait()
# Read the device results back into the host `distances` array.
cl.enqueue_read_buffer(self.queue, dev_distances, distances).wait()
# The larger distance is the weaker fitness, so make it -1*distance.
for idx, distance in enumerate(distances):
self.update_chromosome_fitness(chromosomes[idx], -1*distance)
def reduce_lattice2():
    """Sum ``qxqz_buf`` and ``face_buf`` on the device and return both totals.

    Each pass launches ``program.float4_sum`` with a global size of ``64*8``
    and a local size of ``64``. It then reads ``reduce_buf`` back as an
    ``(8, 4)`` float32 array of partial sums and finishes the reduction on
    the host.

    Returns:
        A ``(cxcz, qxqz)`` tuple. ``qxqz`` is the column-wise sum over all
        eight rows read back after the ``qxqz_buf`` pass. ``cxcz`` is the
        column-wise sum over only the first two rows read back after the
        ``face_buf`` pass.
    """
    count = np.int32(lengthL + lengthR)
    partial = np.empty((8, 4), 'f')

    program.float4_sum(queue, (64*8,), (64,),
                       reduce_buf, reduce_scratch,
                       qxqz_buf, count)
    # enqueue_copy(queue, dest, src) replaces the deprecated
    # enqueue_read_buffer(queue, src, dest); note the swapped operand order.
    cl.enqueue_copy(queue, partial, reduce_buf).wait()
    qxqz = partial.sum(0)

    program.float4_sum(queue, (64*8,), (64,),
                       reduce_buf, reduce_scratch,
                       face_buf, count)
    cl.enqueue_copy(queue, partial, reduce_buf).wait()
    # NOTE(review): only the first two rows are summed here, unlike the qxqz
    # pass above - presumably intentional; confirm against the kernel.
    cxcz = partial[:2, :].sum(0)

    return cxcz, qxqz
# NOTE(review): this is a fragment of a test method whose header is outside
# this view; self.i, self.h and self.o look like input, hidden and output
# layers - confirm against the fixture setup.
# Forward pass: feed [1.0, 0.0] in and check the single output value.
self.i.set_inputs( numpy.array( [1.0, 0.0], numpy.float32 ), is_blocking = True )
self.i.process()
self.assertArrayEqual( self.o.get_outputs(), [ 0.29154554 ] )
# Device-side total error value, initialised to 1e12.
total_error_buf = pyopencl.Buffer(
self.nnc.opencl.context, pyopencl.mem_flags.READ_WRITE | pyopencl.mem_flags.COPY_HOST_PTR,
hostbuf = numpy.array( [1e12], numpy.float32 ) )
# Backward pass: use all-ones target outputs, then compute weight gradients.
self.o._set_outputs_and_calc_errors( numpy.ones( [self.nnc.output_layer.neuron_count], numpy.float32 ), total_error_buf )
self.i.calc_weights_gradient()
# Read the raw error and gradient buffers back to the host for inspection.
err = numpy.ndarray( [ self.nnc._neurons_buf_size ], numpy.float32 )
grad = numpy.ndarray( [ self.nnc._weights_buf_size ], numpy.float32 )
pyopencl.enqueue_read_buffer( self.ocl.queue, self.nnc._errors_backpropagation_buf, err, is_blocking = True )
pyopencl.enqueue_read_buffer( self.ocl.queue, self.nnc._gradient_buf, grad, is_blocking = True )
# Expected output-neuron error: (output - target) * 0.6666666 * (1 - output^2),
# using the output 0.29154554 asserted above and a target of 1.0.
real_err = ( 0.29154554 - 1.0 ) * 0.6666666 * ( 1.0 - 0.29154554 * 0.29154554 )
self.assertAlmostEqual( err[self.o._neurons_offset], real_err, places = 5 )
# Check each layer's slice of the shared gradient buffer.
self.assertArrayEqual( grad[self.o._weights_offset:self.o._weights_offset + self.o._weights_count], [ real_err ] + list( self.o.get_inputs() * real_err ) )
self.assertArrayEqual( grad[self.i._weights_offset:self.i._weights_offset + self.i._weights_count], [ -0.033015892, -0.033015892, 0.0 ] * 2 )
self.assertArrayEqual( grad[self.h._weights_offset:self.h._weights_offset + self.h._weights_count], [ -0.08401663, -0.01113619, -0.01113619 ] * 3 )
# The per-layer accessor must report the same values as the raw buffer slices.
self.assertArrayEqual( self.o._get_gradient(), [ real_err ] + list( self.o.get_inputs() * real_err ) )
self.assertArrayEqual( self.i._get_gradient(), [ -0.033015892, -0.033015892, 0.0 ] * 2 )
# NOTE(review): this is a fragment - the enclosing function header is outside
# this view and the final line is cut off inside a string literal.
# Output buffer with the same byte size as the host `output` array.
dest_buf = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY, output.nbytes)
kernel = program.kernel_sha512
# Query WORK_GROUP_SIZE for this kernel on the first enabled GPU.
worksize = kernel.get_work_group_info(cl.kernel_work_group_info.WORK_GROUP_SIZE, enabledGpus[0])
# Arguments 0 and 1 are set once before the loop; argument 2 is set inside it.
kernel.set_arg(0, hash_buf)
kernel.set_arg(1, dest_buf)
progress = 0
# 2000 times the queried work-group size; presumably the number of work-items
# per launch - confirm against the truncated loop body.
globamt = worksize * 2000
# Keep going until the first output element is non-zero or shutdown is flagged.
while output[0][0] == 0 and shutdown == 0:
kernel.set_arg(2, pack("
def get_inputs( self ):
    """
    Returns inputs.

    Reads ``self.inputs_per_neuron - 1`` float32 values from
    ``self.context._inputs_buf``, starting ``self._inputs_offset`` elements
    into the buffer. The read is blocking, so the returned array is fully
    populated.
    """
    # NOTE(review): the "- 1" presumably skips a bias slot - confirm.
    inputs = numpy.empty( [ self.inputs_per_neuron - 1 ], numpy.float32 )
    # enqueue_copy(queue, dest, src) replaces the deprecated
    # enqueue_read_buffer(queue, src, dest); note the swapped operand order.
    # The element offset is converted to bytes using the array's own itemsize
    # (4 for float32) rather than a hard-coded constant.
    pyopencl.enqueue_copy(
        self.opencl.queue, inputs, self.context._inputs_buf,
        device_offset = int( self._inputs_offset * inputs.itemsize ),
        is_blocking = True
        )
    return inputs