Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Upload the optional "from_sep_close_bigger" starts/lists arrays to the
# device. Nothing is uploaded when the traversal carries no starts array.
# NOTE(review): the original indentation was lost; both uploads are assumed
# to belong to this branch -- confirm against the upstream file.
if traversal.from_sep_close_bigger_starts is not None:
    to_device = cl.array.to_device
    self.from_sep_close_bigger_starts_dev = to_device(
        queue, traversal.from_sep_close_bigger_starts)
    self.from_sep_close_bigger_lists_dev = to_device(
        queue, traversal.from_sep_close_bigger_lists)
# Helper kernel for the ancestor box query.
# For every index i other than 0 whose `current[i]` is non-zero, the kernel
# sets `parent[box_parent_ids[i]]` to 1. Index 0 is never propagated
# (NOTE(review): presumably because box 0 is the root -- confirm).
parent_ids_ctype = dtype_to_ctype(self.tree.box_id_dtype)
mark_parent_arguments = (
    "__global char *current, __global char *parent, "
    "__global %s *box_parent_ids" % parent_ids_ctype
)
self.mark_parent_knl = cl.elementwise.ElementwiseKernel(
    queue.context,
    mark_parent_arguments,
    "if(i != 0 && current[i]) parent[box_parent_ids[i]] = 1"
)
# helper kernel for adding boxes from interaction list 1 and 4
self.add_interaction_list_boxes = cl.elementwise.ElementwiseKernel(
queue.context,
Template("""
__global ${box_id_t} *box_list,
__global char *responsible_boxes_mask,
__global ${box_id_t} *interaction_boxes_starts,
__global ${box_id_t} *interaction_boxes_lists,
__global char *src_boxes_mask
""", strict_undefined=True).render(
box_id_t=dtype_to_ctype(self.tree.box_id_dtype)
),
Template(r"""
typedef ${box_id_t} box_id_t;
box_id_t current_box = box_list[i];
if(responsible_boxes_mask[current_box]) {
for(box_id_t box_idx = interaction_boxes_starts[i];
box_idx < interaction_boxes_starts[i + 1];
# Elementwise fill kernel: writes the scalar `value` into `array[i]` for
# every index i the kernel is launched over.
set_to_i_arguments = "int* array, int value"
set_to_i_operation = "array[i] = value;"
self.kernels.set_to_i = ElementwiseKernel(
    ctx, set_to_i_arguments, set_to_i_operation)
# Kernel stored as `lcc`. Per element it computes
#     var = norm_factor * map2_ave[i] - map_ave[i]**2
# and writes lcc[i] = gcc[i] / sqrt(var) when var exceeds `varlimit`,
# otherwise lcc[i] = 0. The threshold keeps the division away from
# zero or negative variances.
lcc_arguments = """float *gcc, float *map_ave, float *map2_ave,
float norm_factor, float *lcc, float varlimit"""
lcc_operation = """float var = norm_factor*map2_ave[i] - pown(map_ave[i], 2);
if (var > varlimit)
lcc[i] = gcc[i]/sqrt(var);
else
lcc[i] = 0.0f;"""
self.kernels.lcc = ElementwiseKernel(ctx, lcc_arguments, lcc_operation)
self.kernels.take_best = ElementwiseKernel(ctx,
"""float *lcc, float *best_lcc, int *rotmat_ind, int ind""",
"""if (lcc[i] > best_lcc[i]) {
best_lcc[i] = lcc[i];
preamble=common_kernel
)
# Render the 'update_e_dispersive_B.cl' template into kernel source text.
# Field extents come from the three axes of `Ex`; the ID extents come from
# axes 1..3 of `ID` (axis 0 is not passed to the template); the *_T extents
# are the precomputed nx_t / ny_t / nz_t values.
dispersive_b_template = elwise_jinja_env.get_template('update_e_dispersive_B.cl')
dispersive_b_field_shape = self.G.Ex.shape
dispersive_b_id_shape = self.G.ID.shape
update_e_dispersive_B_context = dispersive_b_template.render(
    REAL=self.datatypes['REAL'],
    NX_FIELDS=dispersive_b_field_shape[0],
    NY_FIELDS=dispersive_b_field_shape[1],
    NZ_FIELDS=dispersive_b_field_shape[2],
    NX_ID=dispersive_b_id_shape[1],
    NY_ID=dispersive_b_id_shape[2],
    NZ_ID=dispersive_b_id_shape[3],
    NX_T=nx_t,
    NY_T=ny_t,
    NZ_T=nz_t,
)
# Build the "update_e_dispersive_B" elementwise kernel.
# The argument list is a string template in which $REAL / ${COMPLEX} are
# replaced by the configured type names; the kernel body is the previously
# rendered `update_e_dispersive_B_context`, and `common_kernel` is prepended
# as the preamble.
dispersive_b_type_names = {
    'REAL': self.datatypes['REAL'],
    'COMPLEX': self.datatypes['COMPLEX'],
}
dispersive_b_arguments = Template(
    "int NX, int NY, int NZ, int MAXPOLES, "
    "__global const ${COMPLEX}_t* restrict updatecoeffsdispersive, "
    "__global ${COMPLEX}_t *Tx, "
    "__global ${COMPLEX}_t *Ty, "
    "__global ${COMPLEX}_t *Tz, "
    "__global const unsigned int* restrict ID, "
    "__global const $REAL* restrict Ex, "
    "__global const $REAL* restrict Ey, "
    "__global const $REAL* restrict Ez"
).substitute(dispersive_b_type_names)
self.update_e_dispersive_B = ElementwiseKernel(
    self.context,
    dispersive_b_arguments,
    update_e_dispersive_B_context,
    "update_e_dispersive_B",
    preamble=common_kernel,
)
# Render the kernel source for the electric-field update from the
# 'update_field_e.cl' template. Field extents are the three axes of `Ex`;
# ID extents are axes 1..3 of `ID` (axis 0 is not passed to the template).
e_update_template = elwise_jinja_env.get_template('update_field_e.cl')
e_update_field_shape = self.G.Ex.shape
e_update_id_shape = self.G.ID.shape
e_update_context = e_update_template.render(
    NX_FIELDS=e_update_field_shape[0],
    NY_FIELDS=e_update_field_shape[1],
    NZ_FIELDS=e_update_field_shape[2],
    NX_ID=e_update_id_shape[1],
    NY_ID=e_update_id_shape[2],
    NZ_ID=e_update_id_shape[3],
)
# Elementwise kernel for fetching local targets. Both the argument list and
# the kernel body are rendered from shared particle templates, specialised
# here with the tree's dimension count and its `targets_have_extent` flag.
# Note that `mask_t` and `particle_id_t` are both rendered from the tree's
# particle id dtype.
tgt_ndims = global_tree.dimensions
tgt_have_extent = global_tree.targets_have_extent
tgt_particle_id_ctype = dtype_to_ctype(global_tree.particle_id_dtype)

fetch_local_tgt_arguments = fetch_local_paticles_arguments.render(
    mask_t=tgt_particle_id_ctype,
    coord_t=dtype_to_ctype(global_tree.coord_dtype),
    ndims=tgt_ndims,
    particles_have_extent=tgt_have_extent
)
fetch_local_tgt_operation = fetch_local_particles_prg.render(
    particle_id_t=tgt_particle_id_ctype,
    ndims=tgt_ndims,
    particles_have_extent=tgt_have_extent
)
fetch_local_tgt_knl = cl.elementwise.ElementwiseKernel(
    context,
    fetch_local_tgt_arguments,
    fetch_local_tgt_operation
)
# Kernel that remaps per-box start offsets through a scan array:
#     new_starts[i] = particle_scan[old_starts[i]]
# All three buffers use the tree's particle id type.
box_particle_starts_arguments = Template("""
__global ${particle_id_t} *old_starts,
__global ${particle_id_t} *particle_scan,
__global ${particle_id_t} *new_starts
""", strict_undefined=True).render(
    particle_id_t=dtype_to_ctype(global_tree.particle_id_dtype)
)
generate_box_particle_starts = cl.elementwise.ElementwiseKernel(
    context,
    box_particle_starts_arguments,
    "new_starts[i] = particle_scan[old_starts[i]]",
    name="generate_box_particle_starts"
)
generate_box_particle_counts_nonchild = cl.elementwise.ElementwiseKernel(
context,
Template("""
__global char *res_boxes,
def generate_local_travs(
local_tree, box_bounding_box=None, comm=MPI.COMM_WORLD,
well_sep_is_n_away=1, from_sep_smaller_crit=None,
merge_close_lists=False):
start_time = time.time()
d_tree = local_tree.to_device(queue)
# Modify box flags for targets
from boxtree import box_flags_enum
box_flag_t = dtype_to_ctype(box_flags_enum.dtype)
modify_target_flags_knl = cl.elementwise.ElementwiseKernel(
queue.context,
Template("""
__global ${particle_id_t} *box_target_counts_nonchild,
__global ${particle_id_t} *box_target_counts_cumul,
__global ${box_flag_t} *box_flags
""").render(particle_id_t=dtype_to_ctype(local_tree.particle_id_dtype),
box_flag_t=box_flag_t),
Template("""
box_flags[i] &= (~${HAS_OWN_TARGETS});
box_flags[i] &= (~${HAS_CHILD_TARGETS});
if(box_target_counts_nonchild[i]) box_flags[i] |= ${HAS_OWN_TARGETS};
if(box_target_counts_nonchild[i] < box_target_counts_cumul[i])
box_flags[i] |= ${HAS_CHILD_TARGETS};
""").render(HAS_OWN_TARGETS=("(" + box_flag_t + ") " +
str(box_flags_enum.HAS_OWN_TARGETS)),
HAS_CHILD_TARGETS=("(" + box_flag_t + ") " +
from pycbc.scheme import mgr
# Kernel body: every element whose magnitude exceeds `threshold` reserves an
# output slot by atomically incrementing the counter `bn`, then stores its
# value in `outv` and its index in `outl` at that slot.
# NOTE(review): the counter is reset to 0 by work-item 0 inside the same
# kernel, with no visible synchronisation against the atomic_add calls of
# other work-items; this looks like a race -- confirm, and consider zeroing
# `bn` on the host before launch instead.
threshold_op = """
if (i == 0)
bn[0] = 0;
cfloat_t val = in[i];
if ( cfloat_abs(val) > threshold){
int n_w = atomic_add(bn, 1);
outv[n_w] = val;
outl[n_w] = i;
}
"""

# C type names substituted into the kernel's argument list.
_threshold_ctypes = {
    "tp_in": dtype_to_ctype(numpy.complex64),
    "tp_out1": dtype_to_ctype(numpy.complex64),
    "tp_out2": dtype_to_ctype(numpy.uint32),
    "tp_th": dtype_to_ctype(numpy.float32),
    "tp_n": dtype_to_ctype(numpy.uint32),
}
_threshold_arguments = (
    " %(tp_in)s *in, %(tp_out1)s *outv, %(tp_out2)s *outl, "
    "%(tp_th)s threshold, %(tp_n)s *bn" % _threshold_ctypes
)
threshold_kernel = ElementwiseKernel(
    mgr.state.context,
    _threshold_arguments,
    threshold_op,
    "getstuff",
)

# Preallocated device buffers: a one-element counter plus value/index
# outputs with 4096*256 slots each.
_threshold_queue = mgr.state.queue
_threshold_buffer_len = 4096*256
n = pzeros(_threshold_queue, 1, numpy.uint32)
val = pzeros(_threshold_queue, _threshold_buffer_len, numpy.complex64)
loc = pzeros(_threshold_queue, _threshold_buffer_len, numpy.uint32)
def threshold(series, value):
operation = Template(
DFS_TEMPLATE % dict(setup=setup,
leaf_operation=leaf_operation,
node_operation=node_operation,
common_operation=common_operation,
output_expr=output_expr),
disable_unicode=disable_unicode
).render(data_t=data_t, sorted=sorted)
args = Template(
"int *unique_cids, int *cids, int *offsets, " + args,
disable_unicode=disable_unicode
).render(data_t=data_t, sorted=sorted)
kernel = ElementwiseKernel(
ctx, args, operation=operation, preamble=premable)
def callable(octree_src, octree_dst, *args):
return kernel(
octree_dst.unique_cids.array[:octree_dst.unique_cid_count],
octree_dst.cids.array, octree_src.offsets.array, *args
)
return callable
name="generate_box_particle_starts"
)
# Kernel that copies old_counts_nonchild[i] into new_counts_nonchild[i] only
# where res_boxes[i] is non-zero; other entries of the output are not
# written by the kernel.
counts_nonchild_arguments = Template("""
__global char *res_boxes,
__global ${particle_id_t} *old_counts_nonchild,
__global ${particle_id_t} *new_counts_nonchild
""", strict_undefined=True).render(
    particle_id_t=dtype_to_ctype(tree.particle_id_dtype)
)
generate_box_particle_counts_nonchild = cl.elementwise.ElementwiseKernel(
    queue.context,
    counts_nonchild_arguments,
    "if(res_boxes[i]) new_counts_nonchild[i] = old_counts_nonchild[i];"
)
# Kernel computing new cumulative counts as a difference of two scan values:
# the scan at the end of the box's old range (old_starts[i] +
# old_counts_cumul[i]) minus the scan at its start (old_starts[i]).
counts_cumul_arguments = Template("""
__global ${particle_id_t} *old_counts_cumul,
__global ${particle_id_t} *old_starts,
__global ${particle_id_t} *new_counts_cumul,
__global ${particle_id_t} *particle_scan
""", strict_undefined=True).render(
    particle_id_t=dtype_to_ctype(tree.particle_id_dtype)
)
counts_cumul_operation = """
new_counts_cumul[i] =
particle_scan[old_starts[i] + old_counts_cumul[i]] -
particle_scan[old_starts[i]]
"""
generate_box_particle_counts_cumul = cl.elementwise.ElementwiseKernel(
    queue.context,
    counts_cumul_arguments,
    counts_cumul_operation
)
if(box_target_counts_nonchild[i]) box_flags[i] |= ${HAS_OWN_TARGETS};
if(box_target_counts_nonchild[i] < box_target_counts_cumul[i])
box_flags[i] |= ${HAS_CHILD_TARGETS};
""").render(HAS_OWN_TARGETS=("(" + box_flag_t + ") " +
str(box_flags_enum.HAS_OWN_TARGETS)),
HAS_CHILD_TARGETS=("(" + box_flag_t + ") " +
str(box_flags_enum.HAS_CHILD_TARGETS)))
)
# Apply the target-flag kernel to the device tree's counts and flags.
modify_target_flags_knl(
    d_tree.box_target_counts_nonchild,
    d_tree.box_target_counts_cumul,
    d_tree.box_flags,
)

# Generate local source flags
# 0b11111010 == 250: the mask keeps every bit except bit 0 and bit 2.
# NOTE(review): presumably those two bits are the source-related flags of
# box_flags_enum -- confirm, and consider deriving the mask from the enum.
local_box_flags = d_tree.box_flags & 0b11111010
# Kernel that ORs the HAS_OWN_SOURCES flag into box_flags for every box id
# listed in `responsible_box_list`. The flag value is rendered into the
# kernel source with an explicit cast to the box-flag C type.
own_sources_arguments = Template("""
__global ${box_id_t} *responsible_box_list,
__global ${box_flag_t} *box_flags
""").render(
    box_id_t=dtype_to_ctype(local_tree.box_id_dtype),
    box_flag_t=box_flag_t
)
has_own_sources_literal = (
    "(" + box_flag_t + ") " + str(box_flags_enum.HAS_OWN_SOURCES)
)
own_sources_operation = Template(r"""
box_flags[responsible_box_list[i]] |= ${HAS_OWN_SOURCES};
""").render(HAS_OWN_SOURCES=has_own_sources_literal)
modify_own_sources_knl = cl.elementwise.ElementwiseKernel(
    queue.context,
    own_sources_arguments,
    own_sources_operation
)
modify_child_sources_knl = cl.elementwise.ElementwiseKernel(
queue.context,
Template("""
__global char *ancestor_box_mask,
elif self.backend == 'opencl':
# OpenCL backend: wrap the user function in a pyopencl ElementwiseKernel
# whose per-element operation is a single call to that function.
py_data, c_data = self.cython_gen.get_func_signature(self.func)
self._correct_opencl_address_space(c_data)

# Imported here rather than at module level, as in the original.
from .opencl import get_context, get_queue
from pyopencl.elementwise import ElementwiseKernel
ctx = get_context()
self.queue = get_queue()

# The operation is "<function name>(<call arguments>)".
name = self.func.__name__
call_arguments = ', '.join(c_data[1])
expr = '{func}({args})'.format(func=name, args=call_arguments)

# The first declared argument is dropped from the kernel signature
# (NOTE(review): presumably the element index `i`, which ElementwiseKernel
# supplies itself -- confirm).
kernel_parameters = c_data[0][1:]
arguments = convert_to_float_if_needed(', '.join(kernel_parameters))
preamble = convert_to_float_if_needed(self.tp.get_code())

knl = ElementwiseKernel(
    ctx,
    arguments=arguments,
    operation=expr,
    preamble=preamble,
)
self.c_func = knl