# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def get_array_scalar_comparison_kernel(context, operator, dtype_a):
    """Build an elementwise kernel comparing an array against a scalar.

    The generated kernel evaluates ``out[i] = a[i] <operator> b`` for every
    index *i*, where *b* is a scalar of dtype *dtype_a* and *out* is an
    ``int8`` result array.
    """
    kernel_args = [
        VectorArg(np.int8, "out", with_offset=True),
        VectorArg(dtype_a, "a", with_offset=True),
        ScalarArg(dtype_a, "b"),
    ]
    comparison_expr = "out[i] = a[i] {} b".format(operator)
    return get_elwise_kernel(
        context, kernel_args, comparison_expr,
        name="scalar_comparison_kernel")
def get_if_positive_kernel(context, crit_dtype, dtype):
    """Build an elementwise select kernel.

    The generated kernel computes, for every index *i*,
    ``result[i] = then_[i] if crit[i] > 0 else else_[i]``.
    """
    kernel_args = [
        VectorArg(dtype, "result", with_offset=True),
        VectorArg(crit_dtype, "crit", with_offset=True),
        VectorArg(dtype, "then_", with_offset=True),
        VectorArg(dtype, "else_", with_offset=True),
    ]
    select_expr = "result[i] = crit[i] > 0 ? then_[i] : else_[i]"
    return get_elwise_kernel(
        context, kernel_args, select_expr,
        name="if_positive")
def get_array_binop_kernel(context, operator, dtype_res, dtype_a, dtype_b):
    """Build an elementwise binary-operation kernel.

    The generated kernel evaluates ``out[i] = a[i] <operator> b[i]`` for
    every index *i*, with *out*, *a* and *b* having the given dtypes.
    """
    kernel_args = [
        VectorArg(dtype_res, "out", with_offset=True),
        VectorArg(dtype_a, "a", with_offset=True),
        VectorArg(dtype_b, "b", with_offset=True),
    ]
    binop_expr = "out[i] = a[i] {} b[i]".format(operator)
    return get_elwise_kernel(
        context, kernel_args, binop_expr,
        name="binop_kernel")
particle_id_t b_t_count = box_target_counts_nonchild[i];
for (particle_id_t j = b_t_start; j < b_t_start+b_t_count; ++j)
{
particle_id_t user_target_id = user_target_ids[j];
if (user_order_flags[user_target_id])
{
APPEND_filt_tgt_list(user_target_id);
}
}
}
""", strict_undefined=True).render(
dtype_to_ctype=dtype_to_ctype,
particle_id_dtype=particle_id_dtype
), arg_decls=[
VectorArg(user_order_flags_dtype, "user_order_flags"),
VectorArg(particle_id_dtype, "user_target_ids"),
VectorArg(particle_id_dtype, "box_target_starts"),
VectorArg(particle_id_dtype, "box_target_counts_nonchild"),
])
return builder
# box-local morton bin counts for each particle at the current level
# only valid from scan -> split'n'sort
VectorArg(morton_bin_count_dtype, "morton_bin_counts"), # [nparticles]
# (local) morton nrs for each particle at the current level
# only valid from scan -> split'n'sort
VectorArg(np.uint8, "morton_nrs"), # [nparticles]
# segment flags
# invariant to sorting once set
# (particles are only reordered within a box)
VectorArg(np.uint8, "box_start_flags"), # [nparticles]
VectorArg(box_id_dtype, "box_ids"), # [nparticles]
VectorArg(box_id_dtype, "unsplit_box_ids"), # [nparticles]
VectorArg(box_id_dtype, "split_box_ids"), # [nparticles]
# per-box morton bin counts
VectorArg(morton_bin_count_dtype, "box_morton_bin_counts"), # [nparticles]
# particle# at which each box starts
VectorArg(particle_id_dtype, "box_starts"), # [nboxes]
# number of particles in each box
VectorArg(particle_id_dtype,"box_particle_counts"), # [nboxes]
# pointer to parent box
VectorArg(box_id_dtype, "parent_ids"), # [nboxes]
# morton nr identifier {quadr,oct}ant of parent in which this box was created
VectorArg(morton_nr_dtype, "box_morton_nrs"), # [nboxes]
map_expr[i] = "in[i]"
from pyopencl.tools import (
parse_arg_list, get_arg_list_scalar_arg_dtypes,
get_arg_offset_adjuster_code, VectorArg)
arg_prep = ""
if stage==1 and arguments is not None:
arguments = parse_arg_list(arguments, with_offset=True)
arg_prep = get_arg_offset_adjuster_code(arguments)
if stage==2 and arguments is not None:
arguments = parse_arg_list(arguments)
arguments = (
[VectorArg(dtype_out, "pyopencl_reduction_inp")]
+arguments)
inf = _get_reduction_source(
ctx, dtype_to_ctype(dtype_out), dtype_out.itemsize,
neutral, reduce_expr, map_exprs, arguments,
name, preamble, arg_prep, device, max_group_size)
inf.program = cl.Program(ctx, inf.source)
inf.program.build(options)
inf.kernel = getattr(inf.program, name)
inf.arg_types = arguments
inf.kernel.set_scalar_arg_dtypes(
[None, ]*len(map_exprs)+[np.int64]
def get_cumsum_kernel(context, input_dtype, output_dtype):
from pyopencl.tools import VectorArg
return GenericScanKernel(
context, output_dtype,
arguments=[
VectorArg(input_dtype, "input"),
VectorArg(output_dtype, "output"),
],
input_expr="input[i]",
scan_expr="a+b", neutral="0",
output_statement="""
output[i] = item;
def get_take_put_kernel(context, dtype, idx_dtype, with_offsets, vec_count=1):
ctx = {
"idx_tp": dtype_to_ctype(idx_dtype),
"tp": dtype_to_ctype(dtype),
}
args = [
VectorArg(dtype, "dest%d" % i)
for i in range(vec_count)
] + [
VectorArg(idx_dtype, "gmem_dest_idx", with_offset=True),
VectorArg(idx_dtype, "gmem_src_idx", with_offset=True),
] + [
VectorArg(dtype, "src%d" % i, with_offset=True)
for i in range(vec_count)
] + [
ScalarArg(idx_dtype, "offset%d" % i)
for i in range(vec_count) if with_offsets
]
if with_offsets:
def get_copy_insn(i):
return ("dest%d[dest_idx] = "
"src%d[src_idx+offset%d];"
% (i, i, i))
else:
def get_copy_insn(i):
return ("dest%d[dest_idx] = "
def get_unop_kernel(context, operator, res_dtype, in_dtype):
    """Build an elementwise unary-operation kernel.

    The generated kernel evaluates ``z[i] = <operator> y[i]`` for every
    index *i*, reading from *y* (dtype *in_dtype*) and writing to *z*
    (dtype *res_dtype*).
    """
    kernel_args = [
        VectorArg(res_dtype, "z", with_offset=True),
        VectorArg(in_dtype, "y", with_offset=True),
    ]
    unop_expr = "z[i] = {} y[i]".format(operator)
    return get_elwise_kernel(
        context, kernel_args, unop_expr,
        name="unary_op_kernel")
TRAVERSAL_PREAMBLE_TEMPLATE
+ BALLS_TO_LEAVES_TEMPLATE,
strict_undefined=True).render(**render_vars)
from pyopencl.tools import VectorArg, ScalarArg
from pyopencl.algorithm import ListOfListsBuilder
result = ListOfListsBuilder(self.context,
[
("ball_numbers", ball_id_dtype),
("overlapping_leaves", box_id_dtype),
],
str(src),
arg_decls=[
VectorArg(box_flags_enum.dtype, "box_flags"),
VectorArg(coord_dtype, "box_centers"),
VectorArg(box_id_dtype, "box_child_ids"),
VectorArg(np.uint8, "box_levels"),
ScalarArg(coord_dtype, "root_extent"),
ScalarArg(box_id_dtype, "aligned_nboxes"),
VectorArg(coord_dtype, "ball_radii"),
] + [
VectorArg(coord_dtype, "ball_"+ax)
for ax in AXIS_NAMES[:dimensions]],
name_prefix="circles_to_balls",
count_sharing={
# /!\ This makes a promise that APPEND_ball_numbers will
# always occur *before* APPEND_overlapping_leaves.
"overlapping_leaves": "ball_numbers"
},
complex_kernel=True)
logger.info("done building leaves-to-balls lookup kernel")