Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def get_c_typedef_line(cls):
    """Return the C ``typedef`` statement that declares this enum's type.

    The underlying C type name is obtained from ``cls.dtype`` via
    ``dtype_to_ctype`` and is aliased to ``cls.c_name``.
    """
    from pyopencl.tools import dtype_to_ctype

    underlying_ctype = dtype_to_ctype(cls.dtype)
    alias = cls.c_name
    return "typedef %(ctype)s %(alias)s;" % {
        "ctype": underlying_ctype,
        "alias": alias,
    }
self.queue.context,
Template(r"""
${box_id_t} *source_parent_boxes,
${box_level_t} *box_levels,
double *m2m_cost,
double *nm2m,
% for i in range(2**ndimensions):
% if i == 2**ndimensions - 1:
${box_id_t} *box_child_ids_${i}
% else:
${box_id_t} *box_child_ids_${i},
% endif
% endfor
""").render(
ndimensions=ndimensions,
box_id_t=dtype_to_ctype(box_id_dtype),
box_level_t=dtype_to_ctype(box_level_dtype)
),
Template(r"""
${box_id_t} box_idx = source_parent_boxes[i];
${box_level_t} target_level = box_levels[box_idx];
if(target_level <= 1) {
nm2m[i] = 0.0;
} else {
int nchild = 0;
% for i in range(2**ndimensions):
if(box_child_ids_${i}[box_idx])
nchild += 1;
% endfor
nm2m[i] = nchild * m2m_cost[target_level];
}
""").render(
def __init__(self, gpu_nd_array):
    """Store *gpu_nd_array* and remember the C type name of its dtype."""
    # NOTE: the array's type is deliberately not checked here; an
    # isinstance check against gpu_ndarray.GpuNdArrayObject is disabled.
    self.gpu_nd_array = gpu_nd_array

    element_dtype = self.gpu_nd_array.dtype
    self.ctype = dtype_to_ctype(element_dtype)
get_arg_offset_adjuster_code, VectorArg)
arg_prep = ""
if stage==1 and arguments is not None:
arguments = parse_arg_list(arguments, with_offset=True)
arg_prep = get_arg_offset_adjuster_code(arguments)
if stage==2 and arguments is not None:
arguments = parse_arg_list(arguments)
arguments = (
[VectorArg(dtype_out, "pyopencl_reduction_inp_%i"%i) for i in range(len(map_exprs))]
+arguments)
inf = _get_reduction_source(
ctx, dtype_to_ctype(dtype_out), dtype_out.itemsize,
neutral, reduce_expr, map_exprs, arguments,
name, preamble, arg_prep, device, max_group_size)
inf.program = cl.Program(ctx, inf.source)
inf.program.build(options)
inf.kernel = getattr(inf.program, name)
inf.arg_types = arguments
inf.kernel.set_scalar_arg_dtypes(
[None, ]*len(map_exprs)+[np.int64]
+get_arg_list_scalar_arg_dtypes(inf.arg_types)
+[np.uint32]*2)
return inf
def get_sum_kernel(ctx, dtype_out, dtype_in):
    """Create a ``ReductionKernel`` that sums a single input vector ``in``.

    When *dtype_out* is ``None`` it falls back to *dtype_in*. For complex
    output dtypes (``dtype_out.kind == "c"``) the reduction and neutral
    expressions use the ``<name>_add`` / ``<name>_new`` helpers, where
    ``<name>`` comes from ``complex_dtype_to_name``; otherwise plain
    ``a+b`` with neutral element ``0`` is used.
    """
    if dtype_out is None:
        dtype_out = dtype_in

    if dtype_out.kind == "c":
        from pyopencl.elementwise import complex_dtype_to_name

        complex_name = complex_dtype_to_name(dtype_out)
        reduce_expr = "%s_add(a, b)" % complex_name
        neutral_expr = "%s_new(0, 0)" % complex_name
    else:
        reduce_expr = "a+b"
        neutral_expr = "0"

    # The kernel takes one read-only input vector typed after dtype_in.
    input_signature = "const %(tp)s *in" % {"tp": dtype_to_ctype(dtype_in)}

    return ReductionKernel(
        ctx,
        dtype_out,
        neutral_expr,
        reduce_expr,
        arguments=input_signature,
    )
def process_list2_knl(self, box_id_dtype, box_level_dtype):
    """Build the ``process_list2`` elementwise kernel.

    For each index ``i`` the generated kernel stores in ``nm2l[i]`` the
    number of entries between ``from_sep_siblings_starts[i]`` and
    ``from_sep_siblings_starts[i+1]``, multiplied by ``m2l_cost`` looked up
    at the level of box ``target_or_target_parent_boxes[i]``.

    *box_id_dtype* and *box_level_dtype* select the C types substituted for
    ``box_id_t`` and ``box_level_t`` in the kernel source. The kernel is
    created on ``self.queue.context``.
    """
    # Both templates are rendered with the same C type names.
    ctype_names = {
        "box_id_t": dtype_to_ctype(box_id_dtype),
        "box_level_t": dtype_to_ctype(box_level_dtype),
    }

    # Template text is kept flush-left so the rendered kernel source is
    # reproduced exactly.
    arguments_tpl = Template(r"""
double *nm2l,
${box_id_t} *target_or_target_parent_boxes,
${box_id_t} *from_sep_siblings_starts,
${box_level_t} *box_levels,
double *m2l_cost
""")
    operation_tpl = Template(r"""
${box_id_t} start = from_sep_siblings_starts[i];
${box_id_t} end = from_sep_siblings_starts[i+1];
${box_level_t} ilevel = box_levels[target_or_target_parent_boxes[i]];
nm2l[i] = (end - start) * m2l_cost[ilevel];
""")

    return ElementwiseKernel(
        self.queue.context,
        arguments_tpl.render(**ctype_names),
        operation_tpl.render(**ctype_names),
        name="process_list2",
    )
b = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), b)
if a_is_complex or x_is_complex or b_is_complex:
expr = "{root}_add({ax}, {b})".format(
ax=ax,
b=b,
root=complex_dtype_to_name(dtype_z))
else:
expr = "%s + %s" % (ax, b)
return get_elwise_kernel(context,
"%(tp_z)s *z, %(tp_a)s a, %(tp_x)s *x,%(tp_b)s b" % {
"tp_a": dtype_to_ctype(dtype_a),
"tp_x": dtype_to_ctype(dtype_x),
"tp_b": dtype_to_ctype(dtype_b),
"tp_z": dtype_to_ctype(dtype_z),
},
"z[i] = " + expr,
name="axpb")
def process_list4_knl(self, box_id_dtype, particle_id_dtype, box_level_dtype):
    """Build the ``process_list4`` elementwise kernel.

    For each index ``i`` the generated kernel walks the entries of
    ``from_sep_bigger_lists`` in the half-open range
    ``[from_sep_bigger_starts[i], from_sep_bigger_starts[i+1])``. For every
    source box ``src_ibox`` found there it adds
    ``box_source_counts_nonchild[src_ibox] * p2l_cost[box_levels[src_ibox]]``
    to ``nm2p[i]``.

    *box_id_dtype*, *particle_id_dtype* and *box_level_dtype* select the C
    types substituted for ``box_id_t``, ``particle_id_t`` and
    ``box_level_t`` in the kernel source. The kernel is created on
    ``self.queue.context``.

    NOTE(review): the kernel only accumulates into ``nm2p`` with ``+=`` and
    never resets it, so the caller presumably passes a zero-initialized
    array -- confirm against the call site.
    """
    # All three C type names are shared by the argument list and the body.
    ctype_names = {
        "box_id_t": dtype_to_ctype(box_id_dtype),
        "particle_id_t": dtype_to_ctype(particle_id_dtype),
        "box_level_t": dtype_to_ctype(box_level_dtype),
    }

    # Template text is kept flush-left so the rendered kernel source is
    # reproduced exactly.
    arguments_tpl = Template(r"""
double *nm2p,
${box_id_t} *from_sep_bigger_starts,
${box_id_t} *from_sep_bigger_lists,
${particle_id_t} *box_source_counts_nonchild,
${box_level_t} *box_levels,
double *p2l_cost
""")
    operation_tpl = Template(r"""
${box_id_t} start = from_sep_bigger_starts[i];
${box_id_t} end = from_sep_bigger_starts[i+1];
for(${box_id_t} idx=start; idx < end; idx++) {
${box_id_t} src_ibox = from_sep_bigger_lists[idx];
${particle_id_t} nsources = box_source_counts_nonchild[src_ibox];
${box_level_t} ilevel = box_levels[src_ibox];
nm2p[i] += nsources * p2l_cost[ilevel];
}
""")

    # The constructor call is explicitly closed here; previously its
    # closing parenthesis was missing, leaving the definition unterminated.
    return ElementwiseKernel(
        self.queue.context,
        arguments_tpl.render(**ctype_names),
        operation_tpl.render(**ctype_names),
        name="process_list4",
    )
ax = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), ax)
b = "%s_cast(%s)" % (complex_dtype_to_name(dtype_z), b)
if a_is_complex or x_is_complex or b_is_complex:
expr = "{root}_add({ax}, {b})".format(
ax=ax,
b=b,
root=complex_dtype_to_name(dtype_z))
else:
expr = "%s + %s" % (ax, b)
return get_elwise_kernel(context,
"%(tp_z)s *z, %(tp_a)s a, %(tp_x)s *x,%(tp_b)s b" % {
"tp_a": dtype_to_ctype(dtype_a),
"tp_x": dtype_to_ctype(dtype_x),
"tp_b": dtype_to_ctype(dtype_b),
"tp_z": dtype_to_ctype(dtype_z),
},
"z[i] = " + expr,
name="axpb")
__global ${index_t} *count,
__global ${index_t} *compressed_counts,
__global ${index_t} *nonempty_indices,
__global ${index_t} *compressed_indices,
__global ${index_t} *num_non_empty_list
"""
from sys import version_info
if version_info > (3, 0):
arguments = Template(arguments)
else:
arguments = Template(arguments, disable_unicode=True)
from pyopencl.scan import GenericScanKernel
return GenericScanKernel(
self.context, index_dtype,
arguments=arguments.render(index_t=dtype_to_ctype(index_dtype)),
input_expr="count[i] == 0 ? 0 : 1",
scan_expr="a+b", neutral="0",
output_statement="""
if (i + 1 < N) compressed_indices[i + 1] = item;
if (prev_item != item) {
nonempty_indices[item - 1] = i;
compressed_counts[item - 1] = count[i];
}
if (i + 1 == N) *num_non_empty_list = item;
""",
devices=self.devices)