Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
"b_packed = b")
state.add_memlet_path(write_buffer,
pack_tasklet,
dst_conn="b",
memlet=dace.Memlet.simple(
write_buffer,
"0",
veclen=VECTOR_LENGTH.get(),
num_accesses=VECTOR_LENGTH))
# Write back out to memory from unpacked to packed memory
state.add_memlet_path(pack_tasklet,
outer_exit,
write_output,
src_conn="b_packed",
memlet=dace.Memlet.simple(
"B_device",
"W*i",
veclen=VECTOR_LENGTH.get(),
num_accesses=VECTOR_LENGTH))
return state
# Buffer to tasklet
state.add_memlet_path(a_buffer,
unroll_entry,
tasklet,
dst_conn="_in",
memlet=dace.Memlet.simple(a_buffer.data,
"u",
veclen=1,
num_accesses=1))
# Tasklet to buffer
state.add_memlet_path(tasklet,
unroll_exit,
b_buffer,
src_conn="_out",
memlet=dace.Memlet.simple(b_buffer.data,
"u",
veclen=1,
num_accesses=1))
# Buffer to output stream
state.add_memlet_path(b_buffer,
exit,
produce_output_stream,
memlet=dace.Memlet.simple(
produce_output_stream.data,
"0",
other_subset_str="0:{}".format(veclen),
veclen=veclen,
num_accesses=1))
return sdfg
map_entry.in_connectors = {'IN_1'}
map_entry.out_connectors = {'OUT_1'}
map_exit.in_connectors = {'IN_1'}
map_exit.out_connectors = {'OUT_1'}
tasklet = state.add_tasklet(name=label + '_tasklet',
inputs={'a'},
outputs={'b'},
code='b = {} * a'.format(constant))
# Add edges
state.add_edge(A_src, None, map_entry, 'IN_1',
dace.Memlet.simple(A_node, A_outer_range))
state.add_edge(map_exit, 'OUT_1', B_dst, None,
dace.Memlet.simple(B_node, B_outer_range))
state.add_edge(map_entry, 'OUT_1', tasklet, 'a',
dace.Memlet.simple(A_node, A_inner_range))
if accumulate:
state.add_edge(
tasklet, 'b', map_exit, 'IN_1',
dace.Memlet.simple(B_node,
B_inner_range,
wcr_str='lambda x, y: x + y',
wcr_conflict=False))
else:
state.add_edge(tasklet, 'b', map_exit, 'IN_1',
dace.Memlet.simple(B_node, B_inner_range))
subset=Indices(["0"]),
vector_length=P.get(),
other_subset=Indices(["h", "w"])))
state.add_memlet_path(
a_val,
read_unroll_entry,
read_tasklet,
dst_conn="A_in",
memlet=Memlet.simple(a_val, "0", veclen=P.get(), num_accesses=1))
state.add_memlet_path(
read_tasklet,
read_unroll_exit,
read_exit,
A_pipes_out,
src_conn="A_pipe",
memlet=Memlet.simple(A_pipes_out, "p"))
# Write module
hist_pipes_in = state.add_stream(
"hist_pipes",
itype,
shape=(P, ),
transient=True,
storage=StorageType.FPGA_Local)
hist_device_out = state.add_array(
"hist_device", (num_bins, ),
itype,
transient=True,
storage=dace.dtypes.StorageType.FPGA_Global)
merge_entry, merge_exit = state.add_map(
"merge", {"nb": "0:num_bins"}, schedule=ScheduleType.FPGA_Device)
merge_reduce = state.add_reduce(
dace.Memlet.simple(
mul_out, ','.join(['_o%d' % i
for i in range(len(shape_c))]))
},
external_edges=True)
# Multiplication map
state.add_mapped_tasklet(
"_GEMM_",
{"__i%d" % i: "0:%s" % s
for i, s in enumerate([M, N, K])}, {
"__a":
dace.Memlet.simple(
"_a", "__i2, __i0" if node.transA else "__i0, __i2"),
"__b":
dace.Memlet.simple(
"_b", "__i1, __i2" if node.transB else "__i2, __i1")
},
mul_program, {
"__out":
dace.Memlet.simple(
mul_out, "__i0, __i1", wcr_str="lambda x, y: x + y")
},
external_edges=True,
output_nodes=output_nodes)
if node.beta != 0:
add_program = "__y = ({} * __c) + __tmp".format(
_cast_to_dtype_str(node.beta, dtype_a))
# manually broadcasting C to [M, N]
if list(shape_c) == [M, N]:
output_size = outedge.data.subset.size()
ome, omx = nstate.add_map(
'reduce_output', {
'_o%d' % i: '0:%s' % symstr(sz)
for i, sz in enumerate(outedge.data.subset.size())
})
outm = dace.Memlet.simple(
'_out',
','.join(['_o%d' % i for i in range(output_dims)]),
wcr_str=node.wcr)
inmm = dace.Memlet.simple('_in', ','.join(input_subset))
else:
ome, omx = None, None
outm = dace.Memlet.simple('_out', '0', wcr_str=node.wcr)
inmm = dace.Memlet.simple(
'_in', ','.join(['_i%d' % i for i in range(len(axes))]))
# Add inner map, which corresponds to the range to reduce, containing
# an identity tasklet
ime, imx = nstate.add_map(
'reduce_values', {
'_i%d' % i: '0:%s' % symstr(inedge.data.subset.size()[axis])
for i, axis in enumerate(sorted(axes))
})
# Add identity tasklet for reduction
t = nstate.add_tasklet('identity', {'inp'}, {'out'}, 'out = inp')
# Connect everything
r = nstate.add_read('_in')
output_nodes = None
else:
mul_out, mul_out_array = tmp, array_tmp = sdfg.add_temp_transient(
shape_c, dtype_c, storage=storage)
access_tmp = state.add_read(tmp)
output_nodes = {mul_out: access_tmp}
# Initialization map
init_state.add_mapped_tasklet(
'gemm_init',
{'_o%d' % i: '0:%s' % symstr(d)
for i, d in enumerate(shape_c)}, {},
'out = 0', {
'out':
dace.Memlet.simple(
mul_out, ','.join(['_o%d' % i
for i in range(len(shape_c))]))
},
external_edges=True)
# Multiplication map
state.add_mapped_tasklet(
"_GEMM_",
{"__i%d" % i: "0:%s" % s
for i, s in enumerate([M, N, K])}, {
"__a":
dace.Memlet.simple(
"_a", "__i2, __i0" if node.transA else "__i0, __i2"),
"__b":
dace.Memlet.simple(
"_b", "__i1, __i2" if node.transB else "__i2, __i1")
state.add_edge(map_exit, 'OUT_1', B_node, None,
dace.Memlet.simple(B_node, B_outer_range))
state.add_edge(map_entry, 'OUT_1', tasklet, 'scalar',
dace.Memlet.simple(alpha_node, alpha_inner_range))
state.add_edge(map_entry, 'OUT_2', tasklet, 'a',
dace.Memlet.simple(A_node, A_inner_range))
if accumulate:
state.add_edge(
tasklet, 'b', map_exit, 'IN_1',
dace.Memlet.simple(B_node,
B_inner_range,
wcr_str='lambda x, y: x + y',
wcr_conflict=wcr_conflict))
else:
state.add_edge(tasklet, 'b', map_exit, 'IN_1',
dace.Memlet.simple(B_node, B_inner_range))
return state
# Create map/tasklet
map_entry, map_exit = state.add_map(name=label + '_map',
ndrange=map_ranges)
map_entry.in_connectors = {'IN_1', 'IN_2'}
map_entry.out_connectors = {'OUT_1', 'OUT_2'}
map_exit.in_connectors = {'IN_1'}
map_exit.out_connectors = {'OUT_1'}
tasklet = state.add_tasklet(name=label + '_tasklet',
inputs={'scalar', 'a'},
outputs={'b'},
code='b = scalar * a')
# Add edges
state.add_edge(alpha_src, None, map_entry, 'IN_1',
dace.Memlet.simple(alpha_node, alpha_outer_range))
state.add_edge(A_src, None, map_entry, 'IN_2',
dace.Memlet.simple(A_node, A_outer_range))
state.add_edge(map_exit, 'OUT_1', B_dst, None,
dace.Memlet.simple(B_node, B_outer_range))
state.add_edge(map_entry, 'OUT_1', tasklet, 'scalar',
dace.Memlet.simple(alpha_node, alpha_inner_range))
state.add_edge(map_entry, 'OUT_2', tasklet, 'a',
dace.Memlet.simple(A_node, A_inner_range))
if accumulate:
state.add_edge(
tasklet, 'b', map_exit, 'IN_1',
dace.Memlet.simple(B_node,
B_inner_range,
wcr_str='lambda x, y: x + y',
wcr_conflict=wcr_conflict))
else:
else:
input_subset.append('_o%d' % octr)
octr += 1
output_size = outedge.data.subset.size()
ome, omx = nstate.add_map(
'reduce_output', {
'_o%d' % i: '0:%s' % symstr(sz)
for i, sz in enumerate(outedge.data.subset.size())
})
outm = dace.Memlet.simple(
'_out',
','.join(['_o%d' % i for i in range(output_dims)]),
wcr_str=node.wcr)
inmm = dace.Memlet.simple('_in', ','.join(input_subset))
else:
ome, omx = None, None
outm = dace.Memlet.simple('_out', '0', wcr_str=node.wcr)
inmm = dace.Memlet.simple(
'_in', ','.join(['_i%d' % i for i in range(len(axes))]))
# Add inner map, which corresponds to the range to reduce, containing
# an identity tasklet
ime, imx = nstate.add_map(
'reduce_values', {
'_i%d' % i: '0:%s' % symstr(inedge.data.subset.size()[axis])
for i, axis in enumerate(sorted(axes))
})
# Add identity tasklet for reduction
t = nstate.add_tasklet('identity', {'inp'}, {'out'}, 'out = inp')