Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
import dace
from simple_systolic_array import P, make_sdfg
from dace.config import Config
# Label suffix appended to every code node below. Its text states that it does
# not fit in "the 61 character limit"; the script expects code generation to
# reject the resulting names.
KERNEL_NAME = ("_this_is_a_very_long_kernel_name_that_does_not_fit_"
               "in_the_61_character_limit")

if __name__ == "__main__":

    # Select the Intel FPGA vendor; the exception expected below is defined in
    # the intel_fpga code generation target.
    Config.set("compiler", "fpga_vendor", value="intel_fpga")

    sdfg = make_sdfg("name_too_long")

    # Make every code node's label too long
    for node, _ in sdfg.all_nodes_recursive():
        if isinstance(node, dace.sdfg.nodes.CodeNode):
            node.label += KERNEL_NAME

    sdfg.specialize({"P": 4})

    try:
        # Only the raised exception matters; the generated code is discarded.
        sdfg.generate_code()
    except dace.codegen.targets.intel_fpga.NameTooLongError:
        # Expected outcome: the over-long name was rejected.
        pass
    else:
        raise RuntimeError("No exception thrown.")
return
def modifies_graph(self):
    """ Returns True unconditionally.

        NOTE(review): presumably tells the caller that applying this object
        changes the graph -- confirm against the enclosing class, which is
        not visible here.
    """
    return True
@registry.autoregister_params(singlestate=True)
@make_properties
class AccumulateTransient(pattern_matching.Transformation):
    """ Implements the AccumulateTransient transformation, which adds
        transient stream and data nodes between nested maps that lead to a
        stream. The transient data nodes then act as a local accumulator.
    """

    # Placeholder nodes used only to describe the pattern in `expressions`:
    # a tasklet and two map exits.
    _tasklet = nodes.Tasklet('_')
    _map_exit = nodes.MapExit(nodes.Map("", [], []))
    _outer_map_exit = nodes.MapExit(nodes.Map("", [], []))

    # Optional array name; None is allowed and is the default.
    array = Property(
        dtype=str,
        desc="Array to create local storage for (if empty, first available)",
        default=None,
        allow_none=True)

    @staticmethod
    def expressions():
        """ Returns the list of pattern graphs for this transformation.

            The list holds a single graph, built by `sdutil.node_path_graph`
            from the tasklet, the map exit and the outer map exit, in that
            order.
        """
        return [
            sdutil.node_path_graph(AccumulateTransient._tasklet,
                                   AccumulateTransient._map_exit,
                                   AccumulateTransient._outer_map_exit)
        ]
language=dace.Language.CPP)
# Add the rest of the code
sdfg.append_global_code(cuda_globalcode.getvalue(), 'cuda')
# Rename outer connectors and add to node
input_edge._dst_conn = '_in'
output_edge._src_conn = '_out'
node.add_in_connector('_in')
node.add_out_connector('_out')
return tnode
@dace.library.node
class Reduce(dace.sdfg.nodes.LibraryNode):
    """ An SDFG node that reduces an N-dimensional array to an
        (N-k)-dimensional array, with a list of axes to reduce and
        a reduction binary function. """

    # Global properties

    # Maps each implementation name to the class registered for it.
    implementations = {
        'pure': ExpandReducePure,
        'OpenMP': ExpandReduceOpenMP,
        'CUDA (device)': ExpandReduceCUDADevice,
        'CUDA (block)': ExpandReduceCUDABlock,
        # The two warp-level entries below were already commented out in the
        # original and are kept that way.
        # 'CUDA (warp)': ExpandReduceCUDAWarp,
        # 'CUDA (warp allreduce)': ExpandReduceCUDAWarpAllreduce
    }

    # Must be one of the keys of `implementations`.
    default_implementation = 'pure'
else: # dst node's scope is higher than src node, propagate out
propagate_forward = True
# Innermost edge memlet
cur_memlet = memlet
# Verify that connectors exist
if (not memlet.is_empty() and hasattr(edges[0].src, "out_connectors")
and isinstance(edges[0].src, nd.CodeNode)
and not isinstance(edges[0].src, nd.LibraryNode) and
(src_conn is None or src_conn not in edges[0].src.out_connectors)):
raise ValueError("Output connector {} does not exist in {}".format(
src_conn, edges[0].src.label))
if (not memlet.is_empty() and hasattr(edges[-1].dst, "in_connectors")
and isinstance(edges[-1].dst, nd.CodeNode)
and not isinstance(edges[-1].dst, nd.LibraryNode) and
(dst_conn is None or dst_conn not in edges[-1].dst.in_connectors)):
raise ValueError("Input connector {} does not exist in {}".format(
dst_conn, edges[-1].dst.label))
path = edges if propagate_forward else reversed(edges)
# Propagate and add edges
for i, edge in enumerate(path):
# Figure out source and destination connectors
if propagate_forward:
sconn = src_conn if i == 0 else ("OUT_" +
edge.src.last_connector())
dconn = (dst_conn if i == len(edges) - 1 else
("IN_" + edge.dst.next_connector()))
else:
sconn = (src_conn if i == len(edges) - 1 else
("OUT_" + edge.src.next_connector()))
if isinstance(node, nodes.NestedSDFG):
change_storage(node.sdfg, storage)
@registry.autoregister_params(singlestate=True)
@properties.make_properties
class CopyToDevice(pattern_matching.Transformation):
    """ Implements the copy-to-device transformation, which copies a nested
        SDFG and its dependencies to a given device.

        The transformation changes all data storage types of a nested SDFG to
        the given `storage` property, and creates new arrays and copies around
        the nested SDFG to that storage.
    """

    # Placeholder nested SDFG node used only to describe the pattern.
    _nested_sdfg = nodes.NestedSDFG("", graph.OrderedDiGraph(), {}, {})

    # Target storage type; string values are parsed by enum member name.
    storage = properties.Property(dtype=dtypes.StorageType,
                                  desc="Nested SDFG storage",
                                  choices=dtypes.StorageType,
                                  from_string=lambda x: dtypes.StorageType[x],
                                  default=dtypes.StorageType.Default)

    @staticmethod
    def annotates_memlets():
        """ Returns True unconditionally. """
        return True

    @staticmethod
    def expressions():
        """ Returns the list of pattern graphs for this transformation.

            The list holds a single graph, built by `sdutil.node_path_graph`
            from the placeholder nested SDFG node alone.
        """
        return [sdutil.node_path_graph(CopyToDevice._nested_sdfg)]
@staticmethod
def _components(
        subgraph: gr.SubgraphView) -> List[Tuple[nodes.Node, nodes.Node]]:
    """
    Returns the list of non-array components in this subgraph.
    Each element in the list is a 2-tuple of (input node, output node) of
    the component.

    :param subgraph: The subgraph to inspect. If it is an SDFG state it is
                     used directly to look up exit nodes; otherwise its
                     ``graph`` attribute is used.
    :return: One tuple per top-level entry node or code node: an entry node
             is paired with its exit node, a code node with itself.
    """
    graph = (subgraph
             if isinstance(subgraph, sd.SDFGState) else subgraph.graph)

    # The None key holds the nodes that are not inside any scope.
    top_level = subgraph.scope_dict(node_to_children=True)[None]

    components = []
    for n in top_level:
        if isinstance(n, nodes.EntryNode):
            # A scope is one component, spanning its entry to its exit node
            components.append((n, graph.exit_node(n)))
        elif isinstance(n, nodes.CodeNode):
            # A single code node is both input and output of its component
            components.append((n, n))
        # Any other node type is not a component and is skipped
    return components
from dace import registry
from dace.transformation import pattern_matching
from dace.properties import make_properties
import dace.libraries.blas as blas
@registry.autoregister_params(singlestate=True)
@make_properties
class MatrixProductTranspose(pattern_matching.Transformation):
""" Implements the matrix-matrix product transpose transformation.
T(A) @ T(B) = T(B @ A)
"""
_transpose_a = blas.Transpose("")
_at = nodes.AccessNode("")
_transpose_b = blas.Transpose("")
_bt = nodes.AccessNode("")
_a_times_b = blas.MatMul("")
@staticmethod
def expressions():
graph = dace.sdfg.graph.OrderedDiGraph()
graph.add_node(MatrixProductTranspose._transpose_a)
graph.add_node(MatrixProductTranspose._at)
graph.add_node(MatrixProductTranspose._transpose_b)
graph.add_node(MatrixProductTranspose._bt)
graph.add_node(MatrixProductTranspose._a_times_b)
graph.add_edge(MatrixProductTranspose._transpose_a,
MatrixProductTranspose._at, None)
graph.add_edge(MatrixProductTranspose._at,
MatrixProductTranspose._a_times_b, None)
def add_children(treenode):
    """ Recursively fills in the ``children`` of a memlet tree node.

        Relies on ``propagate_forward``, ``propagate_backward`` and ``state``
        from the enclosing scope. Going forward, the children are the edges
        leaving the entry node that this node's edge enters; going backward,
        they are the edges entering the exit node that this node's edge
        leaves. In both cases only edges on the matching connector count.

        :param treenode: Tree node whose ``edge`` is inspected and whose
                         ``children`` list is assigned.
    """
    if propagate_forward:
        edge = treenode.edge
        # Stop unless the edge enters an entry node through an 'IN_' connector
        if not (isinstance(edge.dst, nd.EntryNode) and edge.dst_conn
                and edge.dst_conn.startswith('IN_')):
            return
        # Strip the 'IN_' prefix to find the matching 'OUT_' connector
        conn = edge.dst_conn[3:]
        treenode.children = [
            mm.MemletTree(e, parent=treenode)
            for e in state.out_edges(edge.dst)
            if e.src_conn == 'OUT_%s' % conn
        ]
    elif propagate_backward:
        edge = treenode.edge
        # Stop unless the edge leaves an exit node through a named connector
        if (not isinstance(edge.src, nd.ExitNode) or edge.src_conn is None):
            return
        # Drop the first four characters to find the matching 'IN_' connector.
        # NOTE(review): assumes the connector starts with 'OUT_'; unlike the
        # forward case, the prefix is not checked -- confirm.
        conn = edge.src_conn[4:]
        treenode.children = [
            mm.MemletTree(e, parent=treenode)
            for e in state.in_edges(edge.src)
            if e.dst_conn == 'IN_%s' % conn
        ]

    # Descend into the children. If neither direction flag is set, this
    # walks whatever children the node already has.
    for child in treenode.children:
        add_children(child)
scope_dict = state.scope_dict()
for node in state.nodes():
if (isinstance(node, nodes.AccessNode)
and node.desc(sdfg).storage == dtypes.StorageType.Default):
nodedesc = node.desc(sdfg)
if depth >= 2:
nodedesc.storage = dtypes.StorageType.FPGA_Local
else:
if scope_dict[node]:
nodedesc.storage = dtypes.StorageType.FPGA_Local
else:
nodedesc.storage = dtypes.StorageType.FPGA_Global
if (hasattr(node, "schedule")
and node.schedule == dace.dtypes.ScheduleType.Default):
node.schedule = dace.dtypes.ScheduleType.FPGA_Device
if isinstance(node, nodes.NestedSDFG):
for s in node.sdfg.nodes():
fpga_update(node.sdfg, s, depth + 1)
# If this node has been visited already, skip it
if node in visited:
continue
visited.add(node)
# Set the node parent (or its parent's children)
if not node_to_children:
result[node] = current_scope
else:
result[current_scope].append(node)
successors = [n for n in graph.successors(node) if n not in visited]
# If this is an Entry Node, we need to recurse further
if isinstance(node, nd.EntryNode):
node_queue.extend(
_scope_dict_inner(graph, collections.deque(successors), node,
node_to_children, result))
# If this is an Exit Node, we push the successors to the external
# queue
elif isinstance(node, nd.ExitNode):
external_queue.extend(successors)
# Otherwise, it is a plain node, and we push its successors to the
# same queue
else:
node_queue.extend(successors)
return external_queue