diff --git a/dace/codegen/targets/cpp.py b/dace/codegen/targets/cpp.py index 89239abcb3..6ad55112f0 100644 --- a/dace/codegen/targets/cpp.py +++ b/dace/codegen/targets/cpp.py @@ -291,7 +291,7 @@ def emit_memlet_reference(dispatcher: 'TargetDispatcher', typedef = conntype.ctype offset = cpp_offset_expr(desc, memlet.subset) offset_expr = '[' + offset + ']' - is_scalar = not isinstance(conntype, dtypes.pointer) + is_scalar = not isinstance(conntype, dtypes.pointer) and not fpga.is_fpga_array(desc) ptrname = ptr(memlet.data, desc, sdfg, dispatcher.frame) ref = '' diff --git a/dace/codegen/targets/cpu.py b/dace/codegen/targets/cpu.py index c7d05de5a3..f7b464897e 100644 --- a/dace/codegen/targets/cpu.py +++ b/dace/codegen/targets/cpu.py @@ -191,7 +191,7 @@ def generate_node(self, def allocate_view(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg: SDFGState, state_id: int, node: nodes.AccessNode, global_stream: CodeIOStream, declaration_stream: CodeIOStream, - allocation_stream: CodeIOStream) -> None: + allocation_stream: CodeIOStream, decouple_array_interfaces: bool = False) -> None: """ Allocates (creates pointer and refers to original) a view of an existing array, scalar, or view. @@ -233,7 +233,8 @@ def allocate_view(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg: SDFGState, stat name, dtypes.pointer(nodedesc.dtype), ancestor=0, - is_write=is_write) + is_write=is_write, + decouple_array_interfaces=decouple_array_interfaces) # Test for views of container arrays and structs if isinstance(sdfg.arrays[viewed_dnode.data], (data.Structure, data.ContainerArray, data.ContainerView)): diff --git a/dace/codegen/targets/xilinx.py b/dace/codegen/targets/xilinx.py index 8dac2720b6..fcb9ccf2d7 100644 --- a/dace/codegen/targets/xilinx.py +++ b/dace/codegen/targets/xilinx.py @@ -1216,7 +1216,7 @@ def allocate_view(self, sdfg: dace.SDFG, cfg: ControlFlowRegion, dfg: dace.SDFGS node: dace.nodes.AccessNode, global_stream: CodeIOStream, declaration_stream: CodeIOStream, allocation_stream: CodeIOStream) -> None: return self._cpu_codegen.allocate_view(sdfg, cfg, dfg, state_id, node, global_stream, declaration_stream, - allocation_stream) + allocation_stream, decouple_array_interfaces=self._decouple_array_interfaces) def generate_nsdfg_arguments(self, sdfg, cfg, dfg, state, node): # Connectors that are both input and output share the same name, unless diff --git a/dace/libraries/blas/nodes/batched_matmul.py b/dace/libraries/blas/nodes/batched_matmul.py index 1ced5b0cba..e18288ea6f 100644 --- a/dace/libraries/blas/nodes/batched_matmul.py +++ b/dace/libraries/blas/nodes/batched_matmul.py @@ -22,7 +22,7 @@ class ExpandBatchedMatMulPure(ExpandTransformation): @staticmethod def make_sdfg(node, parent_state, parent_sdfg): # Get metadata from parent SDFG - ((edge_a, outer_array_a, shape_a, strides_a), (edge_b, outer_array_b, shape_b, strides_b), + ((edge_a, outer_array_a, shape_a, strides_a, _, _), (edge_b, outer_array_b, shape_b, strides_b, _, _), cdata) = _get_matmul_operands(node, parent_state, parent_sdfg) outedge = parent_state.out_edges(node)[0] cdesc = parent_sdfg.arrays[outedge.data.data] @@ -52,7 +52,7 @@ def make_sdfg(node, parent_state, parent_sdfg): _, array_a = sdfg.add_array("_a", shape_a, dtype_a, strides=strides_a, storage=storage) _, array_b = sdfg.add_array("_b", shape_b, dtype_b, strides=strides_b, storage=storage) - _, array_c = sdfg.add_array("_c", shape_c, dtype_c, strides=cdata[-1], storage=storage) + _, array_c = sdfg.add_array("_c", shape_c, dtype_c, strides=cdata[-3], storage=storage) # Add an initialization state init_state = sdfg.add_state() @@ -91,7 +91,7 @@ class ExpandBatchedMatMulMKL(ExpandTransformation): @staticmethod def expansion(node, state, sdfg): node.validate(sdfg, state) - (_, adesc, ashape, astrides), (_, bdesc, bshape, bstrides), _ = _get_matmul_operands(node, state, sdfg) + (_, adesc, ashape, astrides, _, _), (_, bdesc, bshape, bstrides, _, _), _ = _get_matmul_operands(node, state, sdfg) cdesc: dt.Array = sdfg.arrays[state.out_edges(node)[0].data.data] check_access(dtypes.ScheduleType.CPU_Multicore, adesc, bdesc, cdesc) dtype = cdesc.dtype.base_type @@ -160,7 +160,7 @@ class ExpandBatchedMatMulOpenBLAS(ExpandTransformation): @staticmethod def expansion(node, state, sdfg): node.validate(sdfg, state) - (_, adesc, ashape, astrides), (_, bdesc, bshape, bstrides), _ = _get_matmul_operands(node, state, sdfg) + (_, adesc, ashape, astrides, _, _), (_, bdesc, bshape, bstrides, _, _), _ = _get_matmul_operands(node, state, sdfg) cdesc = sdfg.arrays[state.out_edges(node)[0].data.data] check_access(dtypes.ScheduleType.CPU_Multicore, adesc, bdesc, cdesc) dtype = cdesc.dtype.base_type @@ -446,10 +446,7 @@ def validate(self, sdfg, state): f'may not match', UserWarning) elif not res: raise ValueError("Inputs to matrix-matrix product must agree in the k-dimension") - out_subset = dc(out_memlet.subset) - out_subset.squeeze() - size2 = out_subset.size() - if len(size2) != 3: + if len(out_memlet.subset) != 3: raise ValueError("batched matrix-matrix product only supported on matrices") diff --git a/dace/libraries/blas/nodes/gemm.py b/dace/libraries/blas/nodes/gemm.py index ac8732d106..b5683c7650 100644 --- a/dace/libraries/blas/nodes/gemm.py +++ b/dace/libraries/blas/nodes/gemm.py @@ -7,8 +7,7 @@ from dace.frontend.common import op_repository as oprepo import dace.sdfg.nodes from dace.transformation.transformation import ExpandTransformation -from dace.libraries.blas.blas_helpers import (to_blastype, get_gemm_opts, check_access, dtype_to_cudadatatype, - to_cublas_computetype) +from dace.libraries.blas.blas_helpers import to_blastype, check_access, dtype_to_cudadatatype, to_cublas_computetype from dace.libraries.blas.nodes.matmul import (_get_matmul_operands, _get_codegen_gemm_opts) from .. import environments import numpy as np @@ -47,7 +46,7 @@ class ExpandGemmPure(ExpandTransformation): def make_sdfg(node, parent_state, parent_sdfg): sdfg = dace.SDFG(node.label + "_sdfg") - ((edge_a, outer_array_a, shape_a, strides_a), (edge_b, outer_array_b, shape_b, strides_b), + ((edge_a, outer_array_a, shape_a, strides_a, _, _), (edge_b, outer_array_b, shape_b, strides_b, _, _), cdata) = _get_matmul_operands(node, parent_state, parent_sdfg) dtype_a = outer_array_a.dtype.type @@ -79,7 +78,7 @@ def make_sdfg(node, parent_state, parent_sdfg): _, array_a = sdfg.add_array("_a", shape_a, dtype_a, strides=strides_a, storage=outer_array_a.storage) _, array_b = sdfg.add_array("_b", shape_b, dtype_b, strides=strides_b, storage=outer_array_b.storage) - _, array_c = sdfg.add_array("_c", shape_c, dtype_c, strides=cdata[-1], storage=cdata[1].storage) + _, array_c = sdfg.add_array("_c", shape_c, dtype_c, strides=cdata[-3], storage=cdata[1].storage) if equal_valued(1, node.alpha): mul_program = "__out = __a * __b" @@ -93,7 +92,7 @@ def make_sdfg(node, parent_state, parent_sdfg): state = sdfg.add_state_after(init_state, node.label + "_state") if '_cin' in node.in_connectors: - sdfg.add_array("_cin", shape_c, dtype_c, strides=cdata[-1], storage=cdata[1].storage) + sdfg.add_array("_cin", shape_c, dtype_c, strides=cdata[-3], storage=cdata[1].storage) mul_out, mul_out_array = "_c", array_c output_nodes = None @@ -159,7 +158,7 @@ class ExpandGemmOpenBLAS(ExpandTransformation): @staticmethod def expansion(node, state, sdfg): node.validate(sdfg, state) - (_, adesc, ashape, astrides), (_, bdesc, bshape, bstrides), _ = _get_matmul_operands(node, state, sdfg) + (_, adesc, _, _, _, _), (_, bdesc, _, _, _, _), _ = _get_matmul_operands(node, state, sdfg) dtype = adesc.dtype.base_type func = to_blastype(dtype.type).lower() + 'gemm' alpha = f'{dtype.ctype}({node.alpha})' @@ -458,7 +457,7 @@ class ExpandGemmPBLAS(ExpandTransformation): @staticmethod def expansion(node, state, sdfg): node.validate(sdfg, state) - (_, adesc, ashape, astrides), (_, bdesc, bshape, bstrides), _ = _get_matmul_operands(node, state, sdfg) + (_, adesc, ashape, _, _, _), (_, bdesc, bshape, _, _, _), _ = _get_matmul_operands(node, state, sdfg) dtype = adesc.dtype.base_type if not equal_valued(0, node.beta): @@ -513,8 +512,8 @@ def expansion(node, parent_state, parent_sdfg, num_pes=32, tile_size_m=None): :return: """ - ((edge_a, outer_array_a, shape_a, strides_a), (edge_b, outer_array_b, shape_b, strides_b), - (edge_c, outer_array_c, shape_c, strides_c)) = _get_matmul_operands(node, parent_state, parent_sdfg) + ((edge_a, outer_array_a, shape_a, strides_a, _, _), (edge_b, outer_array_b, shape_b, strides_b, _, _), + (edge_c, outer_array_c, shape_c, strides_c, _, _)) = _get_matmul_operands(node, parent_state, parent_sdfg) dtype_a = outer_array_a.dtype.type dtype_b = outer_array_b.dtype.type @@ -1013,17 +1012,11 @@ def validate(self, sdfg, state): size2 = None for _, _, _, dst_conn, memlet in state.in_edges(self): if dst_conn == '_a': - subset = dc(memlet.subset) - subset.squeeze() - size0 = subset.size() + size0 = memlet.subset.size() if dst_conn == '_b': - subset = dc(memlet.subset) - subset.squeeze() - size1 = subset.size() + size1 = memlet.subset.size() if dst_conn == '_c': - subset = dc(memlet.subset) - subset.squeeze() - size2 = subset.size() + size2 = memlet.subset.size() if self.transA: size0 = list(reversed(size0)) @@ -1043,9 +1036,7 @@ def validate(self, sdfg, state): UserWarning) elif not res: raise ValueError("Inputs to matrix-matrix product must agree in the k-dimension") - out_subset = dc(out_memlet.subset) - out_subset.squeeze() - size3 = out_subset.size() + size3 = out_memlet.subset.size() if size2 is not None: res = [equal(s0, s1) for s0, s1 in zip(size2, size3)] fail = any([r is False for r in res]) diff --git a/dace/libraries/blas/nodes/gemv.py b/dace/libraries/blas/nodes/gemv.py index 52091c6864..c598bbcbe3 100644 --- a/dace/libraries/blas/nodes/gemv.py +++ b/dace/libraries/blas/nodes/gemv.py @@ -10,7 +10,6 @@ from dace.libraries.blas import blas_helpers from dace.frontend.common import op_repository as oprepo from dace.libraries.blas import environments -from dace.sdfg import nodes, utils as sdutils import numpy as np import warnings @@ -24,13 +23,8 @@ class ExpandGemvPure(ExpandTransformation): def expansion(node, parent_state, parent_sdfg, **kwargs): node.validate(parent_sdfg, parent_state) sdfg = dace.SDFG(node.label + "_sdfg") - ((edge_a, outer_array_a, shape_a, strides_a), (edge_x, outer_array_x, shape_x, strides_x), - (edge_y, outer_array_y, shape_y, strides_y)) = _get_matmul_operands(node, - parent_state, - parent_sdfg, - name_lhs="_A", - name_rhs="_x", - name_out="_y") + ((edge_a, outer_array_a, _, _, shape_a, strides_a), (edge_x, outer_array_x, _, _, shape_x, strides_x), + (edge_y, outer_array_y, _, _, shape_y, strides_y)) = _get_matmul_operands(node, parent_state, parent_sdfg, name_lhs="_A", name_rhs="_x", name_out="_y") dtype_a = outer_array_a.dtype.type dtype_x = outer_array_x.dtype.type dtype_y = outer_array_y.dtype.type @@ -154,13 +148,8 @@ def expansion(node, parent_state, parent_sdfg, tile_size_x=None, tile_size_y=Non beta = node.beta # Get input/output data (the method considers also the presence of view nodes) - ((edge_a, desc_a, shape_a, strides_a), (edge_x, desc_x, shape_x, strides_x), - (edge_y, desc_y, shape_y, strides_y)) = _get_matmul_operands(node, - parent_state, - parent_sdfg, - name_lhs="_A", - name_rhs="_x", - name_out="_y") + ((edge_a, desc_a, _, _, shape_a, strides_a), (edge_x, desc_x, _, _, shape_x, strides_x), + (edge_y, desc_y, _, _, shape_y, strides_y)) = _get_matmul_operands(node, parent_state, parent_sdfg, name_lhs="_A", name_rhs="_x", name_out="_y") # Create local versions of input/output data nodes _, desc_a = sdfg.add_array("_A", @@ -618,13 +607,8 @@ class ExpandGemvCuBLAS(ExpandTransformation): def expansion(node: 'Gemv', state, sdfg, m=None, n=None, **kwargs): node.validate(sdfg, state) - ((edge_a, outer_array_a, shape_a, strides_a), (edge_x, outer_array_x, shape_x, strides_x), - (edge_y, outer_array_y, shape_y, strides_y)) = _get_matmul_operands(node, - state, - sdfg, - name_lhs="_A", - name_rhs="_x", - name_out="_y") + ((edge_a, outer_array_a, _, _, shape_a, strides_a), (edge_x, outer_array_x, _, _, shape_x, strides_x), + (edge_y, outer_array_y, _, _, shape_y, strides_y)) = _get_matmul_operands(node, state, sdfg, name_lhs="_A", name_rhs="_x", name_out="_y") dtype_a = outer_array_a.dtype.type dtype = outer_array_x.dtype.base_type veclen = outer_array_x.dtype.veclen @@ -720,13 +704,8 @@ def expansion(node: 'Gemv', state, sdfg, m=None, n=None, **kwargs): node.validate(sdfg, state) - ((edge_a, outer_array_a, shape_a, strides_a), (edge_x, outer_array_x, shape_x, strides_x), - (edge_y, outer_array_y, shape_y, strides_y)) = _get_matmul_operands(node, - state, - sdfg, - name_lhs="_A", - name_rhs="_x", - name_out="_y") + ((edge_a, outer_array_a, _, _, shape_a, strides_a), (edge_x, outer_array_x, _, _, shape_x, strides_x), + (edge_y, outer_array_y, _, _, shape_y, strides_y)) = _get_matmul_operands(node, state, sdfg, name_lhs="_A", name_rhs="_x", name_out="_y") dtype_a = outer_array_a.dtype.type dtype = outer_array_x.dtype.base_type veclen = outer_array_x.dtype.veclen @@ -806,13 +785,8 @@ class ExpandGemvPBLAS(ExpandTransformation): @staticmethod def expansion(node: 'Gemv', state, sdfg, m=None, n=None, **kwargs): node.validate(sdfg, state) - ((edge_a, outer_array_a, shape_a, strides_a), (edge_x, outer_array_x, shape_x, strides_x), - (edge_y, outer_array_y, shape_y, strides_y)) = _get_matmul_operands(node, - state, - sdfg, - name_lhs="_A", - name_rhs="_x", - name_out="_y") + ((edge_a, outer_array_a, _, _, shape_a, strides_a), (edge_x, outer_array_x, _, _, shape_x, strides_x), + (edge_y, outer_array_y, _, _, shape_y, strides_y)) = _get_matmul_operands(node, state, sdfg, name_lhs="_A", name_rhs="_x", name_out="_y") dtype_a = outer_array_a.dtype.type dtype = outer_array_x.dtype.base_type veclen = outer_array_x.dtype.veclen diff --git a/dace/libraries/blas/nodes/ger.py b/dace/libraries/blas/nodes/ger.py index 55ab4677f8..c22f8f7010 100644 --- a/dace/libraries/blas/nodes/ger.py +++ b/dace/libraries/blas/nodes/ger.py @@ -1,23 +1,18 @@ # Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved. -from dace.properties import Property, SymbolicProperty +from dace.properties import SymbolicProperty from dace.transformation.transformation import ExpandTransformation from dace.frontend.common import op_repository as oprepo from dace.sdfg.nodes import LibraryNode -from dace.libraries.blas.nodes.matmul import _get_matmul_operands import dace.library as library from dace.sdfg import SDFG, SDFGState, nodes from dace import data as dt, memlet as mm, subsets as sbs import dace import copy -import numpy as np import dace.library import dace.properties import dace.sdfg.nodes -from dace import dtypes -from dace.memlet import Memlet - @library.expansion class ExpandGerPure(ExpandTransformation): diff --git a/dace/libraries/blas/nodes/matmul.py b/dace/libraries/blas/nodes/matmul.py index 83d07ded29..db780854ab 100644 --- a/dace/libraries/blas/nodes/matmul.py +++ b/dace/libraries/blas/nodes/matmul.py @@ -12,23 +12,27 @@ def _get_matmul_operands(node, state, sdfg, name_lhs="_a", name_rhs="_b", name_o res_rhs = None for edge in state.all_edges(node): if edge.dst_conn in [name_lhs, name_rhs]: - subset = dc(edge.data.subset) - squeezed = subset.squeeze() - size = subset.size() + size = edge.data.subset.size() + squeezed = dc(edge.data.subset) + squeezed_dims = squeezed.squeeze() + squeezed_size = squeezed.size() outer_array = sdfg.data(dace.sdfg.find_input_arraynode(state, edge).data) - strides = [s for i, s in enumerate(outer_array.strides) if i in squeezed] - res = edge, outer_array, size, strides + strides = list(outer_array.strides) + squeezed_strides = [s for i, s in enumerate(outer_array.strides) if i in squeezed_dims] + res = edge, outer_array, size, strides, squeezed_size, squeezed_strides if edge.dst_conn == name_lhs: res_lhs = res else: res_rhs = res elif edge.src_conn == name_out: - subset = dc(edge.data.subset) - squeezed = subset.squeeze() - size = subset.size() + size = edge.data.subset.size() + squeezed = dc(edge.data.subset) + squeezed_dims = squeezed.squeeze() + squeezed_size = squeezed.size() outer_array = sdfg.data(dace.sdfg.find_output_arraynode(state, edge).data) - strides = [s for i, s in enumerate(outer_array.strides) if i in squeezed] - res_out = edge, outer_array, size, strides + strides = list(outer_array.strides) + squeezed_strides = [s for i, s in enumerate(outer_array.strides) if i in squeezed_dims] + res_out = edge, outer_array, size, strides, squeezed_size, squeezed_strides for res, name in ((res_lhs, name_lhs), (res_rhs, name_rhs), (res_out, name_out)): if res is None: raise ValueError("Matrix multiplication connector " "\"{}\" not found.".format(name)) @@ -85,7 +89,7 @@ def _get_codegen_gemm_opts(node, state, sdfg, adesc, bdesc, cdesc, alpha, beta, from dace.codegen.common import sym2cpp from dace.libraries.blas.blas_helpers import get_gemm_opts - (_, _, ashape, astride), (_, _, bshape, bstride), (_, _, cshape, cstride) = _get_matmul_operands(node, state, sdfg) + (_, _, ashape, astride, _, _), (_, _, bshape, bstride, _, _), (_, _, cshape, cstride, _, _) = _get_matmul_operands(node, state, sdfg) if getattr(node, 'transA', False): ashape = list(reversed(ashape)) @@ -141,8 +145,8 @@ class SpecializeMatMul(dace.transformation.transformation.ExpandTransformation): @staticmethod def expansion(node, state, sdfg): a, b, c = _get_matmul_operands(node, state, sdfg) - size_a = a[2] - size_b = b[2] + size_a = a[4] + size_b = b[4] if len(size_a) == 2 and len(size_b) == 2: # Matrix and matrix -> GEMM from dace.libraries.blas.nodes.gemm import Gemm diff --git a/dace/transformation/dataflow/redundant_array.py b/dace/transformation/dataflow/redundant_array.py index 5e5072ff32..f426df9702 100644 --- a/dace/transformation/dataflow/redundant_array.py +++ b/dace/transformation/dataflow/redundant_array.py @@ -557,13 +557,13 @@ def apply(self, graph, sdfg): from dace.libraries.standard import Reduce reduction = False for e in graph.in_edges(in_array): - if isinstance(e.src, Reduce) or (isinstance(e.src, nodes.NestedSDFG) + if isinstance(e.src, Reduce) or (isinstance(e.src, (nodes.NestedSDFG, nodes.LibraryNode)) and len(in_desc.shape) != len(out_desc.shape)): reduction = True # If: # 1. A reduce node is involved; or - # 2. A NestedSDFG node is involved and the arrays have different dimensionality; or + # 2. A NestedSDFG or Library node is involved and the arrays have different dimensionality; or # 3. The memlet does not cover the removed array; or # 4. Dimensions are mismatching (all dimensions are popped); # create a view. @@ -1320,6 +1320,21 @@ def can_be_applied(self, graph, expr_index, sdfg, permissive=False): if any(m != a for m, a in zip(tmp.size(), out_shape)): return False + if not permissive: + # Ensure the view is not an input to a library node, where it may change the behavior, and similarly is not + # being used to change the strides for a nested SDFG. + for e in graph.out_edges(out_array): + for sink in graph.memlet_tree(e).leaves(): + sink_node = sink.dst + sink_conn = sink.dst_conn + if isinstance(sink_node, nodes.LibraryNode): + return False + if isinstance(sink_node, nodes.NestedSDFG): + if sink_conn in sink_node.sdfg.arrays and isinstance(out_desc, data.ArrayView): + ndesc = sink_node.sdfg.arrays[sink_conn] + if ndesc.strides != out_desc.strides or ndesc.dtype != out_desc.dtype: + return False + return True def apply(self, graph, sdfg): @@ -1458,6 +1473,21 @@ def can_be_applied(self, graph, expr_index, sdfg, permissive=False): if any(m != a for m, a in zip(tmp.size(), in_shape)): return False + if not permissive: + # Ensure the view is not an output from a library node, where it may change the behavior, and similarly is + # not being used to change the strides for a nested SDFG. + for e in graph.in_edges(in_array): + for source in graph.memlet_tree(e).leaves(): + source_node = source.src + source_conn = source.src_conn + if isinstance(source_node, nodes.LibraryNode): + return False + if isinstance(source_node, nodes.NestedSDFG): + if source_conn in source_node.sdfg.arrays and isinstance(in_desc, data.ArrayView): + ndesc = source_node.sdfg.arrays[source_conn] + if ndesc.strides != in_desc.strides or ndesc.dtype != in_desc.dtype: + return False + return True def apply(self, graph, sdfg): diff --git a/dace/transformation/interstate/sdfg_nesting.py b/dace/transformation/interstate/sdfg_nesting.py index 31e751bb6a..e1ab6b93f0 100644 --- a/dace/transformation/interstate/sdfg_nesting.py +++ b/dace/transformation/interstate/sdfg_nesting.py @@ -336,6 +336,16 @@ def apply(self, state: SDFGState, sdfg: SDFG): if edge is not None and not InlineSDFG._check_strides(array.strides, sdfg.arrays[edge.data.data].strides, edge.data, nsdfg_node): reshapes.add(aname) + # Among the nodes needing reshapes are any input/output nodes directly being used by library nodes. The shape + # influences the behavior of the access nodes and thus the reshapes through views are necessary. + for node in nstate.nodes(): + if isinstance(node, nodes.LibraryNode): + for ie in nstate.in_edges(node): + if isinstance(ie.src, nodes.AccessNode) and ie.src.data in inputs: + reshapes.add(ie.src.data) + for oe in nstate.out_edges(node): + if isinstance(oe.dst, nodes.AccessNode) and oe.dst.data in outputs: + reshapes.add(oe.dst.data) # All transients become transients of the parent (if data already # exists, find new name) diff --git a/tests/fpga/kernel_detection_test.py b/tests/fpga/kernel_detection_test.py index c1645a5c4a..f1576a2da8 100644 --- a/tests/fpga/kernel_detection_test.py +++ b/tests/fpga/kernel_detection_test.py @@ -187,7 +187,7 @@ def kernels_inside_component_2(x: dace.float32[8], y: dace.float32[8], v: dace.f return sdfg -@fpga_test() +@fpga_test(assert_ii_1=False) def test_kernels_lns_inside_component(): """ Tests for kernels detection inside a single connected component where we diff --git a/tests/npbench/polybench/doitgen_test.py b/tests/npbench/polybench/doitgen_test.py index 06cf3355f5..52fffd1d0d 100644 --- a/tests/npbench/polybench/doitgen_test.py +++ b/tests/npbench/polybench/doitgen_test.py @@ -5,11 +5,9 @@ import dace as dc import pytest import argparse -from dace.fpga_testing import fpga_test, xilinx_test +from dace.fpga_testing import fpga_test from dace.transformation.interstate import FPGATransformSDFG, InlineSDFG -from dace.transformation.dataflow import StreamingMemory, StreamingComposition -from dace.transformation.auto.auto_optimize import auto_optimize, fpga_auto_opt -from dace.config import set_temporary +from dace.transformation.auto.auto_optimize import auto_optimize # Data set sizes # NQ, NR, NP @@ -30,7 +28,7 @@ def doitgen_kernel(A: dc.float64[NR, NQ, NP], C4: dc.float64[NP, NP]): # Ideal - not working because Matmul with dim > 3 unsupported # A[:] = np.reshape(np.reshape(A, (NR, NQ, 1, NP)) @ C4, (NR, NQ, NP)) for r in range(NR): - A[r, :, :] = np.reshape(np.reshape(A[r], (NQ, 1, NP)) @ C4, (NQ, NP)) + A[r, :, :] = np.reshape(np.reshape(A[r], (NQ, NP)) @ C4, (NQ, NP)) def initialize(NR, NQ, NP, datatype=np.float64):