diff --git a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py index 25a2032aeb..b02bc89db8 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py @@ -585,63 +585,63 @@ def apply(self, model): for node in graph.node: node_ind += 1 - successors = model.find_consumers(node.output[0]) - if successors is not None and len(successors) >= 2: - output_tensor = node.output[0] - n_outputs = len(successors) + for output_tensor in node.output: + successors = model.find_consumers(output_tensor) + if successors is not None and len(successors) >= 2: + n_outputs = len(successors) - dt = model.get_tensor_datatype(output_tensor) + dt = model.get_tensor_datatype(output_tensor) - # skip conversion for layers with float input - if not dt.is_integer(): - continue + # skip conversion for layers with float input + if not dt.is_integer(): + continue - # create clone tensors - out_shape = model.get_tensor_shape(output_tensor) - out_tensor_clones = [] - for i in range(n_outputs): - clone = helper.make_tensor_value_info( - model.make_new_valueinfo_name(), TensorProto.FLOAT, out_shape - ) - model.graph.value_info.append(clone) - out_tensor_clones += [clone.name] + # create clone tensors + out_shape = model.get_tensor_shape(output_tensor) + out_tensor_clones = [] + for i in range(n_outputs): + clone = helper.make_tensor_value_info( + model.make_new_valueinfo_name(), TensorProto.FLOAT, out_shape + ) + model.graph.value_info.append(clone) + out_tensor_clones += [clone.name] - num_ch = int(out_shape[-1]) - vecs = out_shape[:-1] + num_ch = int(out_shape[-1]) + vecs = out_shape[:-1] - # create node with no parallelization first - pe = 1 + # create node with no parallelization first + pe = 1 - dup_node = helper.make_node( - "DuplicateStreams", - [output_tensor], - out_tensor_clones, - domain="finn.custom_op.fpgadataflow", - backend="fpgadataflow", - NumChannels=num_ch, - PE=pe, - inputDataType=dt.name, - numInputVectors=vecs, - NumOutputStreams=n_outputs, - outFIFODepths=[2] * n_outputs, - name="DuplicateStreams_" + node.name, - ) + dup_node = helper.make_node( + "DuplicateStreams", + [output_tensor], + out_tensor_clones, + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + NumChannels=num_ch, + PE=pe, + inputDataType=dt.name, + numInputVectors=vecs, + NumOutputStreams=n_outputs, + outFIFODepths=[2] * n_outputs, + name="DuplicateStreams_" + node.name, + ) - graph.node.insert(node_ind, dup_node) + graph.node.insert(node_ind, dup_node) - # connect successors to out tensor clone - clone_idx = 0 - for successor in successors: - for i, succ_input in enumerate(successor.input): - if succ_input == output_tensor: - successor.input[i] = out_tensor_clones[clone_idx] - clone_idx += 1 - # if one node has multiple connections to the same output - # find_direct_successors will return one node per input - # so break the inner loop will result in correct behaviour - break + # connect successors to out tensor clone + clone_idx = 0 + for successor in successors: + for i, succ_input in enumerate(successor.input): + if succ_input == output_tensor: + successor.input[i] = out_tensor_clones[clone_idx] + clone_idx += 1 + # if one node has multiple connections to the same output + # find_direct_successors will return one node per input + # so break the inner loop will result in correct behaviour + break - graph_modified = True + graph_modified = True if graph_modified: model = model.transform(SortGraph()) diff --git a/src/finn/transformation/fpgadataflow/insert_fifo.py b/src/finn/transformation/fpgadataflow/insert_fifo.py index 21fb843052..9ed0f51cd4 100644 --- a/src/finn/transformation/fpgadataflow/insert_fifo.py +++ b/src/finn/transformation/fpgadataflow/insert_fifo.py @@ -268,7 +268,7 @@ def apply(self, model): fifo_input_tensor = oh.make_tensor_value_info( model.make_new_valueinfo_name(), n0_tensor_dtype, - n0.get_normal_output_shape(), + n0.get_normal_output_shape(out_ind), ) graph.value_info.append(fifo_input_tensor) model.set_tensor_datatype(fifo_input_tensor.name, dtype) @@ -294,7 +294,7 @@ def apply(self, model): graph.node.append(fifo_node) # set fifo output tensor as new input tensor of second node - final_node.output[0] = fifo_input_tensor.name + final_node.output[out_ind] = fifo_input_tensor.name else: warnings.warn( """Output FIFO for %s has depth %d and won't diff --git a/src/finn/transformation/streamline/reorder.py b/src/finn/transformation/streamline/reorder.py index 8ac2d7dad6..9a7e9d0723 100644 --- a/src/finn/transformation/streamline/reorder.py +++ b/src/finn/transformation/streamline/reorder.py @@ -29,6 +29,7 @@ import numpy as np import qonnx.core.data_layout as DataLayout import warnings +from copy import deepcopy from onnx import TensorProto from onnx import helper as oh from qonnx.core.datatype import DataType @@ -641,6 +642,10 @@ def apply(self, model): # if initializer is not scalar, skip if np.prod(init0.shape) != 1: continue + if model.is_fork_node(prod0): + model = model.transform(MoveOpPastFork(prod0.op_type)) + # topology modified, "ask" ModelWrapper to apply this transform again + return (model, True) # Flatten input if required if len(init0.shape) > 0: init0 = init0.flatten()[0] @@ -713,6 +718,12 @@ def apply(self, model): elif producer is not None and producer.op_type == "Transpose": perms = list(get_by_name(producer.attribute, "perm").ints) if perms == [0, 3, 1, 2]: + # check if the producer is a fork node + # (need to move it past the fork before this transform) + if model.is_fork_node(producer): + model = model.transform(MoveTransposePastFork()) + # topology modified, "ask" ModelWrapper to apply this transform again + return (model, True) ceil_mode = get_by_name(n.attribute, "ceil_mode") if ceil_mode is not None: ceil_mode = ceil_mode.i @@ -764,6 +775,12 @@ def apply(self, model): if producer is not None and producer.op_type == "Transpose": perms = list(get_by_name(producer.attribute, "perm").ints) if perms == [0, 3, 1, 2]: + # check if the producer is a fork node + # (need to move it past the fork before this transform) + if model.is_fork_node(producer): + model = model.transform(MoveTransposePastFork()) + # topology modified, "ask" ModelWrapper to apply this transform again + return (model, True) old_value = model.get_initializer(n.input[scales_ind]) new_value = np.array( [old_value[idx] for idx in (0, 2, 3, 1)], @@ -813,10 +830,9 @@ class MoveOpPastFork(Transformation): can be merged with nodes in the branches """ - def __init__(self, op_name_list, get_attrs_fxn=lambda x: {}): + def __init__(self, op_name_list): super().__init__() self.ops_to_move = op_name_list - self.get_attrs_fxn = get_attrs_fxn def apply(self, model): graph = model.graph @@ -859,11 +875,9 @@ def apply(self, model): new_param_name = model.make_new_valueinfo_name() new_inp_list = [n.input[0], new_param_name] model.set_initializer(new_param_name, op_init_param) - attrs = self.get_attrs_fxn(n) - # TODO use copy of original node instead to get attrs? - new_node = oh.make_node( - n.op_type, new_inp_list, [new_output_tensor_name], **attrs - ) + new_node = deepcopy(n) + new_node.input[:] = new_inp_list + new_node.output[:] = [new_output_tensor_name] graph.node.insert(node_ind, new_node) node_ind += 1 @@ -901,7 +915,7 @@ def __init__(self): class MoveTransposePastFork(MoveOpPastFork): def __init__(self): - super().__init__(["Transpose"], lambda x: {"perm": get_by_name(x.attribute, "perm").ints}) + super().__init__(["Transpose"]) class MoveMaxPoolPastMultiThreshold(Transformation):