From 7985d042666df7f3791ee2a8c65162b7770591da Mon Sep 17 00:00:00 2001 From: Erika Hunhoff Date: Tue, 13 Aug 2024 15:36:13 -0600 Subject: [PATCH] Channel Broadcast Examples (#688) * Stub out format of broadcast example * First attempt a broadcast * Add multi herd broadcast detection example * Make channel names a little less error prone * Updated broadcast example to use new XRTRunner class * Update documentation on broadcast examples * First attempt at exposing broadcast_shape ChannelOp addr to python bindings * closer to working * Add ChannelOp wrapper * Broadcast example works! * Fixed second broadcast example * flip broadcast shape for variety * update lit test and documentation * Use ChannelOp wrapper in tests and other programming examples --- .../channel_examples/README.md | 7 +- .../channel_examples/broadcast/README.md | 4 + .../broadcast/multi_herd/Makefile | 17 +++ .../broadcast/multi_herd/broadcast.py | 129 ++++++++++++++++++ .../broadcast/multi_herd/run_makefile.lit | 8 ++ .../broadcast/single_herd/Makefile | 17 +++ .../broadcast/single_herd/broadcast.py | 123 +++++++++++++++++ .../broadcast/single_herd/run_makefile.lit | 8 ++ .../channel_size/channel_size.py | 4 +- .../multi_segment/herd_to_herd.py | 6 +- .../single_segment/herd_to_herd.py | 6 +- .../hierarchical/hierarchical.py | 8 +- .../worker_to_self/worker_to_self.py | 6 +- .../worker_to_worker/worker_to_worker.py | 6 +- .../channel/transpose.py | 4 +- .../multi_core_channel/multi_core_channel.py | 4 +- .../multi_launch_channel.py | 4 +- .../single_core_channel.py | 4 +- .../multi_segment_channel/multi_segment.py | 8 +- .../passthrough_channel.py | 4 +- .../passthrough_kernel/passthrough_kernel.py | 4 +- python/air/dialects/_air_ops_ext.py | 33 +++++ python/test/dialect/channel_get_put.py | 6 +- test/xrt/02_mul_shim_1x1/run.py | 6 +- test/xrt/03_mul_L1L2_1x1/run.py | 12 +- 25 files changed, 391 insertions(+), 47 deletions(-) create mode 100644 programming_examples/channel_examples/broadcast/README.md create mode 100644 programming_examples/channel_examples/broadcast/multi_herd/Makefile create mode 100644 programming_examples/channel_examples/broadcast/multi_herd/broadcast.py create mode 100644 programming_examples/channel_examples/broadcast/multi_herd/run_makefile.lit create mode 100644 programming_examples/channel_examples/broadcast/single_herd/Makefile create mode 100644 programming_examples/channel_examples/broadcast/single_herd/broadcast.py create mode 100644 programming_examples/channel_examples/broadcast/single_herd/run_makefile.lit diff --git a/programming_examples/channel_examples/README.md b/programming_examples/channel_examples/README.md index ce5e89fc4..35ce7d234 100644 --- a/programming_examples/channel_examples/README.md +++ b/programming_examples/channel_examples/README.md @@ -16,7 +16,7 @@ Warning: The multi-segment example is a work in progress! #### ```channel-size```: Use the channel size argument -This example ([channel_size/channel_size.py](channel_size/channel_size.py)) is a data passthrough example using the same tiling structure as the [matrix_scalar_add/multi_core_channel](../matrix_scalar_add/multi_core_channel.py) examples, only instead of using a separately defined channel for each tile/core, a bundle of channels is created (using the `ChannelOp` `size` parameter) and indexed into (the `ChannelGet` and `ChannelPut` `indices` parameter). +This example ([channel_size/channel_size.py](channel_size/channel_size.py)) is a data passthrough example using the same tiling structure as the [matrix_scalar_add/multi_core_channel](../matrix_scalar_add/multi_core_channel.py) examples, only instead of using a separately defined channel for each tile/core, a bundle of channels is created (using the `Channel` `size` parameter) and indexed into (the `ChannelGet` and `ChannelPut` `indices` parameter). #### ```hierarchical```: Use channels for sending data from Launch to Segment to Herd and back again @@ -34,6 +34,11 @@ This example ([worker_to_worker/worker_to_worker.py](worker_to_worker/worker_to_ WARNING: This example currently fails for unknown reasons. +#### ```broadcast```: + +These examples ([broadcast/multi_herd/broadcast.py](broadcast/multi_herd/broadcast.py) and [broadcast/single_herd/broadcast.py](broadcast/single_herd/broadcast.py)) are examples of using channels to broadcast data to multiple workers. + + #### Usage (For All Examples) To generate AIR MLIR from Python: diff --git a/programming_examples/channel_examples/broadcast/README.md b/programming_examples/channel_examples/broadcast/README.md new file mode 100644 index 000000000..9162a2b7c --- /dev/null +++ b/programming_examples/channel_examples/broadcast/README.md @@ -0,0 +1,4 @@ +# Broadcast Examples + +In both of these examples, we attempt to broadcast an input `a` to 3 workers. In `single_herd`, the herd `size=[1, 3]` whereas in `multi_herd` there are 3 herds of `size=[1, 1]`. +The workers then add a unique value to each element in the input image and output the new image to a unique per-worker output. \ No newline at end of file diff --git a/programming_examples/channel_examples/broadcast/multi_herd/Makefile b/programming_examples/channel_examples/broadcast/multi_herd/Makefile new file mode 100644 index 000000000..d0cd3ada1 --- /dev/null +++ b/programming_examples/channel_examples/broadcast/multi_herd/Makefile @@ -0,0 +1,17 @@ +# (c) Copyright 2024 Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT +srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + +targetname := $(shell basename ${srcdir}) + +all: run + +print: + ${powershell} python3 ${srcdir}/broadcast.py -p + +run: + mkdir -p ${srcdir}/build + cd ${srcdir}/build && ${powershell} python3 ${srcdir}/broadcast.py + +clean: + rm -rf ${srcdir}/build ${srcdir}/__pycache__ \ No newline at end of file diff --git a/programming_examples/channel_examples/broadcast/multi_herd/broadcast.py b/programming_examples/channel_examples/broadcast/multi_herd/broadcast.py new file mode 100644 index 000000000..590dac2b2 --- /dev/null +++ b/programming_examples/channel_examples/broadcast/multi_herd/broadcast.py @@ -0,0 +1,129 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT +import argparse +import numpy as np + +from air.ir import * +from air.dialects.air import * +from air.dialects.memref import AllocOp, DeallocOp, load, store +from air.dialects.func import FuncOp +from air.dialects.scf import for_, yield_ +from air.backend.xrt_runner import XRTRunner, type_mapper + +range_ = for_ + +IMAGE_WIDTH = 32 +IMAGE_HEIGHT = 16 +IMAGE_SIZE = [IMAGE_HEIGHT, IMAGE_WIDTH] + +INOUT_DATATYPE = np.int32 + +OUTPUT_HERD_NAMES = ["ChanOutB", "ChanOutC", "ChanOutD"] + + +@module_builder +def build_module(): + xrt_dtype = type_mapper(INOUT_DATATYPE) + memrefTyInOut = MemRefType.get(IMAGE_SIZE, xrt_dtype) + + mem_space_l1 = IntegerAttr.get(T.i32(), MemorySpace.L1) + image_type_l1 = MemRefType.get( + shape=IMAGE_SIZE, + element_type=xrt_dtype, + memory_space=mem_space_l1, + ) + + Channel("ChanIn", size=[1, 1], broadcast_shape=[3, 1]) + for name in OUTPUT_HERD_NAMES: + Channel(name) + + # We will send an image worth of data in and out + @FuncOp.from_py_func(memrefTyInOut, memrefTyInOut, memrefTyInOut, memrefTyInOut) + def copy(arg0, arg1, arg2, arg3): + + # The arguments are the input and output + @launch(operands=[arg0, arg1, arg2, arg3]) + def launch_body(a, b, c, d): + + ChannelPut("ChanIn", a) + ChannelGet(OUTPUT_HERD_NAMES[0], b) + ChannelGet(OUTPUT_HERD_NAMES[1], c) + ChannelGet(OUTPUT_HERD_NAMES[2], d) + + @segment(name="seg") + def segment_body(): + + for herd_num in range(3): + + @herd(name="broadcastherd" + str(herd_num), sizes=[1, 1]) + def herd_body(_tx, _ty, _sx, _sy): + + # We must allocate a buffer of image size for the input/output + image_in = AllocOp(image_type_l1, [], []) + image_out = AllocOp(image_type_l1, [], []) + + ChannelGet("ChanIn", image_in, indices=[herd_num, 0]) + + # Access every value in the image + for i in range_(IMAGE_HEIGHT): + for j in range_(IMAGE_WIDTH): + # Load the input value + val_in = load(image_in, [i, j]) + + # Calculate the output value + val_out = arith.addi( + val_in, arith.ConstantOp(T.i32(), herd_num + 1) + ) + + # Store the output value + store(val_out, image_out, [i, j]) + yield_([]) + yield_([]) + + ChannelPut(OUTPUT_HERD_NAMES[herd_num], image_out) + + DeallocOp(image_in) + DeallocOp(image_out) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + prog="run.py", + description="Builds, runs, and tests the channel broadcast multi herd example", + ) + parser.add_argument( + "-v", + "--verbose", + action="store_true", + ) + parser.add_argument( + "-p", + "--print-module-only", + action="store_true", + ) + args = parser.parse_args() + + mlir_module = build_module() + if args.print_module_only: + print(mlir_module) + exit(0) + + input_a = np.arange(np.prod(IMAGE_SIZE), dtype=INOUT_DATATYPE).reshape(IMAGE_SIZE) + output_b = np.arange(1, np.prod(IMAGE_SIZE) + 1, dtype=INOUT_DATATYPE).reshape( + IMAGE_SIZE + ) + output_c = np.arange(2, np.prod(IMAGE_SIZE) + 2, dtype=INOUT_DATATYPE).reshape( + IMAGE_SIZE + ) + output_d = np.arange(3, np.prod(IMAGE_SIZE) + 3, dtype=INOUT_DATATYPE).reshape( + IMAGE_SIZE + ) + + runner = XRTRunner(verbose=args.verbose, experimental_passes=True) + exit( + runner.run_test( + mlir_module, + inputs=[input_a], + expected_outputs=[output_b, output_c, output_d], + ) + ) diff --git a/programming_examples/channel_examples/broadcast/multi_herd/run_makefile.lit b/programming_examples/channel_examples/broadcast/multi_herd/run_makefile.lit new file mode 100644 index 000000000..f71210631 --- /dev/null +++ b/programming_examples/channel_examples/broadcast/multi_herd/run_makefile.lit @@ -0,0 +1,8 @@ +// (c) Copyright 2024 Advanced Micro Devices, Inc. + // SPDX-License-Identifier: MIT + // + // REQUIRES: ryzen_ai + // + // RUN: make -f %S/Makefile clean + // RUN: make -f %S/Makefile run | FileCheck %s + // CHECK: PASS! \ No newline at end of file diff --git a/programming_examples/channel_examples/broadcast/single_herd/Makefile b/programming_examples/channel_examples/broadcast/single_herd/Makefile new file mode 100644 index 000000000..d0cd3ada1 --- /dev/null +++ b/programming_examples/channel_examples/broadcast/single_herd/Makefile @@ -0,0 +1,17 @@ +# (c) Copyright 2024 Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT +srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + +targetname := $(shell basename ${srcdir}) + +all: run + +print: + ${powershell} python3 ${srcdir}/broadcast.py -p + +run: + mkdir -p ${srcdir}/build + cd ${srcdir}/build && ${powershell} python3 ${srcdir}/broadcast.py + +clean: + rm -rf ${srcdir}/build ${srcdir}/__pycache__ \ No newline at end of file diff --git a/programming_examples/channel_examples/broadcast/single_herd/broadcast.py b/programming_examples/channel_examples/broadcast/single_herd/broadcast.py new file mode 100644 index 000000000..b15d554b8 --- /dev/null +++ b/programming_examples/channel_examples/broadcast/single_herd/broadcast.py @@ -0,0 +1,123 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# SPDX-License-Identifier: MIT +import argparse +import numpy as np + +from air.ir import * +from air.dialects.air import * +from air.dialects.memref import AllocOp, DeallocOp, load, store +from air.dialects.func import FuncOp +from air.dialects.scf import for_, yield_ +from air.backend.xrt_runner import XRTRunner, type_mapper + +range_ = for_ + +IMAGE_WIDTH = 8 +IMAGE_HEIGHT = 6 +IMAGE_SIZE = [IMAGE_HEIGHT, IMAGE_WIDTH] + +INOUT_DATATYPE = np.int32 + + +@module_builder +def build_module(): + xrt_dtype = type_mapper(INOUT_DATATYPE) + memrefTyInOut = MemRefType.get(IMAGE_SIZE, xrt_dtype) + + mem_space_l1 = IntegerAttr.get(T.i32(), MemorySpace.L1) + image_type_l1 = MemRefType.get( + shape=IMAGE_SIZE, + element_type=xrt_dtype, + memory_space=mem_space_l1, + ) + + Channel("ChanIn", size=[1, 1], broadcast_shape=[1, 3]) + Channel("ChanOut", size=[1, 3]) + + # We will send an image worth of data in and out + @FuncOp.from_py_func(memrefTyInOut, memrefTyInOut, memrefTyInOut, memrefTyInOut) + def copy(arg0, arg1, arg2, arg3): + + # The arguments are the input and output + @launch(operands=[arg0, arg1, arg2, arg3]) + def launch_body(a, b, c, d): + + ChannelPut("ChanIn", a) + ChannelGet("ChanOut", b, indices=[0, 0]) + ChannelGet("ChanOut", c, indices=[0, 1]) + ChannelGet("ChanOut", d, indices=[0, 2]) + + @segment(name="seg") + def segment_body(): + + @herd(name="broadcastherd", sizes=[1, 3]) + def herd_body(tx, ty, _sx, _sy): + + # We must allocate a buffer of image size for the input/output + image_in = AllocOp(image_type_l1, [], []) + image_out = AllocOp(image_type_l1, [], []) + + ChannelGet("ChanIn", image_in, indices=[tx, ty]) + + # Access every value in the image + for i in range_(IMAGE_HEIGHT): + for j in range_(IMAGE_WIDTH): + # Load the input value + val_in = load(image_in, [i, j]) + + # Calculate the output value + val_out = arith.addi(val_in, arith.index_cast(T.i32(), ty)) + val_out = arith.addi(val_out, arith.ConstantOp(T.i32(), 1)) + + # Store the output value + store(val_out, image_out, [i, j]) + yield_([]) + yield_([]) + + ChannelPut("ChanOut", image_out, indices=[tx, ty]) + + DeallocOp(image_in) + DeallocOp(image_out) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + prog="run.py", + description="Builds, runs, and tests the channel broadcast multi herd example", + ) + parser.add_argument( + "-v", + "--verbose", + action="store_true", + ) + parser.add_argument( + "-p", + "--print-module-only", + action="store_true", + ) + args = parser.parse_args() + + mlir_module = build_module() + if args.print_module_only: + print(mlir_module) + exit(0) + + input_a = np.arange(np.prod(IMAGE_SIZE), dtype=INOUT_DATATYPE).reshape(IMAGE_SIZE) + output_b = np.arange(1, np.prod(IMAGE_SIZE) + 1, dtype=INOUT_DATATYPE).reshape( + IMAGE_SIZE + ) + output_c = np.arange(2, np.prod(IMAGE_SIZE) + 2, dtype=INOUT_DATATYPE).reshape( + IMAGE_SIZE + ) + output_d = np.arange(3, np.prod(IMAGE_SIZE) + 3, dtype=INOUT_DATATYPE).reshape( + IMAGE_SIZE + ) + + runner = XRTRunner(verbose=args.verbose, experimental_passes=True) + exit( + runner.run_test( + mlir_module, + inputs=[input_a], + expected_outputs=[output_b, output_c, output_d], + ) + ) diff --git a/programming_examples/channel_examples/broadcast/single_herd/run_makefile.lit b/programming_examples/channel_examples/broadcast/single_herd/run_makefile.lit new file mode 100644 index 000000000..f71210631 --- /dev/null +++ b/programming_examples/channel_examples/broadcast/single_herd/run_makefile.lit @@ -0,0 +1,8 @@ +// (c) Copyright 2024 Advanced Micro Devices, Inc. + // SPDX-License-Identifier: MIT + // + // REQUIRES: ryzen_ai + // + // RUN: make -f %S/Makefile clean + // RUN: make -f %S/Makefile run | FileCheck %s + // CHECK: PASS! \ No newline at end of file diff --git a/programming_examples/channel_examples/channel_size/channel_size.py b/programming_examples/channel_examples/channel_size/channel_size.py index 89d335acf..dd755b87f 100644 --- a/programming_examples/channel_examples/channel_size/channel_size.py +++ b/programming_examples/channel_examples/channel_size/channel_size.py @@ -32,8 +32,8 @@ def build_module(): memrefTyInOut = MemRefType.get(IMAGE_SIZE, xrt_dtype) # Create an input/output channel pair per worker - ChannelOp("ChanIn", size=[IMAGE_HEIGHT // TILE_HEIGHT, IMAGE_WIDTH // TILE_WIDTH]) - ChannelOp("ChanOut", size=[IMAGE_HEIGHT // TILE_HEIGHT, IMAGE_WIDTH // TILE_WIDTH]) + Channel("ChanIn", size=[IMAGE_HEIGHT // TILE_HEIGHT, IMAGE_WIDTH // TILE_WIDTH]) + Channel("ChanOut", size=[IMAGE_HEIGHT // TILE_HEIGHT, IMAGE_WIDTH // TILE_WIDTH]) # We will send an image worth of data in and out @FuncOp.from_py_func(memrefTyInOut, memrefTyInOut) diff --git a/programming_examples/channel_examples/herd_to_herd/multi_segment/herd_to_herd.py b/programming_examples/channel_examples/herd_to_herd/multi_segment/herd_to_herd.py index 9f9e6a4c8..3db2acb29 100644 --- a/programming_examples/channel_examples/herd_to_herd/multi_segment/herd_to_herd.py +++ b/programming_examples/channel_examples/herd_to_herd/multi_segment/herd_to_herd.py @@ -36,11 +36,11 @@ def build_module(): # Create two channels which will send/receive the # input/output data respectively - ChannelOp("ChanIn") - ChannelOp("ChanOut") + Channel("ChanIn") + Channel("ChanOut") # Create a channel we will use to pass data between works in two herds - ChannelOp("Herd2Herd") + Channel("Herd2Herd") # We will send an image worth of data in and out @FuncOp.from_py_func(memrefTyInOut, memrefTyInOut) diff --git a/programming_examples/channel_examples/herd_to_herd/single_segment/herd_to_herd.py b/programming_examples/channel_examples/herd_to_herd/single_segment/herd_to_herd.py index 729802f4d..88052954a 100644 --- a/programming_examples/channel_examples/herd_to_herd/single_segment/herd_to_herd.py +++ b/programming_examples/channel_examples/herd_to_herd/single_segment/herd_to_herd.py @@ -38,11 +38,11 @@ def build_module(): # Create two channels which will send/receive the # input/output data respectively - ChannelOp("ChanIn") - ChannelOp("ChanOut") + Channel("ChanIn") + Channel("ChanOut") # Create a channel we will use to pass data between works in two herds - ChannelOp("Herd2Herd") + Channel("Herd2Herd") # We will send an image worth of data in and out @FuncOp.from_py_func(memrefTyInOut, memrefTyInOut) diff --git a/programming_examples/channel_examples/hierarchical/hierarchical.py b/programming_examples/channel_examples/hierarchical/hierarchical.py index f9969bb82..568b49d49 100644 --- a/programming_examples/channel_examples/hierarchical/hierarchical.py +++ b/programming_examples/channel_examples/hierarchical/hierarchical.py @@ -38,10 +38,10 @@ def build_module(): memory_space=mem_space_l2, ) - ChannelOp("ChanInL2") - ChannelOp("ChanOutL2") - ChannelOp("ChanInL1") - ChannelOp("ChanOutL1") + Channel("ChanInL2") + Channel("ChanOutL2") + Channel("ChanInL1") + Channel("ChanOutL1") # We will send an image worth of data in and out @FuncOp.from_py_func(memrefTyInOut, memrefTyInOut) diff --git a/programming_examples/channel_examples/worker_to_self/worker_to_self.py b/programming_examples/channel_examples/worker_to_self/worker_to_self.py index 0d19f24eb..cc8693211 100644 --- a/programming_examples/channel_examples/worker_to_self/worker_to_self.py +++ b/programming_examples/channel_examples/worker_to_self/worker_to_self.py @@ -25,9 +25,9 @@ def build_module(): # Type and method of input/output memrefTyInOut = T.MemRefType.get(IMAGE_SIZE, xrt_dtype) - ChannelOp("ChanIn") - ChannelOp("ChanOut") - ChannelOp("ToSelf") + Channel("ChanIn") + Channel("ChanOut") + Channel("ToSelf") mem_space_l1 = IntegerAttr.get(T.i32(), MemorySpace.L1) image_type_l1 = MemRefType.get( diff --git a/programming_examples/channel_examples/worker_to_worker/worker_to_worker.py b/programming_examples/channel_examples/worker_to_worker/worker_to_worker.py index a88b5ba06..40b8b5361 100644 --- a/programming_examples/channel_examples/worker_to_worker/worker_to_worker.py +++ b/programming_examples/channel_examples/worker_to_worker/worker_to_worker.py @@ -33,9 +33,9 @@ def build_module(): memrefTyInOut = MemRefType.get(IMAGE_SIZE, xrt_dtype) # Create an input/output channel pair per worker - ChannelOp("ChanIn", size=[IMAGE_HEIGHT // TILE_HEIGHT, IMAGE_WIDTH // TILE_WIDTH]) - ChannelOp("ChanOut", size=[IMAGE_HEIGHT // TILE_HEIGHT, IMAGE_WIDTH // TILE_WIDTH]) - ChannelOp( + Channel("ChanIn", size=[IMAGE_HEIGHT // TILE_HEIGHT, IMAGE_WIDTH // TILE_WIDTH]) + Channel("ChanOut", size=[IMAGE_HEIGHT // TILE_HEIGHT, IMAGE_WIDTH // TILE_WIDTH]) + Channel( "SwitchTiles", size=[IMAGE_HEIGHT // TILE_HEIGHT, IMAGE_WIDTH // TILE_WIDTH] ) diff --git a/programming_examples/data_transfer_transpose/channel/transpose.py b/programming_examples/data_transfer_transpose/channel/transpose.py index f1fe6cf66..94638b969 100644 --- a/programming_examples/data_transfer_transpose/channel/transpose.py +++ b/programming_examples/data_transfer_transpose/channel/transpose.py @@ -23,8 +23,8 @@ def build_module(m, k, dtype): memrefTyIn = MemRefType.get(shape=[m, k], element_type=xrt_dtype) memrefTyOut = MemRefType.get(shape=[k, m], element_type=xrt_dtype) - ChannelOp("ChanIn") - ChannelOp("ChanOut") + Channel("ChanIn") + Channel("ChanOut") # We will send an image worth of data in and out @FuncOp.from_py_func(memrefTyIn, memrefTyOut) diff --git a/programming_examples/matrix_scalar_add/multi_core_channel/multi_core_channel.py b/programming_examples/matrix_scalar_add/multi_core_channel/multi_core_channel.py index 02a3f0157..53e1b60e9 100644 --- a/programming_examples/matrix_scalar_add/multi_core_channel/multi_core_channel.py +++ b/programming_examples/matrix_scalar_add/multi_core_channel/multi_core_channel.py @@ -29,8 +29,8 @@ def build_module(image_height, image_width, tile_height, tile_width, np_dtype): # Create an input/output channel pair per worker for h in range(image_height // tile_height): for w in range(image_width // tile_width): - ChannelOp(format_name("ChanIn", h, w)) - ChannelOp(format_name("ChanOut", h, w)) + Channel(format_name("ChanIn", h, w)) + Channel(format_name("ChanOut", h, w)) # We will send an image worth of data in and out @FuncOp.from_py_func(memrefTyInOut, memrefTyInOut) diff --git a/programming_examples/matrix_scalar_add/multi_launch_channel/multi_launch_channel.py b/programming_examples/matrix_scalar_add/multi_launch_channel/multi_launch_channel.py index 17249ecf7..c56688982 100644 --- a/programming_examples/matrix_scalar_add/multi_launch_channel/multi_launch_channel.py +++ b/programming_examples/matrix_scalar_add/multi_launch_channel/multi_launch_channel.py @@ -29,8 +29,8 @@ def build_module(image_height, image_width, tile_height, tile_width, np_dtype): # Create an input/output channel pair per launch for h in range(image_height // tile_height): for w in range(image_width // tile_width): - ChannelOp(format_name("ChanIn", h, w)) - ChannelOp(format_name("ChanOut", h, w)) + Channel(format_name("ChanIn", h, w)) + Channel(format_name("ChanOut", h, w)) # We will send an image worth of data in and out @FuncOp.from_py_func(memrefTyInOut, memrefTyInOut) diff --git a/programming_examples/matrix_scalar_add/single_core_channel/single_core_channel.py b/programming_examples/matrix_scalar_add/single_core_channel/single_core_channel.py index b48d41f82..766d6f8de 100644 --- a/programming_examples/matrix_scalar_add/single_core_channel/single_core_channel.py +++ b/programming_examples/matrix_scalar_add/single_core_channel/single_core_channel.py @@ -24,8 +24,8 @@ def build_module(image_height, image_width, tile_height, tile_width, np_dtype): # Create two channels which will send/receive the # input/output data respectively - ChannelOp("ChanIn") - ChannelOp("ChanOut") + Channel("ChanIn") + Channel("ChanOut") # We will send an image worth of data in and out @FuncOp.from_py_func(memrefTyInOut, memrefTyInOut) diff --git a/programming_examples/multi_segment/multi_segment_channel/multi_segment.py b/programming_examples/multi_segment/multi_segment_channel/multi_segment.py index 705cec06f..570be2840 100644 --- a/programming_examples/multi_segment/multi_segment_channel/multi_segment.py +++ b/programming_examples/multi_segment/multi_segment_channel/multi_segment.py @@ -33,10 +33,10 @@ def build_module(): memory_space=mem_space_l1, ) - ChannelOp("ChanInA") - ChannelOp("ChanInB") - ChannelOp("ChanOutC") - ChannelOp("ChanOutD") + Channel("ChanInA") + Channel("ChanInB") + Channel("ChanOutC") + Channel("ChanOutD") # We will send an image worth of data in and out @FuncOp.from_py_func(memrefTyInOut, memrefTyInOut, memrefTyInOut, memrefTyInOut) diff --git a/programming_examples/passthrough/passthrough_channel/passthrough_channel.py b/programming_examples/passthrough/passthrough_channel/passthrough_channel.py index 776576fa1..f438e68ec 100644 --- a/programming_examples/passthrough/passthrough_channel/passthrough_channel.py +++ b/programming_examples/passthrough/passthrough_channel/passthrough_channel.py @@ -22,8 +22,8 @@ def build_module(vector_size, num_subvectors): # Type and method of input/output memrefTyInOut = T.memref(vector_size, xrt_dtype) - ChannelOp("ChanIn") - ChannelOp("ChanOut") + Channel("ChanIn") + Channel("ChanOut") # The compute core splits input into subvectors for processing lineWidthInBytes = vector_size // num_subvectors diff --git a/programming_examples/passthrough/passthrough_kernel/passthrough_kernel.py b/programming_examples/passthrough/passthrough_kernel/passthrough_kernel.py index 7268cfadb..7bfd628ee 100644 --- a/programming_examples/passthrough/passthrough_kernel/passthrough_kernel.py +++ b/programming_examples/passthrough/passthrough_kernel/passthrough_kernel.py @@ -22,8 +22,8 @@ def build_module(vector_size, num_subvectors): # Type and method of input/output memrefTyInOut = T.memref(vector_size, xrt_dtype) - ChannelOp("ChanIn") - ChannelOp("ChanOut") + Channel("ChanIn") + Channel("ChanOut") # The compute core splits input into subvectors for processing lineWidthInBytes = vector_size // num_subvectors diff --git a/python/air/dialects/_air_ops_ext.py b/python/air/dialects/_air_ops_ext.py index 749583ea4..65f837938 100644 --- a/python/air/dialects/_air_ops_ext.py +++ b/python/air/dialects/_air_ops_ext.py @@ -5,6 +5,7 @@ # SPDX-License-Identifier: MIT import functools +from typing import Optional, Sequence, Union from ..ir import * from ._air_ops_gen import * @@ -129,6 +130,38 @@ def __init__( self.regions[0].blocks.append(*operand_types) +class Channel(ChannelOp): + def __init__( + self, + sym_name, + broadcast_shape: Optional[ + Union[Sequence[Union[int, IntegerAttr, Operation, Value]], ArrayAttr] + ] = None, + size=None, + loc=None, + ip=None, + ): + super().__init__( + sym_name=sym_name, + size=size, + loc=loc, + ip=ip, + ) + + if not (broadcast_shape is None): + static_sizes = [] + if isinstance(broadcast_shape, ArrayAttr): + broadcast_shape_attr = broadcast_shape + else: + for size in broadcast_shape: + if isinstance(size, int): + static_sizes.append(IntegerAttr.get(T.index(), size)) + else: + static_sizes.append(ShapedType.get_dynamic_size()) + broadcast_shape_attr = ArrayAttr.get(static_sizes) + super().attributes["broadcast_shape"] = broadcast_shape_attr + + class ChannelGet(ChannelGetOp): def __init__( self, diff --git a/python/test/dialect/channel_get_put.py b/python/test/dialect/channel_get_put.py index ecec57e72..4da1994db 100644 --- a/python/test/dialect/channel_get_put.py +++ b/python/test/dialect/channel_get_put.py @@ -27,9 +27,9 @@ def build_module(shape, idtype, odtype): # CHECK: air.channel @ChanA # CHECK: air.channel @ChanB # CHECK: air.channel @ChanC - ChannelOp("ChanA") - ChannelOp("ChanB") - ChannelOp("ChanC") + Channel("ChanA") + Channel("ChanB") + Channel("ChanC") @FuncOp.from_py_func(memrefTyIn, memrefTyIn, memrefTyOut) def mul(arg0, arg1, arg2): diff --git a/test/xrt/02_mul_shim_1x1/run.py b/test/xrt/02_mul_shim_1x1/run.py index 04be0b7ae..fd957f43a 100644 --- a/test/xrt/02_mul_shim_1x1/run.py +++ b/test/xrt/02_mul_shim_1x1/run.py @@ -48,9 +48,9 @@ def to_type(dtype): def build_module(shape, idtype, odtype, tile_size): memrefTyIn = MemRefType.get(shape, to_type(idtype)) memrefTyOut = MemRefType.get(shape, to_type(odtype)) - ChannelOp("ChanA") - ChannelOp("ChanB") - ChannelOp("ChanC") + Channel("ChanA") + Channel("ChanB") + Channel("ChanC") @FuncOp.from_py_func(memrefTyIn, memrefTyIn, memrefTyOut) def mul(arg0, arg1, arg2): diff --git a/test/xrt/03_mul_L1L2_1x1/run.py b/test/xrt/03_mul_L1L2_1x1/run.py index 23496b0f6..4351503a1 100644 --- a/test/xrt/03_mul_L1L2_1x1/run.py +++ b/test/xrt/03_mul_L1L2_1x1/run.py @@ -72,12 +72,12 @@ def build_module(idtype, odtype, l3_shape, l2_shape, l1_shape): memory_space=l2_mem_space, ) - ChannelOp("ChanL2A") - ChannelOp("ChanL2B") - ChannelOp("ChanL2C") - ChannelOp("ChanL1A") - ChannelOp("ChanL1B") - ChannelOp("ChanL1C") + Channel("ChanL2A") + Channel("ChanL2B") + Channel("ChanL2C") + Channel("ChanL1A") + Channel("ChanL1B") + Channel("ChanL1C") @FuncOp.from_py_func(memrefTyIn, memrefTyIn, memrefTyOut) def mul(arg0, arg1, arg2):