Skip to content

Commit

Permalink
Bump mlir-air
Browse files Browse the repository at this point in the history
  • Loading branch information
erwei-xilinx committed Jul 27, 2023
2 parents 4ffacf3 + 1cb2a65 commit 15c7231
Show file tree
Hide file tree
Showing 48 changed files with 1,302 additions and 176 deletions.
9 changes: 4 additions & 5 deletions docs/building.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,10 @@ cd utils
./build-llvm-local.sh llvm build ../../install
```

Next, clone and build MLIR-AIE with paths to llvm, and cmakeModules repositories. Again, we use a common installation directory.

Next, clone and build MLIR-AIE with paths to the llvm, aienginev2, and cmakeModules repositories. Again, we use a common installation directory. Note that the following commands assume that the aienginev2 library is installed in /opt/xaiengine, as directed in the `Building on x86 with runtime for PCIe` section. If the aienginev2 library was installed elsewhere, be sure that the aienginev2 path passed to `build-mlir-aie-local.sh` (the 3rd argument, `/opt/xaiengine` in the command below) points to that location.
```
./clone-mlir-aie.sh
./build-mlir-aie-local.sh llvm mlir-aie/cmake/modulesXilinx mlir-aie build ../../install
./build-mlir-aie-local.sh llvm mlir-aie/cmake/modulesXilinx /opt/xaiengine mlir-aie build ../../install
```

The MLIR-AIE tools will be able to generate binaries targeting AIEngines.
Expand All @@ -82,7 +81,7 @@ Use the following command to build the AIR tools to compile on x86:

## Building on x86 with runtime for PCIe

In order to build and run on PCIe cards, you first have to build and install the aienginev2 library:
In order to build and run on PCIe cards, you first have to build and install the aienginev2 library. We chose to install the library in /opt/xaiengine, but it is not required to be installed there. Just ensure that, when building mlir-aie and mlir-air, you point to the directory in which the aienginev2 library was installed.

```
git clone https://github.com/jnider/aie-rt
Expand All @@ -91,7 +90,7 @@ git checkout joel-aie
cd driver/src
make -f Makefile.Linux
sudo cp -r ../include /opt/aiengine/
sudo cp libxaiengine.so* /opt/aiengine/lib/
sudo cp libxaiengine.so* /opt/xaiengine/lib/
export LD_LIBRARY_PATH=/opt/xaiengine/lib:${LD_LIBRARY_PATH}
```

Expand Down
2 changes: 1 addition & 1 deletion driver/kernel_queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
#define AIR_PKT_TYPE_CORE_STATUS 0x0005L

#define AIR_PKT_TYPE_DEVICE_INITIALIZE 0x0010L
#define AIR_PKT_TYPE_HERD_INITIALIZE 0x0011L
#define AIR_PKT_TYPE_SEGMENT_INITIALIZE 0x0011L
#define AIR_PKT_TYPE_HELLO 0x0012L
#define AIR_PKT_TYPE_ALLOCATE_HERD_SHIM_DMAS 0x0013L
#define AIR_PKT_TYPE_GET_CAPABILITIES 0x0014L
Expand Down
6 changes: 3 additions & 3 deletions runtime_lib/airhost/host.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -345,11 +345,11 @@ uint64_t air_segment_load(const char *name) {

wr_idx = queue_add_write_index(_air_host_active_segment.q, 1);
packet_id = wr_idx % _air_host_active_segment.q->size;
dispatch_packet_t *herd_pkt =
dispatch_packet_t *segment_pkt =
(dispatch_packet_t *)(_air_host_active_segment.q->base_address_vaddr) +
packet_id;
air_packet_herd_init(herd_pkt, 0, 0, 50, 1, 8);
air_queue_dispatch_and_wait(_air_host_active_segment.q, wr_idx, herd_pkt);
air_packet_segment_init(segment_pkt, 0, 0, 50, 1, 8);
air_queue_dispatch_and_wait(_air_host_active_segment.q, wr_idx, segment_pkt);

#else
XAie_Finish(&(_air_host_active_libxaie->DevInst));
Expand Down
6 changes: 3 additions & 3 deletions runtime_lib/airhost/include/air_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,9 @@ struct l2_dma_rsp_t {
};

// initialize pkt as a segment init packet with given parameters
hsa_status_t air_packet_herd_init(dispatch_packet_t *pkt, uint16_t herd_id,
uint8_t start_col, uint8_t num_cols,
uint8_t start_row, uint8_t num_rows);
hsa_status_t air_packet_segment_init(dispatch_packet_t *pkt, uint16_t herd_id,
uint8_t start_col, uint8_t num_cols,
uint8_t start_row, uint8_t num_rows);
// uint8_t start_row, uint8_t num_rows,
// uint16_t dma0, uint16_t dma1);

Expand Down
2 changes: 1 addition & 1 deletion runtime_lib/airhost/include/air_queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
#define AIR_PKT_TYPE_CORE_STATUS 0x0005L

#define AIR_PKT_TYPE_DEVICE_INITIALIZE 0x0010L
#define AIR_PKT_TYPE_HERD_INITIALIZE 0x0011L
#define AIR_PKT_TYPE_SEGMENT_INITIALIZE 0x0011L
#define AIR_PKT_TYPE_HELLO 0x0012L
#define AIR_PKT_TYPE_ALLOCATE_HERD_SHIM_DMAS 0x0013L
#define AIR_PKT_TYPE_GET_CAPABILITIES 0x0014L
Expand Down
8 changes: 4 additions & 4 deletions runtime_lib/airhost/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,9 @@ hsa_status_t air_packet_rw32_init(dispatch_packet_t *pkt, bool is_write,
return HSA_STATUS_SUCCESS;
}

hsa_status_t air_packet_herd_init(dispatch_packet_t *pkt, uint16_t herd_id,
uint8_t start_col, uint8_t num_cols,
uint8_t start_row, uint8_t num_rows) {
hsa_status_t air_packet_segment_init(dispatch_packet_t *pkt, uint16_t herd_id,
uint8_t start_col, uint8_t num_cols,
uint8_t start_row, uint8_t num_rows) {
// uint8_t start_row, uint8_t num_rows,
// uint16_t dma0, uint16_t dma1) {
initialize_packet(pkt);
Expand All @@ -205,7 +205,7 @@ hsa_status_t air_packet_herd_init(dispatch_packet_t *pkt, uint16_t herd_id,
pkt->arg[2] = 0; // unused
pkt->arg[3] = 0; // unused

pkt->type = AIR_PKT_TYPE_HERD_INITIALIZE;
pkt->type = AIR_PKT_TYPE_SEGMENT_INITIALIZE;
pkt->header = (HSA_PACKET_TYPE_AGENT_DISPATCH << HSA_PACKET_HEADER_TYPE);

return HSA_STATUS_SUCCESS;
Expand Down
125 changes: 101 additions & 24 deletions runtime_lib/controller/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,8 @@ int mlir_aie_reinit_device(aie_libxaie_ctx_t *ctx) {
return 0;
}

constexpr uint64_t NUM_BD = 16;

void mlir_aie_print_dma_status(aie_libxaie_ctx_t *ctx, int col, int row) {
// int col = loc.Col;
// int row = loc.Row;
Expand Down Expand Up @@ -210,7 +212,7 @@ void mlir_aie_print_dma_status(aie_libxaie_ctx_t *ctx, int col, int row) {
col, row, dma_mm2s_status, dma_mm2s0_control, dma_mm2s1_control,
dma_s2mm_status, dma_s2mm0_control, dma_s2mm1_control, dma_bd0_a,
dma_bd0_control, dma_bd1_a, dma_bd1_control);
for (int bd = 0; bd < 8; bd++) {
for (int bd = 0; bd < NUM_BD; bd++) {
u32 dma_bd_addr_a;
XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D000 + (0x20 * bd),
&dma_bd_addr_a);
Expand Down Expand Up @@ -323,7 +325,7 @@ void mlir_aie_print_shimdma_status(aie_libxaie_ctx_t *ctx, int col, int row) {
col, row, dma_mm2s_status, dma_mm2s0_control, dma_mm2s1_control,
dma_s2mm_status, dma_s2mm0_control, dma_s2mm1_control, dma_bd0_a,
dma_bd0_control);
for (int bd = 0; bd < 8; bd++) {
for (int bd = 0; bd < 16; bd++) {
u32 dma_bd_addr_a;
XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D000 + (0x14 * bd),
&dma_bd_addr_a);
Expand Down Expand Up @@ -820,6 +822,60 @@ void xaie_l2_dma_init(int col) {
xaie::out32(xaie::getTileAddr(col, 0) + 0x00033008, 0xFF);
}

#ifdef ARM_CONTROLLER

// Defining the NPI base and registers we use to reset the array
// npi_base is the address that the reset routines cast to a
// `volatile uint32_t *` in order to access the NPI registers.
constexpr uint64_t npi_base = 0xF70A0000UL;
// The *_REG constants are byte offsets from npi_base; users shift them right
// by 2 to turn them into indices of 32-bit words.
// Mask register: written before each write to the value register.
constexpr auto NPI_MASK_REG = 0x0;
// Value register: written with the reset bit set, then with it cleared.
constexpr auto NPI_VAL_REG = 0x4;
// Lock register: written to unlock NPI before, and lock it after, a reset.
constexpr auto NPI_LOCK_REG = 0xC;
// Reset the array through the NPI registers: unlock NPI, write the
// mask/value pair with bit 0x04000000 set and then cleared, and lock NPI
// again.
//
// NOTE(review): this definition lives inside `#ifdef ARM_CONTROLLER`, while
// xaie_device_init appears to call it before its own ARM_CONTROLLER guard --
// confirm that builds without ARM_CONTROLLER still compile.
void xaie_array_reset() {
  // Word-indexed view of the NPI registers. The register constants are byte
  // offsets, so each one is converted to a 32-bit word index.
  volatile uint32_t *const npi_regs =
      reinterpret_cast<volatile uint32_t *>(npi_base);
  constexpr auto lock_idx = NPI_LOCK_REG / 4;
  constexpr auto mask_idx = NPI_MASK_REG / 4;
  constexpr auto value_idx = NPI_VAL_REG / 4;

  air_printf("Starting array reset\r\n");

  // Unlock NPI
  npi_regs[lock_idx] = 0xF9E8D7C6;

  // First pass: value written with bit 0x04000000 set
  npi_regs[mask_idx] = 0x04000000;
  npi_regs[value_idx] = 0x040381B1;

  // Second pass: same mask, value written with bit 0x04000000 cleared
  npi_regs[mask_idx] = 0x04000000;
  npi_regs[value_idx] = 0x000381B1;

  // Lock NPI again
  npi_regs[lock_idx] = 0x12341234;

  air_printf("Done with array reset\r\n");
}

// Strobe the shim reset through the NPI registers: unlock NPI, write the
// mask/value pair with bit 0x08000000 set and then cleared, and lock NPI
// again.
//
// Call this only after the shims that should be reset have been enabled via
// the mask.
void xaie_strobe_shim_reset() {
  // Word-indexed view of the NPI registers. The register constants are byte
  // offsets, so each one is converted to a 32-bit word index.
  volatile uint32_t *const npi_regs =
      reinterpret_cast<volatile uint32_t *>(npi_base);
  constexpr auto lock_idx = NPI_LOCK_REG / 4;
  constexpr auto mask_idx = NPI_MASK_REG / 4;
  constexpr auto value_idx = NPI_VAL_REG / 4;

  air_printf("Starting shim reset\r\n");

  // Unlock NPI
  npi_regs[lock_idx] = 0xF9E8D7C6;

  // First pass: value written with bit 0x08000000 set
  npi_regs[mask_idx] = 0x08000000;
  npi_regs[value_idx] = 0x080381B1;

  // Second pass: same mask, value written with bit 0x08000000 cleared
  npi_regs[mask_idx] = 0x08000000;
  npi_regs[value_idx] = 0x000381B1;

  // Lock NPI again
  npi_regs[lock_idx] = 0x12341234;

  air_printf("Done with shim reset\r\n");
}

#endif

void xaie_shim_dma_init(int col) {
// Invalidate all BDs by writing to their buffer control register
for (int ch = 0; ch < 4; ch++) {
Expand All @@ -832,8 +888,12 @@ void xaie_shim_dma_init(int col) {
}

void xaie_device_init(int num_cols) {

air_printf("Initializing device...\r\n");

// First, resetting the entire device
xaie_array_reset();

#ifdef ARM_CONTROLLER
int err = xaie2::mlir_aie_reinit_device(_xaie);
if (err)
Expand All @@ -849,27 +909,44 @@ void xaie_device_init(int num_cols) {
for (int c = 0; c < num_cols; c++) {
xaie_shim_dma_init(shim_dma_cols[c]);
}

// Turning the shim_reset_enable bit low for every column so they don't get
// reset when we perform a global shim reset
for (int col = 0; col < XAIE_NUM_COLS; col++) {
xaie::out32(xaie::getTileAddr(col, 0) + 0x0003604C, 0);
}
}

// Initialize one herd with lower left corner at (col_start, row_start)
void xaie_herd_init(int start_col, int num_cols, int start_row, int num_rows) {
// Initialize one segment with lower left corner at (col_start, row_start)
void xaie_segment_init(int start_col, int num_cols, int start_row,
int num_rows) {
HerdCfgInst.col_start = start_col;
HerdCfgInst.num_cols = num_cols;
HerdCfgInst.row_start = start_row;
HerdCfgInst.num_rows = num_rows;
#ifdef ARM_CONTROLLER

// Performing the shim reset
air_printf("Performing shim reset\r\n");
for (int c = start_col; c < start_col + num_cols; c++) {
for (int r = start_row; r < start_row + num_rows; r++) {
xaie::out32(xaie::getTileAddr(c, 0) + 0x00036048,
!!1); // 1 == ResetEnable
xaie::out32(xaie::getTileAddr(c, 0) + 0x00036048,
!!0); // 0 == ResetDisable
// for (int l = 0; l < 16; l++)
// xaie::maskpoll32(xaie::getTileAddr(c, r) + 0x0001E020 + 0x80 * l,
// 0x1,
// 0x1, 0);
}
xaie::out32(xaie::getTileAddr(c, 0) + 0x0003604C, 1);
}

xaie_strobe_shim_reset();

for (int c = start_col; c < start_col + num_cols; c++) {
xaie::out32(xaie::getTileAddr(c, 0) + 0x0003604C, 0);
}

// Performing the column reset
air_printf("Performing col reset\r\n");
for (int c = start_col; c < start_col + num_cols; c++) {
xaie::out32(xaie::getTileAddr(c, 0) + 0x00036048,
!!1); // 1 == ResetEnable
xaie::out32(xaie::getTileAddr(c, 0) + 0x00036048,
!!0); // 0 == ResetDisable
}

#endif
}

Expand Down Expand Up @@ -972,7 +1049,7 @@ void handle_packet_device_initialize(dispatch_packet_t *pkt) {
xaie_device_init(NUM_SHIM_DMAS);
}

void handle_packet_herd_initialize(dispatch_packet_t *pkt) {
void handle_packet_segment_initialize(dispatch_packet_t *pkt) {
setup = true;
packet_set_active(pkt, true);

Expand All @@ -983,16 +1060,16 @@ void handle_packet_herd_initialize(dispatch_packet_t *pkt) {
u32 start_col = (pkt->arg[0] >> 32) & 0xff;
u32 num_cols = (pkt->arg[0] >> 40) & 0xff;

u32 herd_id = pkt->arg[1] & 0xffff;
u32 segment_id = pkt->arg[1] & 0xffff;
u32 shimDMA0 = (pkt->arg[1] >> 16) & 0xff;
u32 shimDMA1 = (pkt->arg[1] >> 24) & 0xff;
// TODO more checks on herd dimensions
// TODO more checks on segment dimensions
if (start_row == 0)
start_row++;
xaie_herd_init(start_col, num_cols, start_row, num_rows);
air_printf("Initialized herd %d at (%d, %d) of size (%d,%d)\r\n", herd_id,
start_col, start_row, num_cols, num_rows);
// herd_id is ignored - current restriction is 1 herd -> 1 controller
xaie_segment_init(start_col, num_cols, start_row, num_rows);
air_printf("Initialized segment %d at (%d, %d) of size (%d,%d)\r\n",
segment_id, start_col, start_row, num_cols, num_rows);
// segment_id is ignored - current restriction is 1 segment -> 1 controller
// mappedShimDMA[0] = shimDMA0;
// mappedShimDMA[1] = shimDMA1;
// xaie_shim_dma_init(shimDMA0);
Expand All @@ -1001,7 +1078,7 @@ void handle_packet_herd_initialize(dispatch_packet_t *pkt) {
// air_printf("Initialized shim DMA physical idx %d to logical idx
// %d\r\n",shimDMA1,1);
} else {
air_printf("Unsupported address type 0x%04X for herd initialize\r\n",
air_printf("Unsupported address type 0x%04X for segment initialize\r\n",
(pkt->arg[0] >> 48) & 0xf);
}
}
Expand Down Expand Up @@ -1560,8 +1637,8 @@ void handle_agent_dispatch_packet(queue_t *q, uint32_t mb_id) {
complete_agent_dispatch_packet(pkt);
packets_processed++;
break;
case AIR_PKT_TYPE_HERD_INITIALIZE:
handle_packet_herd_initialize(pkt);
case AIR_PKT_TYPE_SEGMENT_INITIALIZE:
handle_packet_segment_initialize(pkt);
complete_agent_dispatch_packet(pkt);
packets_processed++;
break;
Expand Down
1 change: 1 addition & 0 deletions runtime_lib/controller/platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#define __PLATFORM_H_

#include "platform_config.h"
#include <cstdint>

void init_platform();
void cleanup_platform();
Expand Down
6 changes: 5 additions & 1 deletion runtime_lib/controller/shell.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,11 @@ void shell(void) {

// handle all characters from the UART (which is a slow interface) so the UI
// is responsive. If no character is waiting, go back to processing queues.
while ((in = XUartPsv_RecvByte(STDOUT_BASEADDRESS))) {
while (XUartPsv_IsReceiveData(STDOUT_BASEADDRESS)) {

// When we know that we have data, read it from the UART
in = XUartPsv_RecvByte(STDOUT_BASEADDRESS);

// make sure character will fit in the command buffer
if (cmd_len >= MAX_LINE_LENGTH) {
xil_printf("Line too long\r\n");
Expand Down
6 changes: 3 additions & 3 deletions test/02_mb_dispatch/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,10 @@ int main(int argc, char *argv[]) {
auto num_rows = 1;
auto num_cols = 1;

dispatch_packet_t *herd_pkt =
dispatch_packet_t *segment_pkt =
(dispatch_packet_t *)(queues[0]->base_address_vaddr) + packet_id;
air_packet_herd_init(herd_pkt, 0, col, num_cols, row, num_rows);
air_queue_dispatch_and_wait(queues[0], wr_idx, herd_pkt);
air_packet_segment_init(segment_pkt, 0, col, num_cols, row, num_rows);
air_queue_dispatch_and_wait(queues[0], wr_idx, segment_pkt);

printf("PASS!\n");
return 0;
Expand Down
7 changes: 4 additions & 3 deletions test/03_mb_lock_rel/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,10 @@ int main(int argc, char *argv[])
packet_id = wr_idx % q->size;

// Set up the world's smallest herd at 7,2
dispatch_packet_t *herd_pkt = (dispatch_packet_t*)(q->base_address_vaddr) + packet_id;
air_packet_herd_init(herd_pkt, herd_id, col, num_cols, row, num_rows);
air_queue_dispatch_and_wait(q, wr_idx, herd_pkt);
dispatch_packet_t *segment_pkt =
(dispatch_packet_t *)(q->base_address_vaddr) + packet_id;
air_packet_segment_init(segment_pkt, herd_id, col, num_cols, row, num_rows);
air_queue_dispatch_and_wait(q, wr_idx, segment_pkt);

// reserve another packet in the queue
wr_idx = queue_add_write_index(q, 1);
Expand Down
14 changes: 7 additions & 7 deletions test/07_mb_beef_maker/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,19 +77,19 @@ main(int argc, char *argv[])
uint64_t wr_idx = queue_add_write_index(queues[0], 1);
uint64_t packet_id = wr_idx % queues[0]->size;

// herd_setup packet
dispatch_packet_t *herd_pkt =
dispatch_packet_t *shim_pkt =
(dispatch_packet_t *)(queues[0]->base_address_vaddr) + packet_id;
air_packet_herd_init(herd_pkt, 0, col, 1, row, 1);
air_queue_dispatch_and_wait(queues[0], wr_idx, herd_pkt);
air_packet_device_init(shim_pkt, XAIE_NUM_COLS);
air_queue_dispatch_and_wait(queues[0], wr_idx, shim_pkt);

wr_idx = queue_add_write_index(queues[0], 1);
packet_id = wr_idx % queues[0]->size;

dispatch_packet_t *shim_pkt =
// herd_setup packet
dispatch_packet_t *segment_pkt =
(dispatch_packet_t *)(queues[0]->base_address_vaddr) + packet_id;
air_packet_device_init(shim_pkt, XAIE_NUM_COLS);
air_queue_dispatch_and_wait(queues[0], wr_idx, shim_pkt);
air_packet_segment_init(segment_pkt, 0, col, 1, row, 1);
air_queue_dispatch_and_wait(queues[0], wr_idx, segment_pkt);

mlir_aie_configure_cores(xaie);
mlir_aie_configure_switchboxes(xaie);
Expand Down
Loading

0 comments on commit 15c7231

Please sign in to comment.