Skip to content

Commit

Permalink
Clang-format; make the test smaller to avoid timeout
Browse files: browse the repository at this point in the history
  • Loading branch information
erwei-xilinx committed Aug 16, 2024
1 parent 747191b commit c837cf7
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 19 deletions.
33 changes: 16 additions & 17 deletions test/xrt/16_matmul_8x16_core_transform_bf16/gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,13 @@
#map3 = affine_map<()[s0] -> (s0 * 8)>
#map4 = affine_map<(d0) -> (d0 * 4)>
module {
func.func @forward(%arg0: memref<512x1024xbf16>, %arg1: memref<128x8x8x4x16xbf16>, %arg2: memref<512x512xf32>) -> memref<512x512xf32> {
func.func @forward(%arg0: memref<512x256xbf16>, %arg1: memref<32x8x8x4x16xbf16>, %arg2: memref<512x512xf32>) -> memref<512x512xf32> {
%c8 = arith.constant 8 : index
%c16 = arith.constant 16 : index
%c64 = arith.constant 64 : index
%c32 = arith.constant 32 : index
%c128 = arith.constant 128 : index
%c256 = arith.constant 256 : index
%c1024 = arith.constant 1024 : index
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%cst = arith.constant 0.000000e+00 : f32
Expand All @@ -40,16 +39,16 @@
%0 = affine.apply #map(%arg3)
%1 = affine.apply #map(%arg4)
%subview = memref.subview %arg2[%0, %1] [128, 128] [1, 1] : memref<512x512xf32> to memref<128x128xf32, strided<[512, 1], offset: ?>>
%alloc_0 = memref.alloc() : memref<128x1024xbf16, 1>
scf.for %arg5 = %c0 to %c1024 step %c256 {
%subview_3 = memref.subview %arg0[%0, %arg5] [128, 256] [1, 1] : memref<512x1024xbf16> to memref<128x256xbf16, strided<[1024, 1], offset: ?>>
%subview_4 = memref.subview %alloc_0[0, %arg5] [128, 256] [1, 1] : memref<128x1024xbf16, 1> to memref<128x256xbf16, strided<[1024, 1], offset: ?>, 1>
linalg.copy ins(%subview_3 : memref<128x256xbf16, strided<[1024, 1], offset: ?>>) outs(%subview_4 : memref<128x256xbf16, strided<[1024, 1], offset: ?>, 1>)
%alloc_0 = memref.alloc() : memref<128x256xbf16, 1>
scf.for %arg5 = %c0 to %c256 step %c256 {
%subview_3 = memref.subview %arg0[%0, %arg5] [128, 256] [1, 1] : memref<512x256xbf16> to memref<128x256xbf16, strided<[256, 1], offset: ?>>
%subview_4 = memref.subview %alloc_0[0, %arg5] [128, 256] [1, 1] : memref<128x256xbf16, 1> to memref<128x256xbf16, strided<[256, 1], offset: ?>, 1>
linalg.copy ins(%subview_3 : memref<128x256xbf16, strided<[256, 1], offset: ?>>) outs(%subview_4 : memref<128x256xbf16, strided<[256, 1], offset: ?>, 1>)
}
%alloc_1 = memref.alloc() : memref<128x8x8x16xbf16, 1>
scf.for %arg5 = %c0 to %c128 step %c32 {
%subview_3 = memref.subview %arg1[%arg5, 0, 0, %arg4, 0] [32, 8, 8, 1, 16] [1, 1, 1, 1, 1] : memref<128x8x8x4x16xbf16> to memref<32x8x8x16xbf16, strided<[4096, 512, 64, 1], offset: ?>>
%subview_4 = memref.subview %alloc_1[%arg5, 0, 0, 0] [32, 8, 8, 16] [1, 1, 1, 1] : memref<128x8x8x16xbf16, 1> to memref<32x8x8x16xbf16, strided<[1024, 128, 16, 1], offset: ?>, 1>
%alloc_1 = memref.alloc() : memref<32x8x8x16xbf16, 1>
scf.for %arg5 = %c0 to %c32 step %c32 {
%subview_3 = memref.subview %arg1[%arg5, 0, 0, %arg4, 0] [32, 8, 8, 1, 16] [1, 1, 1, 1, 1] : memref<32x8x8x4x16xbf16> to memref<32x8x8x16xbf16, strided<[4096, 512, 64, 1], offset: ?>>
%subview_4 = memref.subview %alloc_1[%arg5, 0, 0, 0] [32, 8, 8, 16] [1, 1, 1, 1] : memref<32x8x8x16xbf16, 1> to memref<32x8x8x16xbf16, strided<[1024, 128, 16, 1], offset: ?>, 1>
linalg.copy ins(%subview_3 : memref<32x8x8x16xbf16, strided<[4096, 512, 64, 1], offset: ?>>) outs(%subview_4 : memref<32x8x8x16xbf16, strided<[1024, 128, 16, 1], offset: ?>, 1>)
}
%alloc_2 = memref.alloc() : memref<128x128xf32, 1>
Expand All @@ -60,14 +59,14 @@
%subview_3 = memref.subview %alloc_2[%2, %3] [64, 64] [1, 1] : memref<128x128xf32, 1> to memref<64x64xf32, strided<[128, 1], offset: ?>, 1>
%alloc_4 = memref.alloc() : memref<64x64xf32, 2>
linalg.fill ins(%cst : f32) outs(%alloc_4 : memref<64x64xf32, 2>)
scf.for %arg7 = %c0 to %c128 step %c8 {
scf.for %arg7 = %c0 to %c32 step %c8 {
%4 = affine.apply #map3()[%arg7]
%subview_5 = memref.subview %alloc_0[%2, %4] [64, 64] [1, 1] : memref<128x1024xbf16, 1> to memref<64x64xbf16, strided<[1024, 1], offset: ?>, 1>
%subview_6 = memref.subview %alloc_1[%arg7, %map, 0, 0] [8, 4, 8, 16] [1, 1, 1, 1] : memref<128x8x8x16xbf16, 1> to memref<8x4x8x16xbf16, strided<[1024, 128, 16, 1], offset: ?>, 1>
%subview_5 = memref.subview %alloc_0[%2, %4] [64, 64] [1, 1] : memref<128x256xbf16, 1> to memref<64x64xbf16, strided<[256, 1], offset: ?>, 1>
%subview_6 = memref.subview %alloc_1[%arg7, %map, 0, 0] [8, 4, 8, 16] [1, 1, 1, 1] : memref<32x8x8x16xbf16, 1> to memref<8x4x8x16xbf16, strided<[1024, 128, 16, 1], offset: ?>, 1>
%transpose = memref.transpose %subview_6 (d0, d1, d2, d3) -> (d0, d2, d1, d3) : memref<8x4x8x16xbf16, strided<[1024, 128, 16, 1], offset: ?>, 1> to memref<8x8x4x16xbf16, strided<[1024, 16, 128, 1], offset: ?>, 1>
%alloc_7 = memref.alloc() : memref<64x64xbf16, 2>
%alloc_8 = memref.alloc() : memref<8x8x4x16xbf16, 2>
memref.copy %subview_5, %alloc_7 : memref<64x64xbf16, strided<[1024, 1], offset: ?>, 1> to memref<64x64xbf16, 2>
memref.copy %subview_5, %alloc_7 : memref<64x64xbf16, strided<[256, 1], offset: ?>, 1> to memref<64x64xbf16, 2>
memref.copy %transpose, %alloc_8 : memref<8x8x4x16xbf16, strided<[1024, 16, 128, 1], offset: ?>, 1> to memref<8x8x4x16xbf16, 2>
%reshape = memref.reshape %alloc_8(%alloc) : (memref<8x8x4x16xbf16, 2>, memref<2xindex>) -> memref<64x64xbf16, 2>
linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%alloc_7, %reshape : memref<64x64xbf16, 2>, memref<64x64xbf16, 2>) outs(%alloc_4 : memref<64x64xf32, 2>)
Expand All @@ -78,8 +77,8 @@
memref.dealloc %alloc_4 : memref<64x64xf32, 2>
} {mapping = [#gpu.thread<y>, #gpu.thread<x>]}
linalg.copy ins(%alloc_2 : memref<128x128xf32, 1>) outs(%subview : memref<128x128xf32, strided<[512, 1], offset: ?>>)
memref.dealloc %alloc_0 : memref<128x1024xbf16, 1>
memref.dealloc %alloc_1 : memref<128x8x8x16xbf16, 1>
memref.dealloc %alloc_0 : memref<128x256xbf16, 1>
memref.dealloc %alloc_1 : memref<32x8x8x16xbf16, 1>
memref.dealloc %alloc_2 : memref<128x128xf32, 1>
}
return %arg2 : memref<512x512xf32>
Expand Down
2 changes: 1 addition & 1 deletion test/xrt/16_matmul_8x16_core_transform_bf16/kernel.cpp
File mode changed: 100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

#include <aie_api/aie.hpp>

template <typename T, int M, int N>
template <typename T, int M, int N>
void zero_scalar(T *__restrict c) {
for (int i = 0; i < M * N; i++) {
c[i] = 0.0f;
Expand Down
2 changes: 1 addition & 1 deletion test/xrt/16_matmul_8x16_core_transform_bf16/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

M = 512
N = 512
K = 1024
K = 256

Tx = 16
Ty = 8
Expand Down

0 comments on commit c837cf7

Please sign in to comment.