From cc8d85283098ed7c8a9727e8b8367999c0051e0c Mon Sep 17 00:00:00 2001 From: zhengxuegui Date: Mon, 24 Jun 2024 20:25:29 +0800 Subject: [PATCH 1/2] [compiler] add forall-tiling pass --- compiler/include/byteir/Dialect/SCF/Passes.h | 1 + compiler/include/byteir/Dialect/SCF/Passes.td | 24 ++ .../Dialect/SCF/Transforms/ForallTiling.h | 33 +++ .../lib/Dialect/SCF/Transforms/CMakeLists.txt | 3 +- .../Dialect/SCF/Transforms/ForallTiling.cpp | 264 ++++++++++++++++++ .../lib/Dialect/SCF/Transforms/PassDetail.h | 4 + compiler/test/Dialect/SCF/forallTiling.mlir | 69 +++++ 7 files changed, 397 insertions(+), 1 deletion(-) create mode 100644 compiler/include/byteir/Dialect/SCF/Transforms/ForallTiling.h create mode 100644 compiler/lib/Dialect/SCF/Transforms/ForallTiling.cpp create mode 100644 compiler/test/Dialect/SCF/forallTiling.mlir diff --git a/compiler/include/byteir/Dialect/SCF/Passes.h b/compiler/include/byteir/Dialect/SCF/Passes.h index e8d7427d5..dd6b5a5c8 100644 --- a/compiler/include/byteir/Dialect/SCF/Passes.h +++ b/compiler/include/byteir/Dialect/SCF/Passes.h @@ -19,6 +19,7 @@ #define BYTEIR_DIALECT_SCF_PASSES_H #include "byteir/Dialect/SCF/Transforms/ForallCollapsing.h" +#include "byteir/Dialect/SCF/Transforms/ForallTiling.h" #include "byteir/Dialect/SCF/Transforms/FuseNestedForall.h" #include "byteir/Dialect/SCF/Transforms/InsertTrivialSCFLoop.h" diff --git a/compiler/include/byteir/Dialect/SCF/Passes.td b/compiler/include/byteir/Dialect/SCF/Passes.td index a4f3cc0c5..44309c9f3 100644 --- a/compiler/include/byteir/Dialect/SCF/Passes.td +++ b/compiler/include/byteir/Dialect/SCF/Passes.td @@ -72,4 +72,28 @@ def ForallCollapsing : Pass<"forall-collapsing", "mlir::func::FuncOp"> { ]; } +//===----------------------------------------------------------------------===// +// ForallTiling +//===----------------------------------------------------------------------===// + +def ForallTiling : Pass<"forall-tiling"> { + let summary = "tile forall Op with specific tileSize"; + 
let constructor = "mlir::createForallTilingPass()"; + let dependentDialects = [ + "scf::SCFDialect", + "affine::AffineDialect" + ]; + let options = [ + ListOption<"tileSizes", "tile-sizes", "int64_t", + "Factors to tile forall">, + Option<"noMinMaxBounds", "no-min-max-bounds", "bool", + /*default=*/"false", + "Perform tiling with fixed upper bound with inbound check " + "inside the internal loops">, + Option<"anchorTag", "anchor-tag", "std::string", + /*default=*/"", + "Optional unitAttr anchored tag to apply this pass">, + ]; +} + #endif // BYTEIR_DIALECT_SCF_PASSES \ No newline at end of file diff --git a/compiler/include/byteir/Dialect/SCF/Transforms/ForallTiling.h b/compiler/include/byteir/Dialect/SCF/Transforms/ForallTiling.h new file mode 100644 index 000000000..3363fde21 --- /dev/null +++ b/compiler/include/byteir/Dialect/SCF/Transforms/ForallTiling.h @@ -0,0 +1,33 @@ +//===- ForallTiling.h ------------------------------------- C++ --===// +// +// Copyright 2024 ByteDance Ltd. and/or its affiliates. All rights reserved. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//===----------------------------------------------------------------------===// + +#ifndef BYTEIR_DIALECT_SCF_TRANSFORMS_FORALLTILING_H +#define BYTEIR_DIALECT_SCF_TRANSFORMS_FORALLTILING_H + +#include "mlir/Pass/Pass.h" +#include + +namespace mlir { + +std::unique_ptr +createForallTilingPass(llvm::ArrayRef tileSize = {}, + bool noMinMaxBounds = false, + llvm::StringRef anchorTag = ""); + +} // namespace mlir + +#endif // BYTEIR_DIALECT_SCF_TRANSFORMS_FORALLTILING_H diff --git a/compiler/lib/Dialect/SCF/Transforms/CMakeLists.txt b/compiler/lib/Dialect/SCF/Transforms/CMakeLists.txt index 967c79b5f..4891d0741 100644 --- a/compiler/lib/Dialect/SCF/Transforms/CMakeLists.txt +++ b/compiler/lib/Dialect/SCF/Transforms/CMakeLists.txt @@ -1,5 +1,6 @@ add_mlir_dialect_library(ByteIRSCFPasses ForallCollapsing.cpp + ForallTiling.cpp FuseNestedForall.cpp InsertTrivialSCFLoop.cpp TilingInterfaceToSCFFor.cpp @@ -21,4 +22,4 @@ add_mlir_dialect_library(ByteIRSCFPasses MLIRSCFTransforms MLIRSideEffectInterfaces MLIRSupport - ) +) diff --git a/compiler/lib/Dialect/SCF/Transforms/ForallTiling.cpp b/compiler/lib/Dialect/SCF/Transforms/ForallTiling.cpp new file mode 100644 index 000000000..5bd827494 --- /dev/null +++ b/compiler/lib/Dialect/SCF/Transforms/ForallTiling.cpp @@ -0,0 +1,264 @@ +//===- ForallTiling.cpp ------------------------------------ C++ --===// +// +// Copyright 2024 ByteDance Ltd. and/or its affiliates. All rights reserved. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +// Some code comes from mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp +// in the LLVM project +// Original license: +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "byteir/Dialect/SCF/Transforms/ForallTiling.h" +#include "byteir/Utils/LoopUtils.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/SCF/IR/SCF.h" +#include "mlir/IR/IRMapping.h" +#include "mlir/Transforms/RegionUtils.h" +#include "llvm/ADT/DenseSet.h" +#include + +#include "PassDetail.h" + +using namespace llvm; +using namespace mlir; +using namespace mlir::scf; + +namespace { +std::pair tileForall(ForallOp forallOp, + ArrayRef tileSizes, + bool noMinMaxBounds) { + OpBuilder builder(forallOp); + auto loc = forallOp.getLoc(); + auto zero = builder.create(loc, 0); + SmallVector tileSizeConstants; + int64_t rank = forallOp.getRank(); + tileSizeConstants.reserve(rank); + for (int64_t i = 0; i < rank; ++i) { + tileSizeConstants.push_back( + builder.create(loc, tileSizes[i])); + } + + SmallVector oriSteps; + oriSteps = forallOp.getStep(builder); + + SmallVector outerSteps, outerLowerBounds, outerUpperBounds; + outerLowerBounds = forallOp.getLowerBound(builder); + outerUpperBounds = forallOp.getUpperBound(builder); + + outerSteps.reserve(rank); + + for (int64_t i = 0; i < rank; ++i) { + if (tileSizes[i] == 0) { + 
outerSteps.push_back(oriSteps[i]); + } else { + outerSteps.push_back(builder.create(loc, oriSteps[i], + tileSizeConstants[i])); + } + } + + auto outerForall = builder.create( + loc, forallOp.getMixedLowerBound(), forallOp.getMixedUpperBound(), + getAsOpFoldResult(outerSteps), ValueRange(), forallOp.getMapping()); + + builder.setInsertionPointToStart(outerForall.getBody()); + + // Compute min(size, dim - offset) to avoid out-of-bounds accesses. + auto minMap = AffineMap::get( + /*dimCount=*/3, /*symbolCount=*/0, + {getAffineDimExpr(/*position=*/0, builder.getContext()), + getAffineDimExpr(/*position=*/1, builder.getContext()) - + getAffineDimExpr(/*position=*/2, builder.getContext())}, + builder.getContext()); + + SmallVector innerUpperBounds, innerSteps; + SmallVector tiledOuterUpperBounds, tiledOuterIVs; + + innerUpperBounds.reserve(rank); + bool needInboundCheck = false; + for (auto [lowerBound, upperBound, newStep, iv, oriStep, tileSizeConstant] : + llvm::zip(outerLowerBounds, outerUpperBounds, outerSteps, + outerForall.getInductionVars(), oriSteps, tileSizeConstants)) { + // Collect the statically known loop bounds + auto lowerBoundConstant = + dyn_cast_or_null(lowerBound.getDefiningOp()); + auto upperBoundConstant = + dyn_cast_or_null(upperBound.getDefiningOp()); + auto stepConstant = + dyn_cast_or_null(oriStep.getDefiningOp()); + auto tileSize = + cast(tileSizeConstant.getDefiningOp()).value(); + if (tileSize == 0) { + continue; + } + innerSteps.push_back(oriStep); + tiledOuterUpperBounds.push_back(upperBound); + tiledOuterIVs.push_back(iv); + // If the loop bounds and the loop step are constant and if the number of + // loop iterations is an integer multiple of the tile size, we use a static + // bound for the inner loop. 
+ if (lowerBoundConstant && upperBoundConstant && stepConstant) { + auto numIterations = llvm::divideCeil(upperBoundConstant.value() - + lowerBoundConstant.value(), + stepConstant.value()); + if (numIterations % tileSize == 0) { + innerUpperBounds.push_back(newStep); + continue; + } + } + + // For InboundCheck mode, just use the variable outer step + if (noMinMaxBounds) { + innerUpperBounds.push_back(newStep); + needInboundCheck = true; + continue; + } + + // Otherwise, we dynamically compute the bound for + // each iteration of the outer loop. + innerUpperBounds.push_back(builder.create( + loc, builder.getIndexType(), minMap, + ValueRange{newStep, upperBound, iv})); + } + + auto innerForall = builder.create( + loc, getAsOpFoldResult(SmallVector(innerUpperBounds.size(), zero)), + getAsOpFoldResult(innerUpperBounds), getAsOpFoldResult(innerSteps), + ValueRange(), std::nullopt); + + if (noMinMaxBounds && needInboundCheck) { + builder.setInsertionPointToStart(innerForall.getBody()); + // Insert in-bound check + Value inbound = + builder.create(loc, 1, builder.getIntegerType(1)); + for (auto [outerUpperBound, outerIV, innerIV, innerStep] : + llvm::zip(tiledOuterUpperBounds, tiledOuterIVs, + innerForall.getInductionVars(), innerSteps)) { + // %in_bound = %in_bound && + // (%inner_iv * %inner_step + %outer_iv < %outer_upper_bound) + Value index = builder.create( + loc, builder.create(loc, innerIV, innerStep), outerIV); + Value dimInbound = builder.create( + loc, arith::CmpIPredicate::ult, index, outerUpperBound); + inbound = builder.create(loc, inbound, dimInbound); + } + auto ifInbound = + builder.create(loc, + /*resultTypes*/ ArrayRef{}, inbound, + /*hasElseRegion*/ false); + builder.setInsertionPointToStart(innerForall.getBody()); + for (int64_t i = 0, tiled = 0; i < rank; ++i) { + Value iv; + if (tileSizes[i] == 0) { + iv = outerForall.getInductionVars()[i]; + } else { + Value innerIndex = innerForall.getInductionVars()[tiled]; + Value outerIndex = 
tiledOuterIVs[tiled]; + iv = builder.create(loc, innerIndex, outerIndex); + tiled += 1; + } + replaceAllUsesInRegionWith(forallOp.getBody()->getArgument(i), iv, + forallOp.getRegion()); + } + Block &thenBlock = ifInbound.getThenRegion().front(); + forallOp.getBody()->back().erase(); + // Move the body of the old forall into the then-block of the in-bound check. + thenBlock.getOperations().splice(Block::iterator(thenBlock.back()), + forallOp.getBody()->getOperations()); + } else { + builder.setInsertionPointToStart(innerForall.getBody()); + for (int64_t i = 0, tiled = 0; i < rank; ++i) { + Value iv; + if (tileSizes[i] == 0) { + iv = outerForall.getInductionVars()[i]; + } else { + Value innerIndex = innerForall.getInductionVars()[tiled]; + Value outerIndex = tiledOuterIVs[tiled]; + iv = builder.create(loc, innerIndex, outerIndex); + tiled += 1; + } + replaceAllUsesInRegionWith(forallOp.getBody()->getArgument(i), iv, + forallOp.getRegion()); + } + // Move the body of the old forall into the body of innerForall. + innerForall.getBody()->getOperations().splice( + Block::iterator(innerForall.getBody()->back()), + forallOp.getBody()->getOperations()); + // erase redundant scf.forall.in_parallel + innerForall.getBody()->back().erase(); + } + + // erase old forall + forallOp.erase(); + return std::make_pair(outerForall, innerForall); +} + +struct ForallTilingPass : public ForallTilingBase { + ForallTilingPass(ArrayRef tileSizes, bool noMinMaxBounds, + llvm::StringRef anchor) + : ForallTilingBase() { + anchorTag = anchor.str(); + this->tileSizes = tileSizes; + this->noMinMaxBounds = noMinMaxBounds; + } + void runOnOperation() override { + Operation *rootOp = getOperation(); + + SmallVector candidateForall; + if (llvm::all_of(tileSizes, [](int64_t val) { return val == 0; })) { + return; + } + + rootOp->walk([&](scf::ForallOp forallOp) { + // skip non-anchored + if (!anchorTag.empty() && !forallOp->hasAttr(anchorTag)) { + return; + } + + if (forallOp.getRank() != tileSizes.size()) { + 
mlir::emitError(mlir::UnknownLoc::get(&Pass::getContext()), + "tile size is not match the forallOp"); + return signalPassFailure(); + } + + if (forallOp.getOutputs().size() > 0) { + mlir::emitError(mlir::UnknownLoc::get(&Pass::getContext()), + "forall with tensor share_outs is not support."); + return signalPassFailure(); + } + candidateForall.emplace_back(forallOp); + }); + + for (auto forallOp : candidateForall) { + tileForall(forallOp, tileSizes, noMinMaxBounds); + } + } +}; + +} // namespace + +std::unique_ptr mlir::createForallTilingPass(ArrayRef tileSizes, + bool noMinMaxBounds, + llvm::StringRef anchor) { + return std::make_unique(tileSizes, noMinMaxBounds, anchor); +} diff --git a/compiler/lib/Dialect/SCF/Transforms/PassDetail.h b/compiler/lib/Dialect/SCF/Transforms/PassDetail.h index 150246c55..f728745e4 100644 --- a/compiler/lib/Dialect/SCF/Transforms/PassDetail.h +++ b/compiler/lib/Dialect/SCF/Transforms/PassDetail.h @@ -27,6 +27,10 @@ namespace scf { class SCFDialect; } // namespace scf +namespace affine { +class AffineDialect; +} // namepsace affine + #define GEN_PASS_CLASSES #include "byteir/Dialect/SCF/Passes.h.inc" diff --git a/compiler/test/Dialect/SCF/forallTiling.mlir b/compiler/test/Dialect/SCF/forallTiling.mlir new file mode 100644 index 000000000..6add041b5 --- /dev/null +++ b/compiler/test/Dialect/SCF/forallTiling.mlir @@ -0,0 +1,69 @@ +// RUN: byteir-opt %s --forall-tiling="tile-sizes=256" --split-input-file --canonicalize --cse | FileCheck %s + +func.func @Copy(%arg0: memref<32x64xf32>, %arg1: memref<32x64xf32>) attributes {__byteir_reduction_fusion__} { + %c64 = arith.constant 64 : index + scf.forall (%arg2) in (2048) { + %0 = arith.remsi %arg2, %c64 : index + %1 = arith.divsi %arg2, %c64 : index + %2 = memref.load %arg0[%1, %0] : memref<32x64xf32> + memref.store %2, %arg1[%1, %0] : memref<32x64xf32> + } + return +} + +// CHECK-LABEL: func.func @Copy +// CHECK-NEXT: %[[C64:.*]] = arith.constant 64 : index +// CHECK-NEXT: scf.forall 
(%[[ARG2:.*]]) = (0) to (2048) step (256) { + // CHECK-NEXT: scf.forall (%[[ARG3:.*]]) in (256) { + // CHECK-NEXT: %[[V0:.*]] = arith.addi %[[ARG3]], %[[ARG2]] : index + // CHECK-NEXT: %[[V1:.*]] = arith.remsi %[[V0]], %[[C64]] : index + // CHECK-NEXT: %[[V2:.*]] = arith.divsi %[[V0]], %[[C64]] : index + // CHECK-NEXT: %[[V3:.*]] = memref.load %arg0[%[[V2]], %[[V1]]] : memref<32x64xf32> + // CHECK-NEXT: memref.store %[[V3]], %arg1[%[[V2]], %[[V1]]] : memref<32x64xf32> + +// ----- + +#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +func.func @Elementwise(%arg0: memref<32x1024x?x30xf32>) -> memref<32768x?x30xf32> attributes {__byteir_elementwise_fusion__} { + %c983040 = arith.constant 983040 : index + %c30 = arith.constant 30 : index + %c2 = arith.constant 2 : index + %collapse_shape = memref.collapse_shape %arg0 [[0, 1], [2], [3]] : memref<32x1024x?x30xf32> into memref<32768x?x30xf32> + %dim = memref.dim %arg0, %c2 : memref<32x1024x?x30xf32> + %alloc = memref.alloc(%dim) : memref<32768x?x30xf32> + %0 = arith.muli %dim, %c983040 : index + scf.forall (%arg1) in (%0) { + %1 = arith.remsi %arg1, %c30 : index + %2 = arith.divsi %arg1, %c30 : index + %3 = arith.remsi %2, %dim : index + %4 = arith.divsi %2, %dim : index + %subview = memref.subview %collapse_shape[%4, %3, %1] [1, 1, 1] [1, 1, 1] : memref<32768x?x30xf32> to memref<1x1x1xf32, strided<[300, 30, 1], offset: ?>> + %subview_0 = memref.subview %alloc[%4, %3, %1] [1, 1, 1] [1, 1, 1] : memref<32768x?x30xf32> to memref<1x1x1xf32, strided<[300, 30, 1], offset: ?>> + linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel"]} ins(%subview : memref<1x1x1xf32, strided<[300, 30, 1], offset: ?>>) outs(%subview_0 : memref<1x1x1xf32, strided<[300, 30, 1], offset: ?>>) attrs = {__byteir_gpu_tile_elementwise_0} { + ^bb0(%in: f32, %out: f32): + %5 = arith.mulf %in, %in : f32 + linalg.yield %5 : f32 + } + } + return %alloc : memref<32768x?x30xf32> +} + +// CHECK: #[[$MAP_LOOP_SIZE:.*]] = 
affine_map<(d0)[s0] -> (-d0 + s0, 256)> +// CHECK-LABEL: func.func @Elementwise +// CHECK-DAG: %[[C983040:.*]] = arith.constant 983040 : index +// CHECK-DAG: %[[C30:.*]] = arith.constant 30 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[COLLAPSE:.*]] = memref.collapse_shape %arg0 +// CHECK: %[[DIM:.*]] = memref.dim %arg0, %[[C2]] : memref<32x1024x?x30xf32> +// CHECK-NEXT: %[[ALLOC:.*]] = memref.alloc(%[[DIM]]) : memref<32768x?x30xf32> +// CHECK-NEXT: %[[LB:.*]] = arith.muli %dim, %[[C983040]] : index +// CHECK-NEXT: scf.forall (%[[ARG1:.*]]) = (0) to (%[[LB]]) step (256) { + // CHECK-NEXT: %[[V1:.*]] = affine.min #[[$MAP_LOOP_SIZE]](%[[ARG1]])[%[[LB]]] + // CHECK-NEXT: scf.forall (%[[ARG2:.*]]) in (%[[V1:.*]]) { + // CHECK-NEXT: %[[V2:.*]] = arith.addi %[[ARG2]], %[[ARG1]] : index + // CHECK-NEXT: %[[V3:.*]] = arith.remsi %[[V2]], %[[C30]] : index + // CHECK-NEXT: %[[V4:.*]] = arith.divsi %[[V2]], %[[C30]] : index + // CHECK-NEXT: %[[V5:.*]] = arith.remsi %[[V4]], %[[DIM]] : index + // CHECK-NEXT: %[[V6:.*]] = arith.divsi %[[V4]], %[[DIM]] : index + // CHECK-NEXT: %[[SUBVIEW:.*]] = memref.subview %[[COLLAPSE]][%[[V6]], %[[V5]], %[[V3]]] [1, 1, 1] [1, 1, 1] : memref<32768x?x30xf32> to memref<1x1x1xf32, strided<[300, 30, 1], offset: ?>> + // CHECK-NEXT: %[[SUBVIEW_0:.*]] = memref.subview %[[ALLOC]][%[[V6]], %[[V5]], %[[V3]]] [1, 1, 1] [1, 1, 1] : memref<32768x?x30xf32> to memref<1x1x1xf32, strided<[300, 30, 1], offset: ?>> \ No newline at end of file From 0c72ddc5b8fa3a29b98a60a1f33b6762ec0ca8f4 Mon Sep 17 00:00:00 2001 From: zhengxuegui Date: Mon, 24 Jun 2024 21:17:03 +0800 Subject: [PATCH 2/2] format --- compiler/include/byteir/Dialect/SCF/Passes.td | 2 +- compiler/lib/Dialect/SCF/Transforms/PassDetail.h | 2 +- compiler/test/Dialect/SCF/forallTiling.mlir | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler/include/byteir/Dialect/SCF/Passes.td b/compiler/include/byteir/Dialect/SCF/Passes.td index 
44309c9f3..1ce451e8d 100644 --- a/compiler/include/byteir/Dialect/SCF/Passes.td +++ b/compiler/include/byteir/Dialect/SCF/Passes.td @@ -77,7 +77,7 @@ def ForallCollapsing : Pass<"forall-collapsing", "mlir::func::FuncOp"> { //===----------------------------------------------------------------------===// def ForallTiling : Pass<"forall-tiling"> { - let summary = "tile forall Op with specific tileSize"; + let summary = "tile forall Op with given tileSize"; let constructor = "mlir::createForallTilingPass()"; let dependentDialects = [ "scf::SCFDialect", diff --git a/compiler/lib/Dialect/SCF/Transforms/PassDetail.h b/compiler/lib/Dialect/SCF/Transforms/PassDetail.h index f728745e4..8a56f8ed3 100644 --- a/compiler/lib/Dialect/SCF/Transforms/PassDetail.h +++ b/compiler/lib/Dialect/SCF/Transforms/PassDetail.h @@ -29,7 +29,7 @@ class SCFDialect; namespace affine { class AffineDialect; -} // namepsace affine +} // namespace affine #define GEN_PASS_CLASSES #include "byteir/Dialect/SCF/Passes.h.inc" diff --git a/compiler/test/Dialect/SCF/forallTiling.mlir b/compiler/test/Dialect/SCF/forallTiling.mlir index 6add041b5..2b5c0b63d 100644 --- a/compiler/test/Dialect/SCF/forallTiling.mlir +++ b/compiler/test/Dialect/SCF/forallTiling.mlir @@ -66,4 +66,4 @@ func.func @Elementwise(%arg0: memref<32x1024x?x30xf32>) -> memref<32768x?x30xf32 // CHECK-NEXT: %[[V5:.*]] = arith.remsi %[[V4]], %[[DIM]] : index // CHECK-NEXT: %[[V6:.*]] = arith.divsi %[[V4]], %[[DIM]] : index // CHECK-NEXT: %[[SUBVIEW:.*]] = memref.subview %[[COLLAPSE]][%[[V6]], %[[V5]], %[[V3]]] [1, 1, 1] [1, 1, 1] : memref<32768x?x30xf32> to memref<1x1x1xf32, strided<[300, 30, 1], offset: ?>> - // CHECK-NEXT: %[[SUBVIEW_0:.*]] = memref.subview %[[ALLOC]][%[[V6]], %[[V5]], %[[V3]]] [1, 1, 1] [1, 1, 1] : memref<32768x?x30xf32> to memref<1x1x1xf32, strided<[300, 30, 1], offset: ?>> \ No newline at end of file + // CHECK-NEXT: %[[SUBVIEW_0:.*]] = memref.subview %[[ALLOC]][%[[V6]], %[[V5]], %[[V3]]] [1, 1, 1] [1, 1, 1] : 
memref<32768x?x30xf32> to memref<1x1x1xf32, strided<[300, 30, 1], offset: ?>>