[codegen][gpu] Adding conv filter layout fhwc to preprocessing pipeline #19974

Merged · 2 commits · Feb 26, 2025
@@ -128,8 +128,6 @@ class ConvertConvFilterToChannelsLastPass
     LDBG("convert-filter-to-channels-last pass didn't apply since an "
          "unsupported layout is given. Please use hwfc or fhwc as pass "
          "filter-layout option.");
-    // TODO add default fallback to filter layout once we have more data
-    // about models with the two layouts
     return signalPassFailure();
   }
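Note: with the option now defaulting to "fhwc" (see the Passes.td hunk below), this failure path is only reachable when an explicit, unsupported filter-layout value is passed. A minimal sketch of the check this early-exit sits in, reconstructed from the log message rather than copied from the pass:

    // Sketch (assumed shape of the surrounding code, not verbatim):
    // only "hwfc" and "fhwc" are supported filter layouts.
    if (filterLayout != "hwfc" && filterLayout != "fhwc") {
      LDBG(...);  // message shown in the hunk above
      return signalPassFailure();
    }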

@@ -295,7 +295,7 @@ padConvOp(RewriterBase &rewriter, linalg::LinalgOp linalgOp,
   auto id1 = exprToIdMap[getAffineDimExpr(dim1, map.getContext())];
   auto id2 = exprToIdMap[getAffineDimExpr(dim2, map.getContext())];
 
-  llvm::SmallVector<OpFoldResult> paddingValues(4, zero);
+  llvm::SmallVector<OpFoldResult> paddingValues(map.getNumResults(), zero);
   paddingValues[id1] = padding1;
   paddingValues[id2] = padding2;
   paddingTarget =
@@ -310,16 +310,20 @@
   linalg::LinalgOp paddedConv2dOp =
       mlir::clone(rewriter, linalgOp, {newOutput.getType()},
                   ArrayRef<Value>{newInput, newFilter, newOutput});
+
   // Extract slice.
   IntegerAttr one = rewriter.getI64IntegerAttr(1);
-  SmallVector<OpFoldResult> offsets(4, zero);
-  SmallVector<OpFoldResult> strides(4, one);
+  RankedTensorType outputType = cast<RankedTensorType>(newOutput.getType());
+  int64_t outputRank = outputType.getRank();
+  SmallVector<OpFoldResult> offsets(outputRank, zero);
+  SmallVector<OpFoldResult> strides(outputRank, one);
+
   auto resultType = cast<RankedTensorType>(linalgOp->getResult(0).getType());
   ArrayRef<int64_t> resultShape = resultType.getShape();
-  SmallVector<OpFoldResult> sizes = {rewriter.getIndexAttr(resultShape[0]),
-                                     rewriter.getIndexAttr(resultShape[1]),
-                                     rewriter.getIndexAttr(resultShape[2]),
-                                     rewriter.getIndexAttr(resultShape[3])};
+  SmallVector<OpFoldResult> sizes;
+  for (int i = 0; i < outputRank; i++) {
+    sizes.push_back(rewriter.getIndexAttr(resultShape[i]));
+  }
   Value extracted = rewriter.createOrFold<tensor::ExtractSliceOp>(
       loc, paddedConv2dOp->getResults()[0], offsets, sizes, strides);
   rewriter.replaceOp(linalgOp, extracted);
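The hardcoded 4s assumed a rank-4 (batched NHWC) convolution, so a conv with no batch dimension (rank-3 output, as in the new test below) would build mis-sized offset/size/stride vectors. A small sketch of the invariant the map.getNumResults() fix leans on; filterOperand is an illustrative OpOperand* name, not taken from this diff:

    // In linalg, an operand's indexing map produces one result per tensor
    // dimension, so getNumResults() always matches that operand's rank.
    AffineMap map = linalgOp.getMatchingIndexingMap(filterOperand);
    auto filterType = cast<RankedTensorType>(filterOperand->get().getType());
    assert(map.getNumResults() == filterType.getRank() &&
           "indexing-map results track operand rank");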
@@ -41,7 +41,7 @@ def ConvertConvFilterToChannelsLastPass:
   let summary = "Convert linalg convolutions filter from hwcf to channel last layout.";
   let options = [
     Option<"filterLayout", "filter-layout", "std::string",
-           /*default=*/"", "Filter layout of convolution.">,
+           /*default=*/"\"fhwc\"", "Filter layout of convolution.">,
   ];
   let dependentDialects = [
     "mlir::linalg::LinalgDialect",
@@ -1,5 +1,5 @@
 // RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(util.func(iree-preprocessing-convert-conv-filter-to-channels-last{filter-layout=hwfc}))" %s | FileCheck --check-prefixes=CHECK-HWFC,CHECK-ALL %s
-// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(util.func(iree-preprocessing-convert-conv-filter-to-channels-last{filter-layout=fhwc}))" %s | FileCheck --check-prefixes=CHECK-FHWC,CHECK-ALL %s
+// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(util.func(iree-preprocessing-convert-conv-filter-to-channels-last))" %s | FileCheck --check-prefixes=CHECK-FHWC,CHECK-ALL %s
 
 // CHECK-HWFC: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5, d3, d6)>
 // CHECK-FHWC: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d3, d4, d5, d6)>
@@ -105,6 +105,44 @@ func.func @conv_generic_nhwc_fhwc(%arg0: tensor<2x130x130x4xf16>, %arg1: tensor<
 
 // -----
+
+#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0 + d3, d1 + d4, d5)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d3, d4, d5)>
+#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2)>
+
+// CHECK-LABEL: func.func @conv2d_no_batch(
+// CHECK-SAME: %[[ARG0:.+]]: tensor<1026x1026x128xf16>
+// CHECK-SAME: %[[ARG1:.+]]: tensor<3x3x3x128xf16>
+// CHECK-SAME: %[[ARG2:.+]]: tensor<1024x1024x3xf32>)
+func.func @conv2d_no_batch(%arg0: tensor<1026x1026x128xf16>, %arg1: tensor<3x3x3x128xf16>, %arg2: tensor<1024x1024x3xf32>) -> tensor<1024x1024x3xf32> {
+  %conv = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%arg0, %arg1 : tensor<1026x1026x128xf16>, tensor<3x3x3x128xf16>) outs(%arg2 : tensor<1024x1024x3xf32>) {
+  ^bb0(%in: f16, %in_0: f16, %out: f32):
+    %0 = arith.extf %in : f16 to f32
+    %1 = arith.extf %in_0 : f16 to f32
+    %2 = arith.mulf %0, %1 : f32
+    %3 = arith.addf %out, %2 : f32
+    linalg.yield %3 : f32
+  } -> tensor<1024x1024x3xf32>
+  return %conv : tensor<1024x1024x3xf32>
+}
+
+// CHECK: %[[PAD0:.+]] = tensor.pad %[[ARG1]] low[0, 0, 0, 0] high[13, 0, 0, 0] {
+// CHECK: tensor<3x3x3x128xf16> to tensor<16x3x3x128xf16>
+// CHECK: %[[PAD1:.+]] = tensor.pad %[[ARG2]] low[0, 0, 0] high[0, 0, 13] {
+// CHECK: tensor<1024x1024x3xf32> to tensor<1024x1024x16xf32>
+// CHECK: %[[CONV:.+]] = linalg.generic {indexing_maps = [#map, #map1, #map2]
+// CHECK-SAME: ins(%arg0, %[[PAD0]] : tensor<1026x1026x128xf16>, tensor<16x3x3x128xf16>)
+// CHECK-SAME: outs(%[[PAD1]] : tensor<1024x1024x16xf32>) {
+// CHECK: %[[RES:.+]] = tensor.extract_slice %[[CONV]][0, 0, 0] [1024, 1024, 3] [1, 1, 1]
+// CHECK: return %[[RES]] : tensor<1024x1024x3xf32>
+
+// CONVOLUTION: tensor.pad {{.*}} low[0, 0, 0, 0] high[13, 0, 0, 0]
+// CONVOLUTION: tensor.pad {{.*}} low[0, 0, 0] high[0, 0, 13]
+
+// CONTRACT-NOT: tensor.pad {{.*}} low[0, 0, 0, 0] high[13, 0, 0, 0]
+// CONTRACT-NOT: tensor.pad {{.*}} low[0, 0, 0] high[0, 0, 13]
+
+// -----
 
 // CHECK-LABEL: func.func @main1(
 // CHECK-SAME: %[[ARG0:.+]]: tensor<2x130x130x320xf16>,
 // CHECK-SAME: %[[ARG1:.+]]: tensor<3x3x320x4xf16>,
3 changes: 2 additions & 1 deletion compiler/src/iree/compiler/Preprocessing/Passes.cpp
@@ -103,7 +103,8 @@ buildTransposeConvolutionPassPipeline(OpPassManager &passManager,
       .addPass(GlobalOptimization::createDetachElementwiseFromNamedOpsPass)
       .addPass(mlir::createLinalgNamedOpConversionPass)
       .addPass(GlobalOptimization::createConvert1X1FilterConv2DToMatmulPass)
-      .addPass(createConvertConvToChannelsLastPass);
+      .addPass(createConvertConvToChannelsLastPass)
+      .addPass(createConvertConvFilterToChannelsLastPass);
   passManager.addPass(DispatchCreation::createFoldUnitExtentDimsPass());
   passManager.addPass(createCanonicalizerPass());
   passManager.addPass(createCSEPass());
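Since the pipeline adds the pass through its plain factory, it inherits the new fhwc default. If a pipeline ever needed the non-default layout, a lambda factory would work; this is a hedged sketch that assumes FunctionLikeNest accepts an arbitrary pass-factory callable and that the tablegen-generated options struct uses the conventional name:

    // Hypothetical override: force hwfc instead of the fhwc default.
    .addPass([] {
      return createConvertConvFilterToChannelsLastPass(
          ConvertConvFilterToChannelsLastPassOptions{/*filterLayout=*/"hwfc"});
    })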