From 852b462650cf56f44a97b91e8f9cdf92dfb2c342 Mon Sep 17 00:00:00 2001 From: erwei-xilinx Date: Fri, 10 Jan 2025 03:28:55 -0800 Subject: [PATCH 01/11] Change from 'applyPatternsAndFoldGreedily' to 'applyPatternsGreedily' --- mlir/lib/Conversion/AIRRtToNpuPass.cpp | 12 ++--- mlir/lib/Conversion/AIRToAIEPass.cpp | 24 ++++----- mlir/lib/Conversion/ConvertToAIRPass.cpp | 8 +-- .../Transform/AIRDependencyScheduleOpt.cpp | 52 +++++++++---------- mlir/lib/Transform/AIRLinalgCodegen.cpp | 33 ++++++------ mlir/lib/Transform/AIRLowerLinalgTensors.cpp | 4 +- mlir/lib/Transform/AIRMiscPasses.cpp | 4 +- mlir/lib/Util/Dependency.cpp | 2 +- 8 files changed, 70 insertions(+), 69 deletions(-) diff --git a/mlir/lib/Conversion/AIRRtToNpuPass.cpp b/mlir/lib/Conversion/AIRRtToNpuPass.cpp index d37d5a6c7..dde0193a6 100644 --- a/mlir/lib/Conversion/AIRRtToNpuPass.cpp +++ b/mlir/lib/Conversion/AIRRtToNpuPass.cpp @@ -947,19 +947,19 @@ struct AIRRtToNpuPass : public impl::AIRRtToNpuBase { auto ctx = &getContext(); RewritePatternSet canoPatterns_0(ctx); xilinx::air::populateAIRLoopIndexCanonicalizationPatterns(canoPatterns_0); - (void)applyPatternsAndFoldGreedily(module, std::move(canoPatterns_0)); + (void)applyPatternsGreedily(module, std::move(canoPatterns_0)); // Specialize affine for loop nest into wraps and strides RewritePatternSet loopFoldPattern(ctx); loopFoldPattern.add(ctx); air::populateAIRLoopIndexCanonicalizationPatterns(loopFoldPattern); - (void)applyPatternsAndFoldGreedily(module, std::move(loopFoldPattern)); + (void)applyPatternsGreedily(module, std::move(loopFoldPattern)); unrollAffineFors(module); // Simplify arith ops (from airrt) RewritePatternSet canoPatterns_1(ctx); arith::IndexCastOp::getCanonicalizationPatterns(canoPatterns_1, ctx); - (void)applyPatternsAndFoldGreedily(module, std::move(canoPatterns_1)); + (void)applyPatternsGreedily(module, std::move(canoPatterns_1)); // Purge all wait ops again after unroll, in case there were loop carried // events which couldn't 
be purged before @@ -974,7 +974,7 @@ struct AIRRtToNpuPass : public impl::AIRRtToNpuBase { // Simplify arith ops (from airrt) RewritePatternSet canoPatterns_3(ctx); arith::IndexCastOp::getCanonicalizationPatterns(canoPatterns_3, ctx); - (void)applyPatternsAndFoldGreedily(module, std::move(canoPatterns_3)); + (void)applyPatternsGreedily(module, std::move(canoPatterns_3)); ConversionTarget target(getContext()); target.addIllegalDialect(); @@ -1020,7 +1020,7 @@ struct AIRRtToNpuPass : public impl::AIRRtToNpuBase { RewritePatternSet canoPatterns_2(ctx); canoPatterns_2.insert(ctx); arith::IndexCastOp::getCanonicalizationPatterns(canoPatterns_2, ctx); - (void)applyPatternsAndFoldGreedily(module, std::move(canoPatterns_2)); + (void)applyPatternsGreedily(module, std::move(canoPatterns_2)); // Unroll any affine for loops unrollAffineFors(module); @@ -1031,7 +1031,7 @@ struct AIRRtToNpuPass : public impl::AIRRtToNpuBase { // Cast buffers to i32 types RewritePatternSet castPattern(ctx); castPattern.add(CastFunctionArgs); - (void)applyPatternsAndFoldGreedily(module, std::move(castPattern)); + (void)applyPatternsGreedily(module, std::move(castPattern)); // Insert sync op after copying data out to host insertNpuSyncOpForResults(module); diff --git a/mlir/lib/Conversion/AIRToAIEPass.cpp b/mlir/lib/Conversion/AIRToAIEPass.cpp index 08597a952..f7529116d 100644 --- a/mlir/lib/Conversion/AIRToAIEPass.cpp +++ b/mlir/lib/Conversion/AIRToAIEPass.cpp @@ -673,7 +673,7 @@ void specializeHerdAffineIf(AIE::DeviceOp m) { auto ctx = m->getContext(); RewritePatternSet patterns(ctx); patterns.insert(ctx); - (void)applyPatternsAndFoldGreedily(m, std::move(patterns)); + (void)applyPatternsGreedily(m, std::move(patterns)); } struct LowerAIRExecutePattern : public OpRewritePattern { @@ -720,7 +720,7 @@ void lowerAirExecute(AIE::DeviceOp d) { patterns.insert(ctx); - (void)applyPatternsAndFoldGreedily(d, std::move(patterns)); + (void)applyPatternsGreedily(d, std::move(patterns)); } struct 
LowerScfTokenPattern : public OpRewritePattern { @@ -813,7 +813,7 @@ void lowerScfAirTokens(AIE::DeviceOp m) { auto ctx = m->getContext(); RewritePatternSet patterns(ctx); patterns.insert(ctx); - (void)applyPatternsAndFoldGreedily(m, std::move(patterns)); + (void)applyPatternsGreedily(m, std::move(patterns)); } struct AllocL1BuffersPattern : public OpRewritePattern { @@ -935,7 +935,7 @@ void allocL1Buffers(AIE::DeviceOp m, RewritePatternSet patterns(ctx); patterns.insert(ctx, tileToHerdMap, BufferId); // AllocL1TensorsPattern - (void)applyPatternsAndFoldGreedily(m, std::move(patterns)); + (void)applyPatternsGreedily(m, std::move(patterns)); } bool areReferencedByTheSameAIRChannel(Value memref_a, Value memref_b) { @@ -1027,7 +1027,7 @@ void allocL2Buffers(AIE::DeviceOp m, L2MemrefToMemTileMap(m, memrefToTileMap); patterns.insert(ctx, memrefToTileMap, bufferToMemtileMap, BufferId); - (void)applyPatternsAndFoldGreedily(m, std::move(patterns)); + (void)applyPatternsGreedily(m, std::move(patterns)); } // Remove L2 temporary buffer allocs now that @@ -1359,7 +1359,7 @@ void lowerAIRChannels( std::map linksToComplete; patterns.insert(ctx, s, bufferToMemtileMap, linksToComplete); - (void)applyPatternsAndFoldGreedily(d, std::move(patterns)); + (void)applyPatternsGreedily(d, std::move(patterns)); } struct SpecializeChannelBundlePattern @@ -1561,7 +1561,7 @@ void specializeChannelBundle( auto ctx = d->getContext(); RewritePatternSet patterns(ctx); patterns.insert(ctx, chan_to_chan_map); - (void)applyPatternsAndFoldGreedily(d, std::move(patterns)); + (void)applyPatternsGreedily(d, std::move(patterns)); } struct LowerAIRPingPongPattern : public OpRewritePattern { @@ -1618,7 +1618,7 @@ void LowerAIRPingPong(AIE::DeviceOp &d) { auto ctx = d->getContext(); RewritePatternSet patterns(ctx); patterns.insert(ctx); - (void)applyPatternsAndFoldGreedily(d, std::move(patterns)); + (void)applyPatternsGreedily(d, std::move(patterns)); } template @@ -1879,7 +1879,7 @@ class AIRToAIEPass : 
public air::impl::AIRToAIEBase { RewritePatternSet patterns(ctx); xilinx::air::populateAIRunrollAIRChannelPutGetInScfParallelPatterns( patterns); - (void)applyPatternsAndFoldGreedily(aie_device, std::move(patterns)); + (void)applyPatternsGreedily(aie_device, std::move(patterns)); // Substituting index operands, such as strides and offsets, to constant // zero for convenience. TODO: generalize this @@ -3251,7 +3251,7 @@ class AIRToAIEPass : public air::impl::AIRToAIEBase { } if (patterns.getNativePatterns().size()) - (void)applyPatternsAndFoldGreedily(m, std::move(patterns)); + (void)applyPatternsGreedily(m, std::move(patterns)); } void runOnOperation() override { @@ -3460,7 +3460,7 @@ class AIRToAIEPass : public air::impl::AIRToAIEBase { RewritePatternSet patterns(ctx); air::WaitAllOp::getCanonicalizationPatterns(patterns, ctx); - (void)applyPatternsAndFoldGreedily(device, std::move(patterns)); + (void)applyPatternsGreedily(device, std::move(patterns)); // Remove ops via rewrite patterns. 
RewritePatternSet removepatterns(ctx); @@ -3700,7 +3700,7 @@ FailureOr convertAIRToAIE(mlir::RewriterBase &rewriter, patterns.insert(ctx); patterns.insert(ctx, tileToHerdMap, BufferId); air::WaitAllOp::getCanonicalizationPatterns(patterns, ctx); - (void)applyPatternsAndFoldGreedily(aie_module, std::move(patterns)); + (void)applyPatternsGreedily(aie_module, std::move(patterns)); } return aie_module; diff --git a/mlir/lib/Conversion/ConvertToAIRPass.cpp b/mlir/lib/Conversion/ConvertToAIRPass.cpp index 18fc1034c..57a53a94b 100644 --- a/mlir/lib/Conversion/ConvertToAIRPass.cpp +++ b/mlir/lib/Conversion/ConvertToAIRPass.cpp @@ -1141,7 +1141,7 @@ struct CopyToDmaPass : public air::impl::CopyToDmaBase { linalg::getLinalgTilingCanonicalizationPatterns(context); memref::AllocOp::getCanonicalizationPatterns(stage1Patterns, context); memref::populateComposeSubViewPatterns(stage1Patterns, context); - (void)applyPatternsAndFoldGreedily(module, std::move(stage1Patterns)); + (void)applyPatternsGreedily(module, std::move(stage1Patterns)); RewritePatternSet stage2Patterns(context); stage2Patterns @@ -1174,7 +1174,7 @@ struct CopyToDmaPass : public air::impl::CopyToDmaBase { RewritePatternSet pattern(context); air::DmaMemcpyNdOp::getCanonicalizationPatterns(pattern, context); - (void)applyPatternsAndFoldGreedily(module, std::move(pattern)); + (void)applyPatternsGreedily(module, std::move(pattern)); } }; @@ -1613,7 +1613,7 @@ transform::ParToHerdOp::applyToOne(transform::TransformRewriter &rewriter, getFirstDim()); patterns.add(ctx, filteredOps, herdOps, getFirstDim()); - (void)applyPatternsAndFoldGreedily( + (void)applyPatternsGreedily( target->getParentWithTrait(), std::move(patterns)); for (auto h : herdOps) { @@ -1641,7 +1641,7 @@ transform::ParToLaunchOp::applyToOne(transform::TransformRewriter &rewriter, getHasAirSegment()); patterns.add(ctx, filteredOps, launchOps, getHasAirSegment()); - (void)applyPatternsAndFoldGreedily( + (void)applyPatternsGreedily( 
target->getParentWithTrait(), std::move(patterns)); for (auto l : launchOps) diff --git a/mlir/lib/Transform/AIRDependencyScheduleOpt.cpp b/mlir/lib/Transform/AIRDependencyScheduleOpt.cpp index 8612cbc24..335a9c94c 100644 --- a/mlir/lib/Transform/AIRDependencyScheduleOpt.cpp +++ b/mlir/lib/Transform/AIRDependencyScheduleOpt.cpp @@ -2676,7 +2676,7 @@ class AIRHoistDmaInAccumPattern MLIRContext *ctx = funcOp.getContext(); RewritePatternSet patterns(&getContext()); patterns.insert(ctx); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); + (void)applyPatternsGreedily(funcOp, std::move(patterns)); } void runOnOperation() override { @@ -2743,7 +2743,7 @@ class AIRAnnotateFrontAndBackOpsInForPattern MLIRContext *ctx = funcOp.getContext(); RewritePatternSet patterns(&getContext()); patterns.insert(ctx); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); + (void)applyPatternsGreedily(funcOp, std::move(patterns)); } void runOnOperation() override { @@ -2769,7 +2769,7 @@ class AIRHoistMemallocInForPattern MLIRContext *ctx = funcOp.getContext(); RewritePatternSet patterns(&getContext()); patterns.insert(ctx, clKeepMemrefDealloc); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); + (void)applyPatternsGreedily(funcOp, std::move(patterns)); } void runOnOperation() override { @@ -2796,7 +2796,7 @@ class AIRConstructPingPongDependencyPattern MLIRContext *ctx = funcOp.getContext(); RewritePatternSet patterns(&getContext()); patterns.insert(ctx); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); + (void)applyPatternsGreedily(funcOp, std::move(patterns)); } void runOnOperation() override { @@ -2877,7 +2877,7 @@ class AIRHoistOpsNotUsingPingPongPattern RewritePatternSet patterns(&getContext()); patterns.insert(ctx); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); + (void)applyPatternsGreedily(funcOp, std::move(patterns)); } void runOnOperation() override { @@ -2908,28 +2908,28 @@ class 
AIRPingPongTransformationPattern RewritePatternSet patterns(&getContext()); patterns.insert(ctx); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); + (void)applyPatternsGreedily(funcOp, std::move(patterns)); } void runOpAnnotationPatterns(func::FuncOp funcOp) { MLIRContext *ctx = funcOp.getContext(); RewritePatternSet patterns(&getContext()); patterns.insert(ctx); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); + (void)applyPatternsGreedily(funcOp, std::move(patterns)); } void runHoistMemallocPatterns(func::FuncOp funcOp) { MLIRContext *ctx = funcOp.getContext(); RewritePatternSet patterns(&getContext()); patterns.insert(ctx, clKeepMemrefDealloc); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); + (void)applyPatternsGreedily(funcOp, std::move(patterns)); } void runConstructPingPongDependencyPatterns(func::FuncOp funcOp) { MLIRContext *ctx = funcOp.getContext(); RewritePatternSet patterns(&getContext()); patterns.insert(ctx); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); + (void)applyPatternsGreedily(funcOp, std::move(patterns)); } void runLoopUnroll(func::FuncOp funcOp) { @@ -2995,7 +2995,7 @@ class AIRLabelScfForLoopForPingPongPattern MLIRContext *ctx = funcOp.getContext(); RewritePatternSet patterns(&getContext()); patterns.insert(ctx); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); + (void)applyPatternsGreedily(funcOp, std::move(patterns)); } void runOnOperation() override { @@ -3022,7 +3022,7 @@ class AIRLabelScfForLoopInAIRSegmentPattern MLIRContext *ctx = funcOp.getContext(); RewritePatternSet patterns(&getContext()); patterns.insert(ctx); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); + (void)applyPatternsGreedily(funcOp, std::move(patterns)); } void runOnOperation() override { @@ -3049,7 +3049,7 @@ AIRSpecializeChannelWrapAndStrideImpl(Region *region, int maxNumDims = -1, 
mlir::affine::AffineApplyOp::getCanonicalizationPatterns(preproc_patterns, ctx); air::WaitAllOp::getCanonicalizationPatterns(preproc_patterns, ctx); - (void)applyPatternsAndFoldGreedily(*region, std::move(preproc_patterns)); + (void)applyPatternsGreedily(*region, std::move(preproc_patterns)); RewritePatternSet patterns(ctx); patterns @@ -3059,14 +3059,14 @@ AIRSpecializeChannelWrapAndStrideImpl(Region *region, int maxNumDims = -1, AIRSpecializeChannelWrapAndStrideInAffineFor>(ctx); patterns.insert(ctx, maxNumDims); affine::AffineApplyOp::getCanonicalizationPatterns(patterns, ctx); - (void)applyPatternsAndFoldGreedily(*region, std::move(patterns)); + (void)applyPatternsGreedily(*region, std::move(patterns)); // Unroll any remaining loops which contain only data movements. if (enableForLoopUnrolling) { RewritePatternSet unroll_patterns(ctx); unroll_patterns .insert(ctx); - (void)applyPatternsAndFoldGreedily(*region, std::move(unroll_patterns)); + (void)applyPatternsGreedily(*region, std::move(unroll_patterns)); } // Canonicalize wrap and stride list to remove redundant dimensions @@ -3074,7 +3074,7 @@ AIRSpecializeChannelWrapAndStrideImpl(Region *region, int maxNumDims = -1, cano_patterns.insert(ctx); ExecuteOp::getCanonicalizationPatterns(cano_patterns, ctx); - (void)applyPatternsAndFoldGreedily(*region, std::move(cano_patterns)); + (void)applyPatternsGreedily(*region, std::move(cano_patterns)); return success(); } @@ -3163,7 +3163,7 @@ class AIRDependencyScheduleOpt MLIRContext *ctx = funcOp.getContext(); RewritePatternSet patterns(&getContext()); patterns.insert(ctx); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); + (void)applyPatternsGreedily(funcOp, std::move(patterns)); } void runOnFunction(func::FuncOp f) { @@ -3208,7 +3208,7 @@ class AIREnforceLoopCarriedMemrefDeallocPattern MLIRContext *ctx = funcOp.getContext(); RewritePatternSet patterns(&getContext()); patterns.insert(ctx); - (void)applyPatternsAndFoldGreedily(funcOp, 
std::move(patterns)); + (void)applyPatternsGreedily(funcOp, std::move(patterns)); } void runOnFunction(func::FuncOp f) { runOptPatterns(f); } @@ -3548,7 +3548,7 @@ class AIRFuseChannels RewritePatternSet patterns(ctx); air::WaitAllOp::getCanonicalizationPatterns(patterns, ctx); scf::ForOp::getCanonicalizationPatterns(patterns, ctx); - (void)applyPatternsAndFoldGreedily(f, std::move(patterns)); + (void)applyPatternsGreedily(f, std::move(patterns)); } } @@ -4304,7 +4304,7 @@ struct IsolateAsyncDmaLoopNestInSCFForPattern // If necessary, hoist allocs out of the loops, too. RewritePatternSet patterns(f.getContext()); patterns.insert(f.getContext(), false); - (void)applyPatternsAndFoldGreedily(f, std::move(patterns)); + (void)applyPatternsGreedily(f, std::move(patterns)); // Hoist ops out of each scf.for. for (auto set : target_ops_sets) @@ -4437,7 +4437,7 @@ LogicalResult AIRIsolateAsyncDmaLoopNestsImpl(Region *region) { air::HerdOp::getCanonicalizationPatterns(patterns_1, ctx); air::WaitAllOp::getCanonicalizationPatterns(patterns_1, ctx); scf::ForOp::getCanonicalizationPatterns(patterns_1, ctx); - (void)applyPatternsAndFoldGreedily(*region, std::move(patterns_1)); + (void)applyPatternsGreedily(*region, std::move(patterns_1)); return success(); } @@ -5505,14 +5505,14 @@ class AIRLoopFusion ctx); air::WaitAllOp::getCanonicalizationPatterns(patterns, ctx); air::ExecuteOp::getCanonicalizationPatterns(patterns, ctx); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); + (void)applyPatternsGreedily(funcOp, std::move(patterns)); } void runPostProcPatterns(func::FuncOp funcOp) { MLIRContext *ctx = funcOp.getContext(); RewritePatternSet patterns(&getContext()); patterns.insert(ctx); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); + (void)applyPatternsGreedily(funcOp, std::move(patterns)); // Update func.call declaration post memref shrinkage SmallVector shrunkMemallocs; funcOp.walk([&](memref::AllocOp op) { @@ -5568,7 +5568,7 @@ class 
AIRLoopFusion "Must be one of [segment, launch, all]."); signalPassFailure(); } - (void)applyPatternsAndFoldGreedily(func, std::move(patterns)); + (void)applyPatternsGreedily(func, std::move(patterns)); runPostProcPatterns(func); func.walk([&](memref::AllocOp op) { op->removeAttr("shrinkage"); }); } @@ -5627,7 +5627,7 @@ class AIROptimizeShimDMABDs /*maxNumDims*/ maxNumDims, /*enableForLoopUnrolling*/ false); RewritePatternSet patterns(ctx); populateAIRLoopFusionPattern(patterns); - (void)applyPatternsAndFoldGreedily(func, std::move(patterns)); + (void)applyPatternsGreedily(func, std::move(patterns)); } private: @@ -5943,7 +5943,7 @@ class AIRFuseAllocDealloc AIRFuseAllocDeallocToAIRHierarchy, AIRFuseAllocDeallocToAIRHierarchy, AIRFuseAllocDeallocToAIRHierarchy>(ctx); - (void)applyPatternsAndFoldGreedily(func, std::move(patterns)); + (void)applyPatternsGreedily(func, std::move(patterns)); } private: @@ -5965,7 +5965,7 @@ class AIRShrinkMemrefSizesByAccess auto funcOp = getOperation(); RewritePatternSet patterns(&getContext()); patterns.insert(ctx); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); + (void)applyPatternsGreedily(funcOp, std::move(patterns)); // Update func.call declaration after memref shrinkage SmallVector shrunkMemallocs; funcOp.walk([&](memref::AllocOp op) { diff --git a/mlir/lib/Transform/AIRLinalgCodegen.cpp b/mlir/lib/Transform/AIRLinalgCodegen.cpp index 5c07521f7..d8bfe33eb 100644 --- a/mlir/lib/Transform/AIRLinalgCodegen.cpp +++ b/mlir/lib/Transform/AIRLinalgCodegen.cpp @@ -673,7 +673,8 @@ struct LinalgTransformationFilter { return *this; } - template LinalgTransformationFilter &addOpFilter() { + template + LinalgTransformationFilter &addOpFilter() { return addFilter( [](Operation *op) { return success(isa(op)); }); } @@ -1122,7 +1123,7 @@ void AIRPipelineReducePass::runOnOperation() { clPipelineDepth, clPipelineDirection, clPromoteSubViews); - (void)applyPatternsAndFoldGreedily(func, std::move(patterns)); + 
(void)applyPatternsGreedily(func, std::move(patterns)); } class AIRLinalgCodegen : public air::impl::AIRLinalgCodegenBase { @@ -1145,7 +1146,7 @@ class AIRLinalgCodegen RemoveAllocLinalgOpCopyPattern, RemoveExtraAllocPattern, RemoveAllocCopyLinalgOpCopyPattern, RemoveDeadCopyPattern, RemoveFillCopyLinalgPattern>(ctx); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); + (void)applyPatternsGreedily(funcOp, std::move(patterns)); } /// Collect perfectly nested loops starting from `rootForOps`. Loops are @@ -1361,7 +1362,7 @@ class AIRLinalgCodegen stageL2Patterns.insert(ctx); stageL2Patterns.insert(ctx); scf::populateSCFForLoopCanonicalizationPatterns(stageL2Patterns); - (void)applyPatternsAndFoldGreedily(called, std::move(stageL2Patterns)); + (void)applyPatternsGreedily(called, std::move(stageL2Patterns)); LLVM_DEBUG(llvm::outs() << "After L2 Tiling\n"); LLVM_DEBUG(called.print(llvm::outs())); @@ -1416,7 +1417,7 @@ class AIRLinalgCodegen .setLoopType(linalg::LinalgTilingLoopType::ParallelLoops), LinalgTransformationFilter(next_match, StringAttr::get(ctx, "herd_tiling"))); - (void)applyPatternsAndFoldGreedily(called, std::move(patterns)); + (void)applyPatternsGreedily(called, std::move(patterns)); next_match = StringAttr::get(ctx, "herd_tiling"); LLVM_DEBUG(llvm::outs() << "After Herd Tiling\n"); @@ -1449,12 +1450,12 @@ class AIRLinalgCodegen stageL1Patterns.insert(ctx, 2); stageL1Patterns.insert(ctx); scf::populateSCFForLoopCanonicalizationPatterns(stageL1Patterns); - (void)applyPatternsAndFoldGreedily(called, std::move(stageL1Patterns)); + (void)applyPatternsGreedily(called, std::move(stageL1Patterns)); RewritePatternSet stage3Patterns(&getContext()); stage3Patterns.insert(ctx); stage3Patterns.insert(ctx); - (void)applyPatternsAndFoldGreedily(called, std::move(stage3Patterns)); + (void)applyPatternsGreedily(called, std::move(stage3Patterns)); LLVM_DEBUG(llvm::outs() << "After L1 Tiling\n"); LLVM_DEBUG(called.print(llvm::outs())); @@ -1554,7 +1555,7 
@@ class AIRLinalgCodegen stageL2Patterns.insert(ctx); stageL2Patterns.insert(ctx); scf::populateSCFForLoopCanonicalizationPatterns(stageL2Patterns); - (void)applyPatternsAndFoldGreedily(called, std::move(stageL2Patterns)); + (void)applyPatternsGreedily(called, std::move(stageL2Patterns)); next_match = StringAttr::get(ctx, "L2_promoted"); } @@ -1585,8 +1586,8 @@ class AIRLinalgCodegen stage3Patterns.insert(ctx); scf::populateSCFForLoopCanonicalizationPatterns(stage3Patterns); - (void)applyPatternsAndFoldGreedily(called, std::move(stageL1Patterns)); - (void)applyPatternsAndFoldGreedily(called, std::move(stage3Patterns)); + (void)applyPatternsGreedily(called, std::move(stageL1Patterns)); + (void)applyPatternsGreedily(called, std::move(stage3Patterns)); called.walk([](linalg::LinalgOp op) { op->removeAttr(air::LinalgTransforms::kLinalgTransformMarker); }); @@ -1650,9 +1651,9 @@ class AIRLinalgCodegen stage3Patterns.insert(ctx); stage3Patterns.insert(ctx, 2); - (void)applyPatternsAndFoldGreedily(called, std::move(stage1Patterns)); - (void)applyPatternsAndFoldGreedily(called, std::move(stage2Patterns)); - (void)applyPatternsAndFoldGreedily(called, std::move(stage3Patterns)); + (void)applyPatternsGreedily(called, std::move(stage1Patterns)); + (void)applyPatternsGreedily(called, std::move(stage2Patterns)); + (void)applyPatternsGreedily(called, std::move(stage3Patterns)); /// scf.parallel transform from herd dimension /// Step-1: Capture the perfectly nested scf.for loops @@ -1721,7 +1722,7 @@ class AIRLinalgCodegen // RewritePatternSet prePatterns(&getContext()); // prePatterns.insert(&getContext()); - //(void)applyPatternsAndFoldGreedily(f, std::move(prePatterns)); + //(void)applyPatternsGreedily(f, std::move(prePatterns)); if (!clLinalgCodegenTestPatterns) { runMatmulPatterns(f); runConv2dPatterns(f); @@ -2101,8 +2102,8 @@ transform::LinalgPromoteOp::apply(transform::TransformRewriter &rewriter, // to: // memref.alloc() : memref<32x32xi32, 2> 
memref::AllocOp::getCanonicalizationPatterns(patterns, ctx); - (void)applyPatternsAndFoldGreedily( - payloadOps[0]->getParentOfType(), std::move(patterns)); + (void)applyPatternsGreedily(payloadOps[0]->getParentOfType(), + std::move(patterns)); if (!transformed.size()) return emitDefaultDefiniteFailure(payloadOps[0]); diff --git a/mlir/lib/Transform/AIRLowerLinalgTensors.cpp b/mlir/lib/Transform/AIRLowerLinalgTensors.cpp index c5f713f03..eeae62d04 100644 --- a/mlir/lib/Transform/AIRLowerLinalgTensors.cpp +++ b/mlir/lib/Transform/AIRLowerLinalgTensors.cpp @@ -158,12 +158,12 @@ void AIRLowerLinalgTensors::runOnOperation() { patterns1.add(&context); // RemoveAllocCopyPattern, // RemoveTensorLoadStorePattern - (void)applyPatternsAndFoldGreedily(aie_module, std::move(patterns1)); + (void)applyPatternsGreedily(aie_module, std::move(patterns1)); RewritePatternSet patterns2(&context); linalg::populateLinalgNamedOpsGeneralizationPatterns(patterns2); patterns2.add(&context); - (void)applyPatternsAndFoldGreedily(aie_module, std::move(patterns2)); + (void)applyPatternsGreedily(aie_module, std::move(patterns2)); } namespace xilinx { diff --git a/mlir/lib/Transform/AIRMiscPasses.cpp b/mlir/lib/Transform/AIRMiscPasses.cpp index fab86867b..5858a1783 100644 --- a/mlir/lib/Transform/AIRMiscPasses.cpp +++ b/mlir/lib/Transform/AIRMiscPasses.cpp @@ -657,7 +657,7 @@ void AIRLowerHerdParallelPass::runOnOperation() { auto context = op->getContext(); RewritePatternSet patterns(context); patterns.add(context); - (void)applyPatternsAndFoldGreedily(op, std::move(patterns)); + (void)applyPatternsGreedily(op, std::move(patterns)); } class AIRLabelBroadcastChannelWithTilePass @@ -1715,7 +1715,7 @@ void AIRSplitL2MemrefForBufferConstraintPass::runOnOperation() { mlir::arith::ConstantIndexOp::getCanonicalizationPatterns(canoPatterns, context); air::ExecuteOp::getCanonicalizationPatterns(canoPatterns, context); - (void)applyPatternsAndFoldGreedily(func, std::move(canoPatterns)); + 
(void)applyPatternsGreedily(func, std::move(canoPatterns)); // Split memrefs. allocOps.clear(); diff --git a/mlir/lib/Util/Dependency.cpp b/mlir/lib/Util/Dependency.cpp index e8427e8b8..abc7a39e7 100644 --- a/mlir/lib/Util/Dependency.cpp +++ b/mlir/lib/Util/Dependency.cpp @@ -1973,7 +1973,7 @@ void dependencyCanonicalizer::removeRedundantWaitAllOps(func::FuncOp func) { auto ctx = func.getContext(); RewritePatternSet patterns(ctx); air::WaitAllOp::getCanonicalizationPatterns(patterns, ctx); - (void)applyPatternsAndFoldGreedily(func, std::move(patterns)); + (void)applyPatternsGreedily(func, std::move(patterns)); } // Get number of cores in herd From 768ae626e718b4d023e0e49321d92382c7177cd0 Mon Sep 17 00:00:00 2001 From: erwei-xilinx Date: Fri, 10 Jan 2025 03:29:29 -0800 Subject: [PATCH 02/11] Update llvm and mlir-aie --- utils/clone-llvm.sh | 2 +- utils/clone-mlir-aie.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/clone-llvm.sh b/utils/clone-llvm.sh index e03de9b77..a1d92a780 100755 --- a/utils/clone-llvm.sh +++ b/utils/clone-llvm.sh @@ -14,7 +14,7 @@ # ##===----------------------------------------------------------------------===## -export commithash=f334db92be168876b618db72dc93078ce23ffa89 +export commithash=f926bcf9068c808b643a56322b7ef6910eb36599 target_dir=llvm # clone llvm if it is not there already diff --git a/utils/clone-mlir-aie.sh b/utils/clone-mlir-aie.sh index 47b4e967e..975cd0ff5 100755 --- a/utils/clone-mlir-aie.sh +++ b/utils/clone-mlir-aie.sh @@ -14,7 +14,7 @@ # ##===----------------------------------------------------------------------===## -export HASH=4d613f9c7e140c82299cd6e914a4047592ead635 +export HASH=1b059e4996656d00499598c8c1ce8da6baa71262 target_dir=mlir-aie if [[ ! 
-d $target_dir ]]; then From a7bef9249da54c53afb1a9a762e130d91e14337d Mon Sep 17 00:00:00 2001 From: erwei-xilinx Date: Fri, 10 Jan 2025 03:50:58 -0800 Subject: [PATCH 03/11] Disable -Werror=sign-compare due to issue with nanobind 2.4 --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index aba242522..08696e4ec 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -108,7 +108,7 @@ include_directories(${AIE_INCLUDE_DIRS}) add_definitions(${LLVM_DEFINITIONS}) ## flags duplicated from mlir-aie -add_flag_if_supported("-Werror=sign-compare" WERROR_SIGN_COMPARE) +# add_flag_if_supported("-Werror=sign-compare" WERROR_SIGN_COMPARE) add_flag_if_supported("-Werror=unused" WERROR_USED) # What happens when you have a non-void function with no return? # No `ret` instruction is generated and so execution of that function just From b99e04ff7c50bf1849da2f430b1a2d0123023f0b Mon Sep 17 00:00:00 2001 From: erwei-xilinx Date: Fri, 10 Jan 2025 04:14:00 -0800 Subject: [PATCH 04/11] Add convert-arith-to-llvm pass to pipeline --- mlir/test/Util/Channel/2mm/mmult.mlir | 2 +- mlir/test/Util/Channel/async/producer_consumer.mlir | 2 +- mlir/test/Util/Channel/broadcast/broadcast.mlir | 2 +- mlir/test/Util/Channel/serial/channel_op_lowering.mlir | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mlir/test/Util/Channel/2mm/mmult.mlir b/mlir/test/Util/Channel/2mm/mmult.mlir index 43e602aa2..050bd5a24 100644 --- a/mlir/test/Util/Channel/2mm/mmult.mlir +++ b/mlir/test/Util/Channel/2mm/mmult.mlir @@ -5,7 +5,7 @@ // //===----------------------------------------------------------------------===// -// RUN: air-opt -o %T/mmult.async.llvm.mlir %s -async-to-async-runtime -async-runtime-ref-counting -async-runtime-ref-counting-opt -convert-linalg-to-affine-loops -expand-strided-metadata -lower-affine -convert-scf-to-cf -convert-async-to-llvm -finalize-memref-to-llvm -convert-cf-to-llvm -convert-func-to-llvm 
-reconcile-unrealized-casts -canonicalize -cse +// RUN: air-opt -o %T/mmult.async.llvm.mlir %s -async-to-async-runtime -async-runtime-ref-counting -async-runtime-ref-counting-opt -convert-linalg-to-affine-loops -expand-strided-metadata -lower-affine -convert-scf-to-cf -convert-async-to-llvm -convert-arith-to-llvm -finalize-memref-to-llvm -convert-cf-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts -canonicalize -cse // RUN: air-translate --mlir-to-llvmir %T/mmult.async.llvm.mlir -o %T/mmult.async.ll // RUN: %OPT -O3 -o %T/mmult.async.opt.bc < %T/mmult.async.ll // RUN: %LLC %T/mmult.async.opt.bc --relocation-model=pic -filetype=obj -o %T/mmult.async.o diff --git a/mlir/test/Util/Channel/async/producer_consumer.mlir b/mlir/test/Util/Channel/async/producer_consumer.mlir index c3f9c36b6..6b603dd81 100644 --- a/mlir/test/Util/Channel/async/producer_consumer.mlir +++ b/mlir/test/Util/Channel/async/producer_consumer.mlir @@ -5,7 +5,7 @@ // //===----------------------------------------------------------------------===// -// RUN: air-opt -o %T/producer_consumer.async.llvm.mlir %s -async-to-async-runtime -async-runtime-ref-counting -async-runtime-ref-counting-opt -convert-linalg-to-affine-loops -expand-strided-metadata -lower-affine -convert-scf-to-cf -convert-async-to-llvm -finalize-memref-to-llvm -convert-cf-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts -canonicalize -cse +// RUN: air-opt -o %T/producer_consumer.async.llvm.mlir %s -async-to-async-runtime -async-runtime-ref-counting -async-runtime-ref-counting-opt -convert-linalg-to-affine-loops -expand-strided-metadata -lower-affine -convert-scf-to-cf -convert-async-to-llvm -convert-arith-to-llvm -finalize-memref-to-llvm -convert-cf-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts -canonicalize -cse // RUN: air-translate --mlir-to-llvmir %T/producer_consumer.async.llvm.mlir -o %T/producer_consumer.async.ll // RUN: %OPT -O3 -o %T/producer_consumer.async.opt.bc < %T/producer_consumer.async.ll // 
RUN: %LLC %T/producer_consumer.async.opt.bc --relocation-model=pic -filetype=obj -o %T/producer_consumer.async.o diff --git a/mlir/test/Util/Channel/broadcast/broadcast.mlir b/mlir/test/Util/Channel/broadcast/broadcast.mlir index d61c9510a..67fe3469c 100644 --- a/mlir/test/Util/Channel/broadcast/broadcast.mlir +++ b/mlir/test/Util/Channel/broadcast/broadcast.mlir @@ -6,7 +6,7 @@ //===----------------------------------------------------------------------===// -// RUN: air-opt -o %T/broadcast.llvm.mlir %s -buffer-results-to-out-params -air-to-async -async-to-async-runtime -async-runtime-ref-counting -async-runtime-ref-counting-opt -convert-linalg-to-affine-loops -expand-strided-metadata -lower-affine -convert-scf-to-cf -convert-async-to-llvm -finalize-memref-to-llvm -convert-cf-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts -canonicalize -cse +// RUN: air-opt -o %T/broadcast.llvm.mlir %s -buffer-results-to-out-params -air-to-async -async-to-async-runtime -async-runtime-ref-counting -async-runtime-ref-counting-opt -convert-linalg-to-affine-loops -expand-strided-metadata -lower-affine -convert-scf-to-cf -convert-async-to-llvm -convert-arith-to-llvm -finalize-memref-to-llvm -convert-cf-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts -canonicalize -cse // RUN: air-translate --mlir-to-llvmir %T/broadcast.llvm.mlir -o %T/broadcast.ll // RUN: %OPT -O3 -o %T/broadcast.opt.bc < %T/broadcast.ll // RUN: %LLC %T/broadcast.opt.bc --relocation-model=pic -filetype=obj -o %T/broadcast.o diff --git a/mlir/test/Util/Channel/serial/channel_op_lowering.mlir b/mlir/test/Util/Channel/serial/channel_op_lowering.mlir index e812a20a2..f52bb3133 100644 --- a/mlir/test/Util/Channel/serial/channel_op_lowering.mlir +++ b/mlir/test/Util/Channel/serial/channel_op_lowering.mlir @@ -6,7 +6,7 @@ //===----------------------------------------------------------------------===// -// RUN: air-opt -o %T/channel.async.llvm.mlir %s -buffer-results-to-out-params -air-to-async 
-async-to-async-runtime -async-runtime-ref-counting -async-runtime-ref-counting-opt -convert-linalg-to-affine-loops -expand-strided-metadata -lower-affine -convert-scf-to-cf -convert-async-to-llvm -finalize-memref-to-llvm -convert-cf-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts -canonicalize -cse +// RUN: air-opt -o %T/channel.async.llvm.mlir %s -buffer-results-to-out-params -air-to-async -async-to-async-runtime -async-runtime-ref-counting -async-runtime-ref-counting-opt -convert-linalg-to-affine-loops -expand-strided-metadata -lower-affine -convert-scf-to-cf -convert-async-to-llvm -convert-arith-to-llvm -finalize-memref-to-llvm -convert-cf-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts -canonicalize -cse // RUN: air-translate --mlir-to-llvmir %T/channel.async.llvm.mlir -o %T/channel.async.ll // RUN: %OPT -O3 -o %T/channel.async.opt.bc < %T/channel.async.ll // RUN: %LLC %T/channel.async.opt.bc --relocation-model=pic -filetype=obj -o %T/channel.async.o From 929eca4d85cde6065c997a13fbf06e97f64c6461 Mon Sep 17 00:00:00 2001 From: erwei-xilinx Date: Mon, 13 Jan 2025 00:39:36 -0800 Subject: [PATCH 05/11] Switch air python bindings from pybind11 to nanobind --- CMakeLists.txt | 2 +- python/AIRMLIRModule.cpp | 16 +++++++++------- python/AirHostModule.cpp | 40 +++++++++++++++++++++------------------- python/CMakeLists.txt | 4 ++++ 4 files changed, 35 insertions(+), 27 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 08696e4ec..aba242522 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -108,7 +108,7 @@ include_directories(${AIE_INCLUDE_DIRS}) add_definitions(${LLVM_DEFINITIONS}) ## flags duplicated from mlir-aie -# add_flag_if_supported("-Werror=sign-compare" WERROR_SIGN_COMPARE) +add_flag_if_supported("-Werror=sign-compare" WERROR_SIGN_COMPARE) add_flag_if_supported("-Werror=unused" WERROR_USED) # What happens when you have a non-void function with no return? 
# No `ret` instruction is generated and so execution of that function just diff --git a/python/AIRMLIRModule.cpp b/python/AIRMLIRModule.cpp index 7f24244e8..c1d47529f 100644 --- a/python/AIRMLIRModule.cpp +++ b/python/AIRMLIRModule.cpp @@ -6,17 +6,18 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Bindings/Python/PybindAdaptors.h" +#include "mlir/Bindings/Python/NanobindAdaptors.h" #include "air-c/Dialects.h" #include "air-c/Registration.h" #include "air-c/Runner.h" #include "air-c/Transform.h" -namespace py = pybind11; -using namespace mlir::python::adaptors; +namespace nb = nanobind; +using namespace nb::literals; +using namespace mlir::python; -PYBIND11_MODULE(_air, m) { +NB_MODULE(_air, m) { ::airRegisterAllPasses(); @@ -36,14 +37,15 @@ PYBIND11_MODULE(_air, m) { "registry"_a); // AIR types bindings - mlir_type_subclass(m, "AsyncTokenType", mlirTypeIsAIRAsyncTokenType) + nanobind_adaptors::mlir_type_subclass(m, "AsyncTokenType", + mlirTypeIsAIRAsyncTokenType) .def_classmethod( "get", - [](const py::object &cls, MlirContext ctx) { + [](const nb::object &cls, MlirContext ctx) { return cls(mlirAIRAsyncTokenTypeGet(ctx)); }, "Get an instance of AsyncTokenType in given context.", - py::arg("self"), py::arg("ctx") = py::none()); + nb::arg("self"), nb::arg("ctx") = nb::none()); m.def("run_transform", ::runTransform); diff --git a/python/AirHostModule.cpp b/python/AirHostModule.cpp index 2eabedd07..9f0f9a3c8 100644 --- a/python/AirHostModule.cpp +++ b/python/AirHostModule.cpp @@ -7,9 +7,9 @@ //===----------------------------------------------------------------------===// #include -#include -#include -#include +#include +#include +#include #include "air.hpp" #include "hsa/hsa.h" @@ -17,14 +17,14 @@ #define STRINGIFY(x) #x #define MACRO_STRINGIFY(x) STRINGIFY(x) -namespace py = pybind11; +namespace nb = nanobind; namespace { -void defineAIRHostModule(pybind11::module &m) { +void defineAIRHostModule(nanobind::module 
&m) { m.def( "init_libxaie", []() -> uint64_t { return (uint64_t)air_init_libxaie(); }, - pybind11::return_value_policy::reference); + nanobind::return_value_policy::reference); m.def("deinit_libxaie", [](uint64_t ctx) -> void { air_deinit_libxaie((air_libxaie_ctx_t)ctx); @@ -34,7 +34,7 @@ void defineAIRHostModule(pybind11::module &m) { m.def("shut_down", []() -> uint64_t { return (uint64_t)air_shut_down(); }); - pybind11::class_(m, "ModuleDescriptor") + nanobind::class_(m, "ModuleDescriptor") .def( "getSegments", [](const air_module_desc_t &d) -> std::vector { @@ -43,9 +43,10 @@ void defineAIRHostModule(pybind11::module &m) { segments.push_back(d.segment_descs[i]); return segments; }, - pybind11::return_value_policy::reference); + nanobind::return_value_policy::reference); + // pybind11::return_value_policy::reference); - pybind11::class_(m, "SegmentDescriptor") + nanobind::class_(m, "SegmentDescriptor") .def( "getHerds", [](const air_segment_desc_t &d) -> std::vector { @@ -54,12 +55,12 @@ void defineAIRHostModule(pybind11::module &m) { herds.push_back(d.herd_descs[i]); return herds; }, - pybind11::return_value_policy::reference) + nanobind::return_value_policy::reference) .def("getName", [](const air_segment_desc_t &d) -> std::string { return {d.name, static_cast(d.name_length)}; }); - pybind11::class_(m, "HerdDescriptor") + nanobind::class_(m, "HerdDescriptor") .def("getName", [](const air_herd_desc_t &d) -> std::string { return {d.name, static_cast(d.name_length)}; }); @@ -73,9 +74,9 @@ void defineAIRHostModule(pybind11::module &m) { m.def("module_unload", &air_module_unload); m.def("get_module_descriptor", &air_module_get_desc, - pybind11::return_value_policy::reference); + nanobind::return_value_policy::reference); - pybind11::class_ Agent(m, "Agent"); + nanobind::class_ Agent(m, "Agent"); m.def( "get_agents", @@ -84,9 +85,10 @@ void defineAIRHostModule(pybind11::module &m) { air_get_agents(agents); return agents; }, - 
pybind11::return_value_policy::reference); + nanobind::return_value_policy::reference); + // pybind11::return_value_policy::reference); - pybind11::class_ Queue(m, "Queue"); + nanobind::class_ Queue(m, "Queue"); m.def( "queue_create", @@ -109,11 +111,11 @@ void defineAIRHostModule(pybind11::module &m) { return nullptr; return q; }, - pybind11::return_value_policy::reference); + nanobind::return_value_policy::reference); m.def( "read32", [](uint64_t addr) -> uint32_t { return air_read32(addr); }, - pybind11::return_value_policy::copy); + nanobind::return_value_policy::copy); m.def("write32", [](uint64_t addr, uint32_t val) -> void { return air_write32(addr, val); @@ -124,12 +126,12 @@ void defineAIRHostModule(pybind11::module &m) { [](uint32_t col, uint32_t row) -> uint64_t { return air_get_tile_addr(col, row); }, - pybind11::return_value_policy::copy); + nanobind::return_value_policy::copy); } } // namespace -PYBIND11_MODULE(_airRt, m) { +NB_MODULE(_airRt, m) { m.doc() = R"pbdoc( AIR Runtime Python bindings -------------------------- diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index ac5781f26..979407b72 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -62,6 +62,8 @@ declare_mlir_python_extension(AirPythonExtensions.MLIR AIRCAPI PRIVATE_LINK_LIBS LLVMSupport + PYTHON_BINDINGS_LIBRARY + nanobind ) # Only building this if we are building the hsa runtime, as it requires hsa.h @@ -77,6 +79,8 @@ if(hsa-runtime64_FOUND) AIRCAPI PRIVATE_LINK_LIBS LLVMSupport + PYTHON_BINDINGS_LIBRARY + nanobind ) endif() From 7c1ede777a40db547933c43b90e31e8ae8441f3e Mon Sep 17 00:00:00 2001 From: erwei-xilinx Date: Mon, 13 Jan 2025 01:00:44 -0800 Subject: [PATCH 06/11] Disable -Werror=sign-compare due to issue with nanobind 2.4 --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index aba242522..08696e4ec 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -108,7 +108,7 @@ 
include_directories(${AIE_INCLUDE_DIRS}) add_definitions(${LLVM_DEFINITIONS}) ## flags duplicated from mlir-aie -add_flag_if_supported("-Werror=sign-compare" WERROR_SIGN_COMPARE) +# add_flag_if_supported("-Werror=sign-compare" WERROR_SIGN_COMPARE) add_flag_if_supported("-Werror=unused" WERROR_USED) # What happens when you have a non-void function with no return? # No `ret` instruction is generated and so execution of that function just From 56befd85a538e1bb420a981397ab4c8ea73cee0f Mon Sep 17 00:00:00 2001 From: erwei-xilinx Date: Mon, 13 Jan 2025 01:20:46 -0800 Subject: [PATCH 07/11] Fixup typo nb::module_ (missing underscore) --- python/AirHostModule.cpp | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/python/AirHostModule.cpp b/python/AirHostModule.cpp index 9f0f9a3c8..1a61b8767 100644 --- a/python/AirHostModule.cpp +++ b/python/AirHostModule.cpp @@ -20,11 +20,11 @@ namespace nb = nanobind; namespace { -void defineAIRHostModule(nanobind::module &m) { +void defineAIRHostModule(nb::module_ &m) { m.def( "init_libxaie", []() -> uint64_t { return (uint64_t)air_init_libxaie(); }, - nanobind::return_value_policy::reference); + nb::return_value_policy::reference); m.def("deinit_libxaie", [](uint64_t ctx) -> void { air_deinit_libxaie((air_libxaie_ctx_t)ctx); @@ -34,7 +34,7 @@ void defineAIRHostModule(nanobind::module &m) { m.def("shut_down", []() -> uint64_t { return (uint64_t)air_shut_down(); }); - nanobind::class_(m, "ModuleDescriptor") + nb::class_(m, "ModuleDescriptor") .def( "getSegments", [](const air_module_desc_t &d) -> std::vector { @@ -43,10 +43,9 @@ void defineAIRHostModule(nanobind::module &m) { segments.push_back(d.segment_descs[i]); return segments; }, - nanobind::return_value_policy::reference); - // pybind11::return_value_policy::reference); + nb::return_value_policy::reference); - nanobind::class_(m, "SegmentDescriptor") + nb::class_(m, "SegmentDescriptor") .def( "getHerds", [](const air_segment_desc_t &d) 
-> std::vector { @@ -55,12 +54,12 @@ void defineAIRHostModule(nanobind::module &m) { herds.push_back(d.herd_descs[i]); return herds; }, - nanobind::return_value_policy::reference) + nb::return_value_policy::reference) .def("getName", [](const air_segment_desc_t &d) -> std::string { return {d.name, static_cast(d.name_length)}; }); - nanobind::class_(m, "HerdDescriptor") + nb::class_(m, "HerdDescriptor") .def("getName", [](const air_herd_desc_t &d) -> std::string { return {d.name, static_cast(d.name_length)}; }); @@ -74,9 +73,9 @@ void defineAIRHostModule(nanobind::module &m) { m.def("module_unload", &air_module_unload); m.def("get_module_descriptor", &air_module_get_desc, - nanobind::return_value_policy::reference); + nb::return_value_policy::reference); - nanobind::class_ Agent(m, "Agent"); + nb::class_ Agent(m, "Agent"); m.def( "get_agents", @@ -85,10 +84,9 @@ void defineAIRHostModule(nanobind::module &m) { air_get_agents(agents); return agents; }, - nanobind::return_value_policy::reference); - // pybind11::return_value_policy::reference); + nb::return_value_policy::reference); - nanobind::class_ Queue(m, "Queue"); + nb::class_ Queue(m, "Queue"); m.def( "queue_create", @@ -111,11 +109,11 @@ void defineAIRHostModule(nanobind::module &m) { return nullptr; return q; }, - nanobind::return_value_policy::reference); + nb::return_value_policy::reference); m.def( "read32", [](uint64_t addr) -> uint32_t { return air_read32(addr); }, - nanobind::return_value_policy::copy); + nb::return_value_policy::copy); m.def("write32", [](uint64_t addr, uint32_t val) -> void { return air_write32(addr, val); @@ -126,7 +124,7 @@ void defineAIRHostModule(nanobind::module &m) { [](uint32_t col, uint32_t row) -> uint64_t { return air_get_tile_addr(col, row); }, - nanobind::return_value_policy::copy); + nb::return_value_policy::copy); } } // namespace From 98a9a989d0f9ad3a753a4fc100d9831609e1a467 Mon Sep 17 00:00:00 2001 From: erwei-xilinx Date: Mon, 13 Jan 2025 01:38:57 -0800 Subject: [PATCH 
08/11] Replace return_policy with rv_policy --- python/AirHostModule.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/python/AirHostModule.cpp b/python/AirHostModule.cpp index 1a61b8767..5b808d5c4 100644 --- a/python/AirHostModule.cpp +++ b/python/AirHostModule.cpp @@ -24,7 +24,7 @@ void defineAIRHostModule(nb::module_ &m) { m.def( "init_libxaie", []() -> uint64_t { return (uint64_t)air_init_libxaie(); }, - nb::return_value_policy::reference); + nb::rv_policy::reference); m.def("deinit_libxaie", [](uint64_t ctx) -> void { air_deinit_libxaie((air_libxaie_ctx_t)ctx); @@ -43,7 +43,7 @@ void defineAIRHostModule(nb::module_ &m) { segments.push_back(d.segment_descs[i]); return segments; }, - nb::return_value_policy::reference); + nb::rv_policy::reference); nb::class_(m, "SegmentDescriptor") .def( @@ -54,7 +54,7 @@ void defineAIRHostModule(nb::module_ &m) { herds.push_back(d.herd_descs[i]); return herds; }, - nb::return_value_policy::reference) + nb::rv_policy::reference) .def("getName", [](const air_segment_desc_t &d) -> std::string { return {d.name, static_cast(d.name_length)}; }); @@ -73,7 +73,7 @@ void defineAIRHostModule(nb::module_ &m) { m.def("module_unload", &air_module_unload); m.def("get_module_descriptor", &air_module_get_desc, - nb::return_value_policy::reference); + nb::rv_policy::reference); nb::class_ Agent(m, "Agent"); @@ -84,7 +84,7 @@ void defineAIRHostModule(nb::module_ &m) { air_get_agents(agents); return agents; }, - nb::return_value_policy::reference); + nb::rv_policy::reference); nb::class_ Queue(m, "Queue"); @@ -109,11 +109,11 @@ void defineAIRHostModule(nb::module_ &m) { return nullptr; return q; }, - nb::return_value_policy::reference); + nb::rv_policy::reference); m.def( "read32", [](uint64_t addr) -> uint32_t { return air_read32(addr); }, - nb::return_value_policy::copy); + nb::rv_policy::copy); m.def("write32", [](uint64_t addr, uint32_t val) -> void { return air_write32(addr, val); @@ -124,7 +124,7 @@ 
void defineAIRHostModule(nb::module_ &m) { [](uint32_t col, uint32_t row) -> uint64_t { return air_get_tile_addr(col, row); }, - nb::return_value_policy::copy); + nb::rv_policy::copy); } } // namespace From 55edbd6e1cbe8e0ed74d511c467c2030e138040b Mon Sep 17 00:00:00 2001 From: erwei-xilinx Date: Mon, 13 Jan 2025 03:32:12 -0800 Subject: [PATCH 09/11] Replace applyOpPatternsAndFold with applyOpPatternsGreedily --- mlir/lib/Transform/AIRDependencyScheduleOpt.cpp | 2 +- mlir/lib/Util/Dependency.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Transform/AIRDependencyScheduleOpt.cpp b/mlir/lib/Transform/AIRDependencyScheduleOpt.cpp index 335a9c94c..79842f70e 100644 --- a/mlir/lib/Transform/AIRDependencyScheduleOpt.cpp +++ b/mlir/lib/Transform/AIRDependencyScheduleOpt.cpp @@ -4424,7 +4424,7 @@ LogicalResult AIRIsolateAsyncDmaLoopNestsImpl(Region *region) { RewritePatternSet patterns(ctx); patterns.insert(ctx); - (void)applyOpPatternsAndFold(forOps, std::move(patterns)); + (void)applyOpPatternsGreedily(forOps, std::move(patterns)); // Greedily hoisting air.herd ops out of for loops and merging, and then // re-applying loop splitting. 
diff --git a/mlir/lib/Util/Dependency.cpp b/mlir/lib/Util/Dependency.cpp index abc7a39e7..4a301ceaf 100644 --- a/mlir/lib/Util/Dependency.cpp +++ b/mlir/lib/Util/Dependency.cpp @@ -554,8 +554,8 @@ void addAsyncDependencyIfNewImpl(scf::ForOp op, Value token) { if (!isAsyncOp(op)) { RewritePatternSet patterns(ctx); patterns.insert(ctx, token); - (void)applyOpPatternsAndFold(ArrayRef{op}, - std::move(patterns)); + (void)applyOpPatternsGreedily(ArrayRef{op}, + std::move(patterns)); } } void addAsyncDependencyIfNewImpl(scf::ParallelOp op, Value token) { From 85ab0833e267ad28ccfb5b44a3b351551f6cda2f Mon Sep 17 00:00:00 2001 From: erwei-xilinx Date: Mon, 13 Jan 2025 19:55:11 -0800 Subject: [PATCH 10/11] Add a workaround to resolve the domain conflict introduced by nanobind 2.4 --- python/CMakeLists.txt | 107 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 2 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 979407b72..61da31a54 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -98,9 +98,111 @@ add_mlir_python_common_capi_library(AirAggregateCAPI AirPythonExtensions ) -add_mlir_python_modules(AirMLIRPythonModules +################################################################################ +# MLIR has switched to nanobind 2.4, which enforces a check on the domain, but +# MLIR's add_mlir_python_extension and add_mlir_python_modules functions +# hard-code NB_DOMAIN to mlir, leading to a domain conflict between air and aie. +# The workaround below re-implements those functions with NB_DOMAIN exposed as +# a customizable argument. +# TODO: Remove these when upstream LLVM fixes this issue.
+function(add_mlir_python_extension_domain libname extname) +cmake_parse_arguments(ARG +"" +"INSTALL_COMPONENT;INSTALL_DIR;OUTPUT_DIRECTORY;PYTHON_BINDINGS_LIBRARY" +"SOURCES;LINK_LIBS;NB_DOMAIN" +${ARGN}) +set(eh_rtti_enable) +if (MSVC) + set(eh_rtti_enable /EHsc /GR) +elseif(LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL) + set(eh_rtti_enable -frtti -fexceptions) +endif () +nanobind_add_module(${libname} +NB_DOMAIN ${ARG_NB_DOMAIN} +${ARG_SOURCES} +) +if (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL) +target_compile_options(nanobind-static + PRIVATE + -Wno-cast-qual + -Wno-zero-length-array + -Wno-nested-anon-types + -Wno-c++98-compat-extra-semi + -Wno-covered-switch-default + ${eh_rtti_enable} +) +endif() +target_compile_options(${libname} PRIVATE ${eh_rtti_enable}) +set_target_properties( + ${libname} PROPERTIES + LIBRARY_OUTPUT_DIRECTORY ${ARG_OUTPUT_DIRECTORY} + OUTPUT_NAME "${extname}" + NO_SONAME ON +) +target_link_libraries(${libname} + PRIVATE + ${ARG_LINK_LIBS} +) +target_link_options(${libname} + PRIVATE + $<$<PLATFORM_ID:Linux>:LINKER:--exclude-libs,ALL> +) +if(ARG_INSTALL_DIR) + install(TARGETS ${libname} + COMPONENT ${ARG_INSTALL_COMPONENT} + LIBRARY DESTINATION ${ARG_INSTALL_DIR} + ARCHIVE DESTINATION ${ARG_INSTALL_DIR} + RUNTIME DESTINATION ${ARG_INSTALL_DIR} + ) +endif() +endfunction() + +function(add_mlir_python_modules_domain name) +cmake_parse_arguments(ARG + "" + "ROOT_PREFIX;INSTALL_PREFIX" + "COMMON_CAPI_LINK_LIBS;DECLARED_SOURCES;NB_DOMAIN" + ${ARGN}) +function(_process_target modules_target sources_target) + get_target_property(_source_type ${sources_target} mlir_python_SOURCES_TYPE) + if(_source_type STREQUAL "extension") + get_target_property(_module_name ${sources_target} mlir_python_EXTENSION_MODULE_NAME) + get_target_property(_bindings_library ${sources_target} mlir_python_BINDINGS_LIBRARY) + set(_extension_target "${modules_target}.extension.${_module_name}.dso") + add_mlir_python_extension_domain(${_extension_target} "${_module_name}" + INSTALL_COMPONENT
${modules_target} + INSTALL_DIR "${ARG_INSTALL_PREFIX}/_mlir_libs" + OUTPUT_DIRECTORY "${ARG_ROOT_PREFIX}/_mlir_libs" + PYTHON_BINDINGS_LIBRARY ${_bindings_library} + NB_DOMAIN ${ARG_NB_DOMAIN} + LINK_LIBS PRIVATE + ${sources_target} + ${ARG_COMMON_CAPI_LINK_LIBS} + ) + add_dependencies(${modules_target} ${_extension_target}) + mlir_python_setup_extension_rpath(${_extension_target}) + endif() +endfunction() + +add_custom_target(${name} ALL) +_flatten_mlir_python_targets(_flat_targets ${ARG_DECLARED_SOURCES}) +foreach(sources_target ${_flat_targets}) + _process_target(${name} ${sources_target}) +endforeach() + +if(NOT LLVM_ENABLE_IDE) + add_llvm_install_targets( + install-${name} + DEPENDS ${name} + COMPONENT ${name}) +endif() +endfunction() +################################################################################ + +add_mlir_python_modules_domain(AirMLIRPythonModules ROOT_PREFIX "${AIR_PYTHON_PACKAGES_DIR}/air" INSTALL_PREFIX "python/air" + NB_DOMAIN "_air" DECLARED_SOURCES MLIRPythonSources MLIRPythonExtension.Core @@ -111,9 +213,10 @@ add_mlir_python_modules(AirMLIRPythonModules AirAggregateCAPI ) -add_mlir_python_modules(AirPythonModules +add_mlir_python_modules_domain(AirPythonModules ROOT_PREFIX "${AIR_PYTHON_PACKAGES_DIR}/air" INSTALL_PREFIX "python/air" + NB_DOMAIN "_air" DECLARED_SOURCES AirPythonSources COMMON_CAPI_LINK_LIBS From 96d6489038ee6c5ad470521d134bac2d46e54520 Mon Sep 17 00:00:00 2001 From: erwei-xilinx Date: Mon, 13 Jan 2025 20:15:24 -0800 Subject: [PATCH 11/11] Add missing condition in _process_target --- python/CMakeLists.txt | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 61da31a54..e9e8eced3 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -118,19 +118,19 @@ elseif(LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL) set(eh_rtti_enable -frtti -fexceptions) endif () nanobind_add_module(${libname} -NB_DOMAIN 
${ARG_NB_DOMAIN} -${ARG_SOURCES} + NB_DOMAIN ${ARG_NB_DOMAIN} + ${ARG_SOURCES} ) if (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL) -target_compile_options(nanobind-static - PRIVATE - -Wno-cast-qual - -Wno-zero-length-array - -Wno-nested-anon-types - -Wno-c++98-compat-extra-semi - -Wno-covered-switch-default - ${eh_rtti_enable} -) + target_compile_options(nanobind-static + PRIVATE + -Wno-cast-qual + -Wno-zero-length-array + -Wno-nested-anon-types + -Wno-c++98-compat-extra-semi + -Wno-covered-switch-default + ${eh_rtti_enable} + ) endif() target_compile_options(${libname} PRIVATE ${eh_rtti_enable}) set_target_properties( @@ -165,7 +165,17 @@ cmake_parse_arguments(ARG ${ARGN}) function(_process_target modules_target sources_target) get_target_property(_source_type ${sources_target} mlir_python_SOURCES_TYPE) - if(_source_type STREQUAL "extension") + if(_source_type STREQUAL "pure") + # Pure python sources to link into the tree. + set(_pure_sources_target "${modules_target}.sources.${sources_target}") + add_mlir_python_sources_target(${_pure_sources_target} + INSTALL_COMPONENT ${modules_target} + INSTALL_DIR ${ARG_INSTALL_PREFIX} + OUTPUT_DIRECTORY ${ARG_ROOT_PREFIX} + SOURCES_TARGETS ${sources_target} + ) + add_dependencies(${modules_target} ${_pure_sources_target}) + elseif(_source_type STREQUAL "extension") get_target_property(_module_name ${sources_target} mlir_python_EXTENSION_MODULE_NAME) get_target_property(_bindings_library ${sources_target} mlir_python_BINDINGS_LIBRARY) set(_extension_target "${modules_target}.extension.${_module_name}.dso")