From 362a7302702f801dbc61e100a327706e8bc7db9c Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Tue, 28 Nov 2023 14:17:38 +0100 Subject: [PATCH 01/22] wip: constant folding Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 18 ++++++++++++++++++ internal/engine/wazevo/ssa/pass_test.go | 24 +++++++++++++++++++++--- 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 17c8486f60..f7f78e13a3 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -329,6 +329,8 @@ func passNopInstElimination(b *builder) { for cur := blk.rootInstr; cur != nil; cur = cur.next { switch cur.Opcode() { // TODO: add more logics here. + // Amount := (Const $someValue) + // (Shift X, Amount) where Amount == x.Type.Bits() => X case OpcodeIshl, OpcodeSshr, OpcodeUshr: x, amount := cur.Arg2() definingInst := b.valueIDToInstruction[amount.ID()] @@ -348,6 +350,22 @@ func passNopInstElimination(b *builder) { b.alias(cur.Return(), x) } } + // Z := Const 0 + // (Iadd X, Z) => X + // (Iadd Z, Y) => Y + case OpcodeIadd: + x, y := cur.Arg2() + definingInst := b.valueIDToInstruction[y.ID()] + if definingInst == nil { + if definingInst = b.valueIDToInstruction[x.ID()]; definingInst == nil { + continue + } else { + x = y + } + } + if definingInst.Constant() && definingInst.ConstantVal() == 0 { + b.alias(cur.Return(), x) + } } } } diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 2a831a67f1..a30a61e7ca 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -310,8 +310,20 @@ blk1: () <-- (blk0) nonZeroI64 := b.AllocateInstruction().AsIconst64(64*245 + 1).Insert(b).Return() nonZeroSshr := b.AllocateInstruction().AsSshr(i64Param, nonZeroI64).Insert(b).Return() + // Iadd32. 
+ zero32 := b.AllocateInstruction().AsIconst32(0).Insert(b).Return() + nopIadd32 := b.AllocateInstruction().AsIadd(i32Param, zero32).Insert(b).Return() + + // Iadd32. + zero32_2 := b.AllocateInstruction().AsIconst32(0).Insert(b).Return() + nopIadd32_2 := b.AllocateInstruction().AsIadd(zero32_2, i32Param).Insert(b).Return() + + // Iadd64. + zero64 := b.AllocateInstruction().AsIconst64(0).Insert(b).Return() + nopIadd64 := b.AllocateInstruction().AsIadd(i64Param, zero64).Insert(b).Return() + ret := b.AllocateInstruction() - ret.AsReturn([]Value{nopIshl, nopUshr, nonZeroIshl, nonZeroSshr}) + ret.AsReturn([]Value{nopIshl, nopUshr, nonZeroIshl, nonZeroSshr, nopIadd32, nopIadd32_2, nopIadd64}) b.InsertInstruction(ret) return nil }, @@ -325,7 +337,13 @@ blk0: (v0:i32, v1:i64) v7:i32 = Ishl v0, v6 v8:i64 = Iconst_64 0x3d41 v9:i64 = Sshr v1, v8 - Return v3, v5, v7, v9 + v10:i32 = Iconst_32 0x0 + v11:i32 = Iadd v0, v10 + v12:i32 = Iconst_32 0x0 + v13:i32 = Iadd v12, v0 + v14:i64 = Iconst_64 0x0 + v15:i64 = Iadd v1, v14 + Return v3, v5, v7, v9, v11, v13, v15 `, after: ` blk0: (v0:i32, v1:i64) @@ -333,7 +351,7 @@ blk0: (v0:i32, v1:i64) v7:i32 = Ishl v0, v6 v8:i64 = Iconst_64 0x3d41 v9:i64 = Sshr v1, v8 - Return v0, v1, v7, v9 + Return v0, v1, v7, v9, v0, v0, v1 `, }, } { From fe56392beb9940bd8bb3b7e1496208d825b70972 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Tue, 28 Nov 2023 17:04:01 +0100 Subject: [PATCH 02/22] wip Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 36 ++++++++++++++++------- internal/engine/wazevo/ssa/pass_test.go | 38 +++++++++++++++++-------- 2 files changed, 52 insertions(+), 22 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index f7f78e13a3..6cfb6689ec 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -327,7 +327,8 @@ func passNopInstElimination(b *builder) { for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { 
for cur := blk.rootInstr; cur != nil; cur = cur.next { - switch cur.Opcode() { + op := cur.Opcode() + switch op { // TODO: add more logics here. // Amount := (Const $someValue) // (Shift X, Amount) where Amount == x.Type.Bits() => X @@ -351,20 +352,35 @@ func passNopInstElimination(b *builder) { } } // Z := Const 0 - // (Iadd X, Z) => X - // (Iadd Z, Y) => Y - case OpcodeIadd: + // - (Iadd|Isub X, Z) => X + // - (Iadd Z, Y) => Y + case OpcodeIadd: //, OpcodeIsub: x, y := cur.Arg2() - definingInst := b.valueIDToInstruction[y.ID()] - if definingInst == nil { - if definingInst = b.valueIDToInstruction[x.ID()]; definingInst == nil { + xDef := b.valueIDToInstruction[x.ID()] + yDef := b.valueIDToInstruction[y.ID()] + if yDef == nil { + // If there's no defining instruction, that means the amount is coming from the parameter. + if xDef == nil { + // If we are adding the two parameters, ignore. continue } else { - x = y + // Add is commutative, normalize (param, y) => (y, param). + x, y = y, x + xDef, yDef = yDef, xDef } } - if definingInst.Constant() && definingInst.ConstantVal() == 0 { - b.alias(cur.Return(), x) + if yDef.Constant() { + yc := yDef.ConstantVal() + if yc == 0 { + b.alias(cur.Return(), x) + } else if xDef.Constant() { + xc := xDef.ConstantVal() + cur.opcode = OpcodeIconst + cur.u1 = xc + yc + cur.u2 = 0 + cur.v = 0 + cur.v2 = 0 + } } } } diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index a30a61e7ca..97971d30b9 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -310,20 +310,29 @@ blk1: () <-- (blk0) nonZeroI64 := b.AllocateInstruction().AsIconst64(64*245 + 1).Insert(b).Return() nonZeroSshr := b.AllocateInstruction().AsSshr(i64Param, nonZeroI64).Insert(b).Return() - // Iadd32. 
- zero32 := b.AllocateInstruction().AsIconst32(0).Insert(b).Return() - nopIadd32 := b.AllocateInstruction().AsIadd(i32Param, zero32).Insert(b).Return() + // Iadd32 x + 0 should resolve to const. + zeroI32 := b.AllocateInstruction().AsIconst32(0).Insert(b).Return() + nopIadd32 := b.AllocateInstruction().AsIadd(i32Param, zeroI32).Insert(b).Return() - // Iadd32. - zero32_2 := b.AllocateInstruction().AsIconst32(0).Insert(b).Return() - nopIadd32_2 := b.AllocateInstruction().AsIadd(zero32_2, i32Param).Insert(b).Return() + // Iadd32 0 + x should resolve to const. + zeroI32_2 := b.AllocateInstruction().AsIconst32(0).Insert(b).Return() + nopIadd32_2 := b.AllocateInstruction().AsIadd(zeroI32_2, i32Param).Insert(b).Return() - // Iadd64. - zero64 := b.AllocateInstruction().AsIconst64(0).Insert(b).Return() - nopIadd64 := b.AllocateInstruction().AsIadd(i64Param, zero64).Insert(b).Return() + // Iadd64 x + 0 should resolve to const. + zeroI64 := b.AllocateInstruction().AsIconst64(0).Insert(b).Return() + nopIadd64 := b.AllocateInstruction().AsIadd(i64Param, zeroI64).Insert(b).Return() + + // Iadd64 0 + x should resolve to const. + zeroI64_2 := b.AllocateInstruction().AsIconst64(0).Insert(b).Return() + nopIadd64_2 := b.AllocateInstruction().AsIadd(zeroI64_2, i64Param).Insert(b).Return() + + // Iadd32 const1 + const2 should resolve to Const (const1 + const2). 
+ nonZeroI32_3 := b.AllocateInstruction().AsIconst32(1234).Insert(b).Return() + nonZeroI32_4 := b.AllocateInstruction().AsIconst32(5678).Insert(b).Return() + foldIaddI32_3 := b.AllocateInstruction().AsIadd(nonZeroI32_3, nonZeroI32_4).Insert(b).Return() ret := b.AllocateInstruction() - ret.AsReturn([]Value{nopIshl, nopUshr, nonZeroIshl, nonZeroSshr, nopIadd32, nopIadd32_2, nopIadd64}) + ret.AsReturn([]Value{nopIshl, nopUshr, nonZeroIshl, nonZeroSshr, nopIadd32, nopIadd32_2, nopIadd64, nopIadd64_2, foldIaddI32_3}) b.InsertInstruction(ret) return nil }, @@ -343,7 +352,12 @@ blk0: (v0:i32, v1:i64) v13:i32 = Iadd v12, v0 v14:i64 = Iconst_64 0x0 v15:i64 = Iadd v1, v14 - Return v3, v5, v7, v9, v11, v13, v15 + v16:i64 = Iconst_64 0x0 + v17:i64 = Iadd v16, v1 + v18:i32 = Iconst_32 0x4d2 + v19:i32 = Iconst_32 0x162e + v20:i32 = Iadd v18, v19 + Return v3, v5, v7, v9, v11, v13, v15, v17, v20 `, after: ` blk0: (v0:i32, v1:i64) @@ -351,7 +365,7 @@ blk0: (v0:i32, v1:i64) v7:i32 = Ishl v0, v6 v8:i64 = Iconst_64 0x3d41 v9:i64 = Sshr v1, v8 - Return v0, v1, v7, v9, v0, v0, v1 + Return v0, v1, v7, v9, v0, v0, v1, v1 `, }, } { From d68ac6bf8e58f9fca064ee7f21fbaf48d760a28b Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Tue, 28 Nov 2023 17:42:18 +0100 Subject: [PATCH 03/22] wip Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 106 ++++++++++++++++-------- internal/engine/wazevo/ssa/pass_test.go | 57 ++++++++++--- 2 files changed, 120 insertions(+), 43 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 6cfb6689ec..af71d0d5f4 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -33,6 +33,8 @@ func (b *builder) RunPasses() { // Arithmetic simplifications. // and more! + passConstFoldingOpt(b) + // passDeadCodeEliminationOpt could be more accurate if we do this after other optimizations. 
passDeadCodeEliminationOpt(b) b.donePasses = true @@ -309,21 +311,7 @@ func (b *builder) clearBlkVisited() { // passNopInstElimination eliminates the instructions which is essentially a no-op. func passNopInstElimination(b *builder) { - if int(b.nextValueID) >= len(b.valueIDToInstruction) { - b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...) - } - - for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { - for cur := blk.rootInstr; cur != nil; cur = cur.next { - r1, rs := cur.Returns() - if r1.Valid() { - b.valueIDToInstruction[r1.ID()] = cur - } - for _, r := range rs { - b.valueIDToInstruction[r.ID()] = cur - } - } - } + ensureValueIdToInstructionInit(b) for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { for cur := blk.rootInstr; cur != nil; cur = cur.next { @@ -352,32 +340,84 @@ func passNopInstElimination(b *builder) { } } // Z := Const 0 - // - (Iadd|Isub X, Z) => X - // - (Iadd Z, Y) => Y - case OpcodeIadd: //, OpcodeIsub: + // (Iadd X, Z) => X + // (Iadd Z, Y) => Y + case OpcodeIadd: x, y := cur.Arg2() - xDef := b.valueIDToInstruction[x.ID()] - yDef := b.valueIDToInstruction[y.ID()] - if yDef == nil { - // If there's no defining instruction, that means the amount is coming from the parameter. - if xDef == nil { - // If we are adding the two parameters, ignore. + definingInst := b.valueIDToInstruction[y.ID()] + if definingInst == nil { + if definingInst = b.valueIDToInstruction[x.ID()]; definingInst == nil { continue } else { - // Add is commutative, normalize (param, y) => (y, param). 
- x, y = y, x - xDef, yDef = yDef, xDef + x = y } } - if yDef.Constant() { - yc := yDef.ConstantVal() - if yc == 0 { - b.alias(cur.Return(), x) - } else if xDef.Constant() { + if definingInst.Constant() && definingInst.ConstantVal() == 0 { + b.alias(cur.Return(), x) + } + } + } + } +} + +func ensureValueIdToInstructionInit(b *builder) { + if len(b.valueIDToInstruction) != 0 { + return + } + + if int(b.nextValueID) >= len(b.valueIDToInstruction) { + b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...) + } + + for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { + for cur := blk.rootInstr; cur != nil; cur = cur.next { + r1, rs := cur.Returns() + if r1.Valid() { + b.valueIDToInstruction[r1.ID()] = cur + } + for _, r := range rs { + b.valueIDToInstruction[r.ID()] = cur + } + } + } +} + +// passNopInstElimination eliminates the instructions which is essentially a no-op. +func passConstFoldingOpt(b *builder) { + ensureValueIdToInstructionInit(b) + + isFixedPoint := false + for !isFixedPoint { + isFixedPoint = true + for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { + for cur := blk.rootInstr; cur != nil; cur = cur.next { + op := cur.Opcode() + switch op { + // X := Const xc + // Y := Const yc + // - (Iadd X, Y) => Const (xc + yc) + case OpcodeIadd, OpcodeIsub: + isFixedPoint = false + x, y := cur.Arg2() + xDef := b.valueIDToInstruction[x.ID()] + yDef := b.valueIDToInstruction[y.ID()] + if xDef == nil || yDef == nil { + // If we are adding some parameter, ignore. + continue + } + if xDef.Constant() || yDef.Constant() { + yc := yDef.ConstantVal() xc := xDef.ConstantVal() + // Mutate the instruction to an Iconst. cur.opcode = OpcodeIconst - cur.u1 = xc + yc + switch op { + case OpcodeIadd: + cur.u1 = xc + yc + case OpcodeIsub: + cur.u1 = xc - yc + } cur.u2 = 0 + // Clear the references to operands. 
cur.v = 0 cur.v2 = 0 } diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 97971d30b9..2bdb14335d 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -326,13 +326,8 @@ blk1: () <-- (blk0) zeroI64_2 := b.AllocateInstruction().AsIconst64(0).Insert(b).Return() nopIadd64_2 := b.AllocateInstruction().AsIadd(zeroI64_2, i64Param).Insert(b).Return() - // Iadd32 const1 + const2 should resolve to Const (const1 + const2). - nonZeroI32_3 := b.AllocateInstruction().AsIconst32(1234).Insert(b).Return() - nonZeroI32_4 := b.AllocateInstruction().AsIconst32(5678).Insert(b).Return() - foldIaddI32_3 := b.AllocateInstruction().AsIadd(nonZeroI32_3, nonZeroI32_4).Insert(b).Return() - ret := b.AllocateInstruction() - ret.AsReturn([]Value{nopIshl, nopUshr, nonZeroIshl, nonZeroSshr, nopIadd32, nopIadd32_2, nopIadd64, nopIadd64_2, foldIaddI32_3}) + ret.AsReturn([]Value{nopIshl, nopUshr, nonZeroIshl, nonZeroSshr, nopIadd32, nopIadd32_2, nopIadd64, nopIadd64_2}) b.InsertInstruction(ret) return nil }, @@ -354,10 +349,7 @@ blk0: (v0:i32, v1:i64) v15:i64 = Iadd v1, v14 v16:i64 = Iconst_64 0x0 v17:i64 = Iadd v16, v1 - v18:i32 = Iconst_32 0x4d2 - v19:i32 = Iconst_32 0x162e - v20:i32 = Iadd v18, v19 - Return v3, v5, v7, v9, v11, v13, v15, v17, v20 + Return v3, v5, v7, v9, v11, v13, v15, v17 `, after: ` blk0: (v0:i32, v1:i64) @@ -366,6 +358,51 @@ blk0: (v0:i32, v1:i64) v8:i64 = Iconst_64 0x3d41 v9:i64 = Sshr v1, v8 Return v0, v1, v7, v9, v0, v0, v1, v1 +`, + }, + { + name: "const folding", + pass: passConstFoldingOpt, + postPass: passDeadCodeEliminationOpt, + setup: func(b *builder) (verifier func(t *testing.T)) { + entry := b.AllocateBasicBlock() + b.SetCurrentBlock(entry) + + // Iadd32 const1 + const2 should resolve to Const (const1 + const2). 
+ nonZeroI32_1 := b.AllocateInstruction().AsIconst32(0x1).Insert(b).Return() + nonZeroI32_2 := b.AllocateInstruction().AsIconst32(0x2).Insert(b).Return() + foldIaddI32_1 := b.AllocateInstruction().AsIadd(nonZeroI32_1, nonZeroI32_2).Insert(b).Return() + + // Iadd32 foldedConst1, const3 should resolve to Const (foldedConst1, const3). + nonZeroI32_3 := b.AllocateInstruction().AsIconst32(0x3).Insert(b).Return() + foldIaddI32_2 := b.AllocateInstruction().AsIadd(foldIaddI32_1, nonZeroI32_3).Insert(b).Return() + + // Isub32 foldedConst1, const3 should resolve to Const (const4, foldedConst2). + nonZeroI32_4 := b.AllocateInstruction().AsIconst32(0x4).Insert(b).Return() + foldIsubI32_1 := b.AllocateInstruction().AsIsub(nonZeroI32_4, foldIaddI32_2).Insert(b).Return() + + ret := b.AllocateInstruction() + ret.AsReturn([]Value{foldIsubI32_1}) + b.InsertInstruction(ret) + return nil + }, + before: ` +blk0: () + v0:i32 = Iconst_32 0x1 + v1:i32 = Iconst_32 0x2 + v2:i32 = Iadd v0, v1 + v3:i32 = Iconst_32 0x3 + v4:i32 = Iadd v2, v3 + v5:i32 = Iconst_32 0x4 + v6:i32 = Isub v5, v4 + Return v6 +`, + // FIXME: the first `Iconst_32 0x1` should be dead code, and should not be present in the output. + after: ` +blk0: () + v0:i32 = Iconst_32 0x1 + v6:i32 = Iconst_32 0xfffffffe + Return v6 `, }, } { From 10f0f22be893b2d543eab02e3ef6553910d224ec Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Tue, 28 Nov 2023 17:54:01 +0100 Subject: [PATCH 04/22] wip Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index af71d0d5f4..8d6ce0e504 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -382,7 +382,7 @@ func ensureValueIdToInstructionInit(b *builder) { } } -// passNopInstElimination eliminates the instructions which is essentially a no-op. 
+// passConstFoldingOpt folds constant arithmetic ops into constant ops. func passConstFoldingOpt(b *builder) { ensureValueIdToInstructionInit(b) @@ -397,7 +397,6 @@ func passConstFoldingOpt(b *builder) { // Y := Const yc // - (Iadd X, Y) => Const (xc + yc) case OpcodeIadd, OpcodeIsub: - isFixedPoint = false x, y := cur.Arg2() xDef := b.valueIDToInstruction[x.ID()] yDef := b.valueIDToInstruction[y.ID()] @@ -405,7 +404,9 @@ func passConstFoldingOpt(b *builder) { // If we are adding some parameter, ignore. continue } - if xDef.Constant() || yDef.Constant() { + if xDef.Constant() && yDef.Constant() { + isFixedPoint = false + yc := yDef.ConstantVal() xc := xDef.ConstantVal() // Mutate the instruction to an Iconst. From 87e44539219f2411d4fa0e4729983e82b6481172 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Wed, 29 Nov 2023 10:39:07 +0100 Subject: [PATCH 05/22] valueIDToInstruction should be always re-inited Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 8d6ce0e504..60823f721e 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -361,9 +361,7 @@ func passNopInstElimination(b *builder) { } func ensureValueIdToInstructionInit(b *builder) { - if len(b.valueIDToInstruction) != 0 { - return - } + b.valueIDToInstruction = b.valueIDToInstruction[:0] if int(b.nextValueID) >= len(b.valueIDToInstruction) { b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...) 
From 48bf12da94b3052ca595a3dad1a2d15a4affcbca Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Wed, 29 Nov 2023 11:35:29 +0100 Subject: [PATCH 06/22] valueIDToInstruction does not have to be cleared explicitly Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 60823f721e..5e8ae74caa 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -361,8 +361,6 @@ func passNopInstElimination(b *builder) { } func ensureValueIdToInstructionInit(b *builder) { - b.valueIDToInstruction = b.valueIDToInstruction[:0] - if int(b.nextValueID) >= len(b.valueIDToInstruction) { b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...) } From 82d6f04e14cdbb2488b3a95ead3f333432c8e273 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Wed, 29 Nov 2023 11:55:26 +0100 Subject: [PATCH 07/22] wip Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 46 ++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 5e8ae74caa..9b633b1208 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -2,6 +2,7 @@ package ssa import ( "fmt" + "math" "sort" "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" @@ -392,7 +393,7 @@ func passConstFoldingOpt(b *builder) { // X := Const xc // Y := Const yc // - (Iadd X, Y) => Const (xc + yc) - case OpcodeIadd, OpcodeIsub: + case OpcodeIadd, OpcodeIsub, OpcodeImul: x, y := cur.Arg2() xDef := b.valueIDToInstruction[x.ID()] yDef := b.valueIDToInstruction[y.ID()] @@ -402,21 +403,56 @@ func passConstFoldingOpt(b *builder) { } if xDef.Constant() && yDef.Constant() { isFixedPoint = false - yc := yDef.ConstantVal() xc := xDef.ConstantVal() // Mutate the instruction to an Iconst. 
+ // We assume all the types are consistent. cur.opcode = OpcodeIconst + // Clear the references to operands. + cur.v, cur.v2 = 0, 0 switch op { case OpcodeIadd: cur.u1 = xc + yc case OpcodeIsub: cur.u1 = xc - yc + case OpcodeImul: + cur.u1 = xc * yc } - cur.u2 = 0 + } + case OpcodeFadd, OpcodeFsub: + x, y := cur.Arg2() + xDef := b.valueIDToInstruction[x.ID()] + yDef := b.valueIDToInstruction[y.ID()] + if xDef == nil || yDef == nil { + // If we are adding some parameter, ignore. + continue + } + if xDef.Constant() && yDef.Constant() { + isFixedPoint = false + // Mutate the instruction to an Iconst. + // We assume all the types are consistent. + cur.opcode = OpcodeIconst // Clear the references to operands. - cur.v = 0 - cur.v2 = 0 + cur.v, cur.v2 = 0, 0 + if x.Type().Bits() == 64 { + yc := math.Float64frombits(yDef.ConstantVal()) + xc := math.Float64frombits(xDef.ConstantVal()) + switch op { + case OpcodeFadd: + cur.u1 = uint64(math.Float64bits(xc + yc)) + case OpcodeFsub: + cur.u1 = uint64(math.Float64bits(xc - yc)) + } + } else { + yc := math.Float32frombits(uint32(yDef.ConstantVal())) + xc := math.Float32frombits(uint32(xDef.ConstantVal())) + switch op { + case OpcodeFadd: + cur.u1 = uint64(math.Float32bits(xc + yc)) + case OpcodeFsub: + cur.u1 = uint64(math.Float32bits(xc - yc)) + } + } } } } From 500aa5457ecaf05b2a017465e6fb7c3524c199d3 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Wed, 29 Nov 2023 13:44:38 +0100 Subject: [PATCH 08/22] operands were not actually cleared! 
(not reset to ValueInvalid) Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 9 +++++---- internal/engine/wazevo/ssa/pass_test.go | 1 - 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 5e8ae74caa..70b9056b28 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -405,6 +405,11 @@ func passConstFoldingOpt(b *builder) { yc := yDef.ConstantVal() xc := xDef.ConstantVal() + + // Clear the references to operands. + cur.v = ValueInvalid + cur.v2 = ValueInvalid + // Mutate the instruction to an Iconst. cur.opcode = OpcodeIconst switch op { @@ -413,10 +418,6 @@ func passConstFoldingOpt(b *builder) { case OpcodeIsub: cur.u1 = xc - yc } - cur.u2 = 0 - // Clear the references to operands. - cur.v = 0 - cur.v2 = 0 } } } diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 2bdb14335d..3692dd2cd1 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -400,7 +400,6 @@ blk0: () // FIXME: the first `Iconst_32 0x1` should be dead code, and should not be present in the output. 
after: ` blk0: () - v0:i32 = Iconst_32 0x1 v6:i32 = Iconst_32 0xfffffffe Return v6 `, From 6c10833ecfb52123df2682b547afe2bccadea5d2 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Wed, 29 Nov 2023 16:14:11 +0100 Subject: [PATCH 09/22] add tests for mul and floats Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/instructions.go | 9 +- internal/engine/wazevo/ssa/pass.go | 12 +-- internal/engine/wazevo/ssa/pass_test.go | 102 ++++++++++++++++++++- 3 files changed, 109 insertions(+), 14 deletions(-) diff --git a/internal/engine/wazevo/ssa/instructions.go b/internal/engine/wazevo/ssa/instructions.go index 4dbe0883af..97730e81a5 100644 --- a/internal/engine/wazevo/ssa/instructions.go +++ b/internal/engine/wazevo/ssa/instructions.go @@ -1732,27 +1732,30 @@ func (i *Instruction) InsertlaneData() (x, y Value, index byte, l VecLane) { } // AsFadd initializes this instruction as a floating-point addition instruction with OpcodeFadd. -func (i *Instruction) AsFadd(x, y Value) { +func (i *Instruction) AsFadd(x, y Value) *Instruction { i.opcode = OpcodeFadd i.v = x i.v2 = y i.typ = x.Type() + return i } // AsFsub initializes this instruction as a floating-point subtraction instruction with OpcodeFsub. -func (i *Instruction) AsFsub(x, y Value) { +func (i *Instruction) AsFsub(x, y Value) *Instruction { i.opcode = OpcodeFsub i.v = x i.v2 = y i.typ = x.Type() + return i } // AsFmul initializes this instruction as a floating-point multiplication instruction with OpcodeFmul. -func (i *Instruction) AsFmul(x, y Value) { +func (i *Instruction) AsFmul(x, y Value) *Instruction { i.opcode = OpcodeFmul i.v = x i.v2 = y i.typ = x.Type() + return i } // AsFdiv initializes this instruction as a floating-point division instruction with OpcodeFdiv. 
diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 77636c34e5..005da0bb3f 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -379,7 +379,8 @@ func ensureValueIdToInstructionInit(b *builder) { } } -// passConstFoldingOpt folds constant arithmetic ops into constant ops. +// passConstFoldingOpt scans all instructions for arithmetic operations over constants, +// and replaces them with a const of their result. func passConstFoldingOpt(b *builder) { ensureValueIdToInstructionInit(b) @@ -404,11 +405,10 @@ func passConstFoldingOpt(b *builder) { if xDef.Constant() && yDef.Constant() { isFixedPoint = false // Mutate the instruction to an Iconst. - // We assume all the types are consistent. cur.opcode = OpcodeIconst // Clear the references to operands. cur.v, cur.v2 = ValueInvalid, ValueInvalid - + // We assume all the types are consistent. xc, yc := xDef.ConstantVal(), yDef.ConstantVal() switch op { case OpcodeIadd: @@ -430,12 +430,11 @@ func passConstFoldingOpt(b *builder) { if xDef.Constant() && yDef.Constant() { isFixedPoint = false // Mutate the instruction to an Iconst. - // We assume all the types are consistent. - cur.opcode = OpcodeIconst // Clear the references to operands. cur.v, cur.v2 = ValueInvalid, ValueInvalid - + // We assume all the types are consistent. 
if x.Type().Bits() == 64 { + cur.opcode = OpcodeF64const yc := math.Float64frombits(yDef.ConstantVal()) xc := math.Float64frombits(xDef.ConstantVal()) switch op { @@ -447,6 +446,7 @@ func passConstFoldingOpt(b *builder) { cur.u1 = math.Float64bits(xc * yc) } } else { + cur.opcode = OpcodeF32const yc := math.Float32frombits(uint32(yDef.ConstantVal())) xc := math.Float32frombits(uint32(xDef.ConstantVal())) switch op { diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 3692dd2cd1..4e8635ea53 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -381,8 +381,69 @@ blk0: (v0:i32, v1:i64) nonZeroI32_4 := b.AllocateInstruction().AsIconst32(0x4).Insert(b).Return() foldIsubI32_1 := b.AllocateInstruction().AsIsub(nonZeroI32_4, foldIaddI32_2).Insert(b).Return() + // Imul32 foldedConst, foldedConst should resolve to IConst32 (foldedConst * foldedConst). + foldImulI32_1 := b.AllocateInstruction().AsImul(foldIsubI32_1, foldIsubI32_1).Insert(b).Return() + + // Iadd64 const1 + const2 should resolve to Const (const1 + const2). + nonZeroI64_1 := b.AllocateInstruction().AsIconst64(0x1).Insert(b).Return() + nonZeroI64_2 := b.AllocateInstruction().AsIconst64(0x2).Insert(b).Return() + foldIaddI64_1 := b.AllocateInstruction().AsIadd(nonZeroI64_1, nonZeroI64_2).Insert(b).Return() + + // Iadd64 foldedConst1, const3 should resolve to Const (foldedConst1, const3). + nonZeroI64_3 := b.AllocateInstruction().AsIconst64(0x3).Insert(b).Return() + foldIaddI64_2 := b.AllocateInstruction().AsIadd(foldIaddI64_1, nonZeroI64_3).Insert(b).Return() + + // Isub64 const4, foldedConst1 should resolve to Const (const4, foldedConst2). + nonZeroI64_4 := b.AllocateInstruction().AsIconst64(0x4).Insert(b).Return() + foldIsubI64_1 := b.AllocateInstruction().AsIsub(nonZeroI64_4, foldIaddI64_2).Insert(b).Return() + + // Imul64 foldedConst, foldedConst should resolve to IConst64 (foldedConst * foldedConst). 
+ foldImulI64_1 := b.AllocateInstruction().AsImul(foldIsubI64_1, foldIsubI64_1).Insert(b).Return() + + // Fadd32 const1 + const2 should resolve to Const (const1 + const2). + nonZeroF32_1 := b.AllocateInstruction().AsF32const(1.0).Insert(b).Return() + nonZeroF32_2 := b.AllocateInstruction().AsF32const(2.0).Insert(b).Return() + foldFaddF32_1 := b.AllocateInstruction().AsFadd(nonZeroF32_1, nonZeroF32_2).Insert(b).Return() + + // Fadd32 foldedConst1, const3 should resolve to Const (foldedConst1 + const3). + nonZeroF32_3 := b.AllocateInstruction().AsF32const(3.0).Insert(b).Return() + foldIaddF32_2 := b.AllocateInstruction().AsFadd(foldFaddF32_1, nonZeroF32_3).Insert(b).Return() + + // Fsub32 const4, foldedConst1 should resolve to Const (const4 - foldedConst2). + nonZeroF32_4 := b.AllocateInstruction().AsF32const(4.0).Insert(b).Return() + foldIsubF32_1 := b.AllocateInstruction().AsFsub(nonZeroF32_4, foldIaddF32_2).Insert(b).Return() + + // Fmul32 foldedConst, foldedConst should resolve to FConst32 (foldedConst * foldedConst). + foldFmulF32_1 := b.AllocateInstruction().AsFmul(foldIsubF32_1, foldIsubF32_1).Insert(b).Return() + + // Fadd64 const1 + const2 should resolve to FConst64 (const1 + const2). + nonZeroF64_1 := b.AllocateInstruction().AsF64const(1.0).Insert(b).Return() + nonZeroF64_2 := b.AllocateInstruction().AsF64const(2.0).Insert(b).Return() + // This intermediate value won't be dropped because it is referenced in the result. + foldFaddF64_1 := b.AllocateInstruction().AsFadd(nonZeroF64_1, nonZeroF64_2).Insert(b).Return() + + // Fadd64 foldedConst1, const3 should resolve to FConst64 (foldedConst1 + const3). + nonZeroF64_3 := b.AllocateInstruction().AsF64const(3.0).Insert(b).Return() + foldFaddF64_2 := b.AllocateInstruction().AsFadd(foldFaddF64_1, nonZeroF64_3).Insert(b).Return() + + // Fsub64 const4, foldedConst1 should resolve to FConst64 (const4 - foldedConst2). 
+ nonZeroF64_4 := b.AllocateInstruction().AsF64const(4.0).Insert(b).Return() + foldFsubF64_1 := b.AllocateInstruction().AsFsub(nonZeroF64_4, foldFaddF64_2).Insert(b).Return() + + // Fmul64 foldedConst, foldedConst should resolve to FConst64 (foldedConst * foldedConst). + foldFmulF64_1 := b.AllocateInstruction().AsFmul(foldFsubF64_1, foldFsubF64_1).Insert(b).Return() + ret := b.AllocateInstruction() - ret.AsReturn([]Value{foldIsubI32_1}) + ret.AsReturn([]Value{ + foldImulI32_1, + foldIsubI64_1, + foldImulI64_1, + foldIsubF32_1, + foldFmulF32_1, + foldFaddF64_1, + foldFsubF64_1, + foldFmulF64_1, + }) b.InsertInstruction(ret) return nil }, @@ -395,13 +456,44 @@ blk0: () v4:i32 = Iadd v2, v3 v5:i32 = Iconst_32 0x4 v6:i32 = Isub v5, v4 - Return v6 + v7:i32 = Imul v6, v6 + v8:i64 = Iconst_64 0x1 + v9:i64 = Iconst_64 0x2 + v10:i64 = Iadd v8, v9 + v11:i64 = Iconst_64 0x3 + v12:i64 = Iadd v10, v11 + v13:i64 = Iconst_64 0x4 + v14:i64 = Isub v13, v12 + v15:i64 = Imul v14, v14 + v16:f32 = F32const 1.000000 + v17:f32 = F32const 2.000000 + v18:f32 = Fadd v16, v17 + v19:f32 = F32const 3.000000 + v20:f32 = Fadd v18, v19 + v21:f32 = F32const 4.000000 + v22:f32 = Fsub v21, v20 + v23:f32 = Fmul v22, v22 + v24:f64 = F64const 1.000000 + v25:f64 = F64const 2.000000 + v26:f64 = Fadd v24, v25 + v27:f64 = F64const 3.000000 + v28:f64 = Fadd v26, v27 + v29:f64 = F64const 4.000000 + v30:f64 = Fsub v29, v28 + v31:f64 = Fmul v30, v30 + Return v7, v14, v15, v22, v23, v26, v30, v31 `, - // FIXME: the first `Iconst_32 0x1` should be dead code, and should not be present in the output. 
after: ` blk0: () - v6:i32 = Iconst_32 0xfffffffe - Return v6 + v7:i32 = Iconst_32 0x4 + v14:i64 = Iconst_64 0xfffffffffffffffe + v15:i64 = Iconst_64 0x4 + v22:f32 = F32const -2.000000 + v23:f32 = F32const 4.000000 + v26:f64 = F64const 3.000000 + v30:f64 = F64const -2.000000 + v31:f64 = F64const 4.000000 + Return v7, v14, v15, v22, v23, v26, v30, v31 `, }, } { From 073789ca3329c001d420d2295c9740133e2b7156 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Wed, 29 Nov 2023 16:30:49 +0100 Subject: [PATCH 10/22] passCollectValueIdToInstructionMapping is its own pass Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 8 +++----- internal/engine/wazevo/ssa/pass_test.go | 14 ++++++++++---- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 005da0bb3f..3cef6b169b 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -20,6 +20,8 @@ func (b *builder) RunPasses() { passRedundantPhiEliminationOpt(b) // The result of passCalculateImmediateDominators will be used by various passes below. passCalculateImmediateDominators(b) + + passCollectValueIdToInstructionMapping(b) passNopInstElimination(b) // TODO: implement either conversion of irreducible CFG into reducible one, or irreducible CFG detection where we panic. @@ -312,8 +314,6 @@ func (b *builder) clearBlkVisited() { // passNopInstElimination eliminates the instructions which is essentially a no-op. 
func passNopInstElimination(b *builder) { - ensureValueIdToInstructionInit(b) - for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { for cur := blk.rootInstr; cur != nil; cur = cur.next { op := cur.Opcode() @@ -361,7 +361,7 @@ func passNopInstElimination(b *builder) { } } -func ensureValueIdToInstructionInit(b *builder) { +func passCollectValueIdToInstructionMapping(b *builder) { if int(b.nextValueID) >= len(b.valueIDToInstruction) { b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...) } @@ -382,8 +382,6 @@ func ensureValueIdToInstructionInit(b *builder) { // passConstFoldingOpt scans all instructions for arithmetic operations over constants, // and replaces them with a const of their result. func passConstFoldingOpt(b *builder) { - ensureValueIdToInstructionInit(b) - isFixedPoint := false for !isFixedPoint { isFixedPoint = true diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 4e8635ea53..28f602c8b8 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -285,8 +285,11 @@ blk1: () <-- (blk0) `, }, { - name: "nop elimination", - pass: passNopInstElimination, + name: "nop elimination", + pass: func(b *builder) { + passCollectValueIdToInstructionMapping(b) + passNopInstElimination(b) + }, postPass: passDeadCodeEliminationOpt, setup: func(b *builder) (verifier func(t *testing.T)) { entry := b.AllocateBasicBlock() @@ -361,8 +364,11 @@ blk0: (v0:i32, v1:i64) `, }, { - name: "const folding", - pass: passConstFoldingOpt, + name: "const folding", + pass: func(b *builder) { + passCollectValueIdToInstructionMapping(b) + passConstFoldingOpt(b) + }, postPass: passDeadCodeEliminationOpt, setup: func(b *builder) (verifier func(t *testing.T)) { entry := b.AllocateBasicBlock() From e01691ca4a9791f49791e445c2036f377c4dbe05 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Wed, 29 Nov 2023 16:33:23 +0100 Subject: [PATCH 
11/22] deduplicate code in pass deadcode elim Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 11 ----------- internal/engine/wazevo/ssa/pass_test.go | 5 ++++- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 3cef6b169b..1c7376b5c8 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -179,9 +179,6 @@ func passDeadCodeEliminationOpt(b *builder) { if nvid >= len(b.valueRefCounts) { b.valueRefCounts = append(b.valueRefCounts, make([]int, b.nextValueID)...) } - if nvid >= len(b.valueIDToInstruction) { - b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...) - } // First, we gather all the instructions with side effects. liveInstructions := b.instStack[:0] @@ -200,14 +197,6 @@ func passDeadCodeEliminationOpt(b *builder) { // The strict side effect should create different instruction groups. gid++ } - - r1, rs := cur.Returns() - if r1.Valid() { - b.valueIDToInstruction[r1.ID()] = cur - } - for _, r := range rs { - b.valueIDToInstruction[r.ID()] = cur - } } } diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 28f602c8b8..402931af27 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -194,7 +194,10 @@ blk2: () <-- (blk1) }, { name: "dead code", - pass: passDeadCodeEliminationOpt, + pass: func(b *builder) { + passCollectValueIdToInstructionMapping(b) + passDeadCodeEliminationOpt(b) + }, setup: func(b *builder) func(*testing.T) { entry, end := b.AllocateBasicBlock(), b.AllocateBasicBlock() From fdcc3c726f89a17922bc60774e628124c3d73b81 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Wed, 29 Nov 2023 16:56:52 +0100 Subject: [PATCH 12/22] fixed point is local to each basic block Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 17 ++++++++------ 
internal/engine/wazevo/ssa/pass_test.go | 30 ++++++++++++------------- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 1c7376b5c8..046a1c70d8 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -18,10 +18,11 @@ func (b *builder) RunPasses() { passSortSuccessors(b) passDeadBlockEliminationOpt(b) passRedundantPhiEliminationOpt(b) - // The result of passCalculateImmediateDominators will be used by various passes below. + // The result of passCalculateImmediateDominators and passCollectValueIdToInstructionMapping + // will be used by various passes below. passCalculateImmediateDominators(b) - passCollectValueIdToInstructionMapping(b) + passNopInstElimination(b) // TODO: implement either conversion of irreducible CFG into reducible one, or irreducible CFG detection where we panic. @@ -371,11 +372,13 @@ func passCollectValueIdToInstructionMapping(b *builder) { // passConstFoldingOpt scans all instructions for arithmetic operations over constants, // and replaces them with a const of their result. func passConstFoldingOpt(b *builder) { - isFixedPoint := false - for !isFixedPoint { - isFixedPoint = true - for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { - for cur := blk.rootInstr; cur != nil; cur = cur.next { + for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { + for cur := blk.rootInstr; cur != nil; cur = cur.next { + // The fixed point is reached through a simple iteration over the list of instructions. + // Note: Instead of just an unbounded loop with a flag, we may also add an upper bound to the number of iterations. 
+ isFixedPoint := false + for !isFixedPoint { + isFixedPoint = true op := cur.Opcode() switch op { // X := Const xc diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 402931af27..853382a33b 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -9,6 +9,9 @@ import ( func TestBuilder_passes(t *testing.T) { for _, tc := range []struct { name string + // prePass is run before the pass is executed, and can be used to configure the environment + // (e.g. init `*builder` fields). + prePass, // pass is the optimization pass to run. pass, // postPass is run after the pass is executed, and can be used to test a pass that depends on another pass. @@ -193,11 +196,9 @@ blk2: () <-- (blk1) `, }, { - name: "dead code", - pass: func(b *builder) { - passCollectValueIdToInstructionMapping(b) - passDeadCodeEliminationOpt(b) - }, + name: "dead code", + prePass: passCollectValueIdToInstructionMapping, + pass: passDeadCodeEliminationOpt, setup: func(b *builder) func(*testing.T) { entry, end := b.AllocateBasicBlock(), b.AllocateBasicBlock() @@ -288,11 +289,9 @@ blk1: () <-- (blk0) `, }, { - name: "nop elimination", - pass: func(b *builder) { - passCollectValueIdToInstructionMapping(b) - passNopInstElimination(b) - }, + name: "nop elimination", + prePass: passCollectValueIdToInstructionMapping, + pass: passNopInstElimination, postPass: passDeadCodeEliminationOpt, setup: func(b *builder) (verifier func(t *testing.T)) { entry := b.AllocateBasicBlock() @@ -367,11 +366,9 @@ blk0: (v0:i32, v1:i64) `, }, { - name: "const folding", - pass: func(b *builder) { - passCollectValueIdToInstructionMapping(b) - passConstFoldingOpt(b) - }, + name: "const folding", + prePass: passCollectValueIdToInstructionMapping, + pass: passConstFoldingOpt, postPass: passDeadCodeEliminationOpt, setup: func(b *builder) (verifier func(t *testing.T)) { entry := b.AllocateBasicBlock() @@ -511,6 +508,9 @@ blk0: () b := 
NewBuilder().(*builder) verifier := tc.setup(b) require.Equal(t, tc.before, b.Format()) + if tc.prePass != nil { + tc.prePass(b) + } tc.pass(b) if verifier != nil { verifier(t) From 2842b95e135a7924a54e6da8eeffd9cbad45021a Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Wed, 29 Nov 2023 23:00:27 +0100 Subject: [PATCH 13/22] more test cases Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 28 +++++++--- internal/engine/wazevo/ssa/pass_test.go | 68 +++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 8 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 046a1c70d8..24de30bb1b 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -399,14 +399,26 @@ func passConstFoldingOpt(b *builder) { // Clear the references to operands. cur.v, cur.v2 = ValueInvalid, ValueInvalid // We assume all the types are consistent. - xc, yc := xDef.ConstantVal(), yDef.ConstantVal() - switch op { - case OpcodeIadd: - cur.u1 = xc + yc - case OpcodeIsub: - cur.u1 = xc - yc - case OpcodeImul: - cur.u1 = xc * yc + if x.Type().Bits() == 64 { + xc, yc := int64(xDef.ConstantVal()), int64(yDef.ConstantVal()) + switch op { + case OpcodeIadd: + cur.u1 = uint64(xc + yc) + case OpcodeIsub: + cur.u1 = uint64(xc - yc) + case OpcodeImul: + cur.u1 = uint64(xc * yc) + } + } else { + xc, yc := int32(xDef.ConstantVal()), int32(yDef.ConstantVal()) + switch op { + case OpcodeIadd: + cur.u1 = uint64(xc + yc) + case OpcodeIsub: + cur.u1 = uint64(xc - yc) + case OpcodeImul: + cur.u1 = uint64(xc * yc) + } } } case OpcodeFadd, OpcodeFsub, OpcodeFmul: diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 853382a33b..1b786eceaf 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -1,6 +1,7 @@ package ssa import ( + "math" "testing" "github.com/tetratelabs/wazero/internal/testing/require" @@ -500,6 +501,73 
@@ blk0: () v30:f64 = F64const -2.000000 v31:f64 = F64const 4.000000 Return v7, v14, v15, v22, v23, v26, v30, v31 +`, + }, + { + name: "const folding (overflow)", + prePass: passCollectValueIdToInstructionMapping, + pass: passConstFoldingOpt, + postPass: passDeadCodeEliminationOpt, + setup: func(b *builder) (verifier func(t *testing.T)) { + entry := b.AllocateBasicBlock() + b.SetCurrentBlock(entry) + + maxI32 := b.AllocateInstruction().AsIconst32(math.MaxInt32).Insert(b).Return() + oneI32 := b.AllocateInstruction().AsIconst32(1).Insert(b).Return() + wrapI32 := b.AllocateInstruction().AsIadd(maxI32, oneI32).Insert(b).Return() + mulI32 := b.AllocateInstruction().AsImul(maxI32, maxI32).Insert(b).Return() + + maxI64 := b.AllocateInstruction().AsIconst64(math.MaxInt64).Insert(b).Return() + oneI64 := b.AllocateInstruction().AsIconst64(1).Insert(b).Return() + wrapI64 := b.AllocateInstruction().AsIadd(maxI64, oneI64).Insert(b).Return() + mulI64 := b.AllocateInstruction().AsImul(maxI64, maxI64).Insert(b).Return() + + maxF32 := b.AllocateInstruction().AsF32const(math.MaxFloat32).Insert(b).Return() + oneF32 := b.AllocateInstruction().AsF32const(1.0).Insert(b).Return() + addF32 := b.AllocateInstruction().AsFadd(maxF32, oneF32).Insert(b).Return() + mulF32 := b.AllocateInstruction().AsFmul(maxF32, maxF32).Insert(b).Return() + + maxF64 := b.AllocateInstruction().AsF64const(math.MaxFloat64).Insert(b).Return() + oneF64 := b.AllocateInstruction().AsF64const(1.0).Insert(b).Return() + addF64 := b.AllocateInstruction().AsFadd(maxF64, oneF64).Insert(b).Return() + mulF64 := b.AllocateInstruction().AsFmul(maxF64, maxF64).Insert(b).Return() + + ret := b.AllocateInstruction() + ret.AsReturn([]Value{wrapI32, mulI32, wrapI64, mulI64, addF32, mulF32, addF64, mulF64}) + b.InsertInstruction(ret) + return nil + }, + before: ` +blk0: () + v0:i32 = Iconst_32 0x7fffffff + v1:i32 = Iconst_32 0x1 + v2:i32 = Iadd v0, v1 + v3:i32 = Imul v0, v0 + v4:i64 = Iconst_64 0x7fffffffffffffff + v5:i64 = Iconst_64 
0x1 + v6:i64 = Iadd v4, v5 + v7:i64 = Imul v4, v4 + v8:f32 = F32const 340282346638528859811704183484516925440.000000 + v9:f32 = F32const 1.000000 + v10:f32 = Fadd v8, v9 + v11:f32 = Fmul v8, v8 + v12:f64 = F64const 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000 + v13:f64 = F64const 1.000000 + v14:f64 = Fadd v12, v13 + v15:f64 = Fmul v12, v12 + Return v2, v3, v6, v7, v10, v11, v14, v15 +`, + after: ` +blk0: () + v2:i32 = Iconst_32 0x80000000 + v3:i32 = Iconst_32 0x1 + v6:i64 = Iconst_64 0x8000000000000000 + v7:i64 = Iconst_64 0x1 + v10:f32 = F32const 340282346638528859811704183484516925440.000000 + v11:f32 = F32const +Inf + v14:f64 = F64const 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000 + v15:f64 = F64const +Inf + Return v2, v3, v6, v7, v10, v11, v14, v15 `, }, } { From 2f76dadcb3e1d52963ca8ae0c130ffef63843c88 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Thu, 30 Nov 2023 10:48:03 +0100 Subject: [PATCH 14/22] float cases Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass_test.go | 110 +++++++++++++++++++----- 1 file changed, 89 insertions(+), 21 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 1b786eceaf..57dc4230f4 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -514,26 +514,70 @@ blk0: () maxI32 := b.AllocateInstruction().AsIconst32(math.MaxInt32).Insert(b).Return() oneI32 := 
b.AllocateInstruction().AsIconst32(1).Insert(b).Return() + // Iadd MaxInt32, 1 overflows and wraps around to 0x80000000 (min representable Int32) wrapI32 := b.AllocateInstruction().AsIadd(maxI32, oneI32).Insert(b).Return() + // Imul MaxInt32, MaxInt32 overflows and wraps around to 0x1. mulI32 := b.AllocateInstruction().AsImul(maxI32, maxI32).Insert(b).Return() + // Explicitly using the constant because math.MinInt32 is not representable. + minI32 := b.AllocateInstruction().AsIconst32(0x80000000).Insert(b).Return() + // Isub 0x80000000, 1 overflows and wraps around to 0x7fffffff (max representable Int32) + negWrapI32 := b.AllocateInstruction().AsIsub(minI32, oneI32).Insert(b).Return() + maxI64 := b.AllocateInstruction().AsIconst64(math.MaxInt64).Insert(b).Return() oneI64 := b.AllocateInstruction().AsIconst64(1).Insert(b).Return() + // Iadd MaxInt64, 1 overflows and wraps around to 0x8000000000000000 (min representable Int64) wrapI64 := b.AllocateInstruction().AsIadd(maxI64, oneI64).Insert(b).Return() mulI64 := b.AllocateInstruction().AsImul(maxI64, maxI64).Insert(b).Return() + // Explicitly using the constant because math.MinInt64 is not representable. + minI64 := b.AllocateInstruction().AsIconst64(0x8000000000000000).Insert(b).Return() + // Isub 0x8000000000000000, 1 overflows and wraps around to 0x7fffffffffffffff (max representable Int64) + negWrapI64 := b.AllocateInstruction().AsIsub(minI64, oneI64).Insert(b).Return() + maxF32 := b.AllocateInstruction().AsF32const(math.MaxFloat32).Insert(b).Return() oneF32 := b.AllocateInstruction().AsF32const(1.0).Insert(b).Return() + // Fadd MaxFloat32, 1 absorbs the value and returns MaxFloat32. addF32 := b.AllocateInstruction().AsFadd(maxF32, oneF32).Insert(b).Return() + // Fadd MaxFloat32, MaxFloat32 returns +Inf. + addF32_2 := b.AllocateInstruction().AsFadd(maxF32, maxF32).Insert(b).Return() + // Fmul MaxFloat32, MaxFloat32 returns +Inf. 
mulF32 := b.AllocateInstruction().AsFmul(maxF32, maxF32).Insert(b).Return() + minF32 := b.AllocateInstruction().AsF32const(-math.MaxFloat32).Insert(b).Return() + // Fsub -MaxFloat32, 1 absorbs the value and returns -MaxFloat32. + subF32 := b.AllocateInstruction().AsFsub(minF32, oneF32).Insert(b).Return() + // Fsub -MaxFloat32, -MaxFloat32 returns ?? + subF32_2 := b.AllocateInstruction().AsFadd(minF32, minF32).Insert(b).Return() + // Fmul returns +Inf. + mulMinF32 := b.AllocateInstruction().AsFmul(minF32, minF32).Insert(b).Return() + maxF64 := b.AllocateInstruction().AsF64const(math.MaxFloat64).Insert(b).Return() oneF64 := b.AllocateInstruction().AsF64const(1.0).Insert(b).Return() + // Fadd MaxFloat64, 1 absorbs the value and returns MaxFloat64. addF64 := b.AllocateInstruction().AsFadd(maxF64, oneF64).Insert(b).Return() + // Fadd MaxFloat64, MaxFloat64 returns +Inf. + addF64_2 := b.AllocateInstruction().AsFadd(maxF64, maxF64).Insert(b).Return() + // Fmul MaxFloat64, MaxFloat64 returns +Inf. mulF64 := b.AllocateInstruction().AsFmul(maxF64, maxF64).Insert(b).Return() + minF64 := b.AllocateInstruction().AsF64const(-math.MaxFloat64).Insert(b).Return() + // Fsub -MaxFloat64, 1 absorbs the value and returns -MaxFloat64. + subF64 := b.AllocateInstruction().AsFsub(minF64, oneF64).Insert(b).Return() + // Fsub -MaxFloat64, -MaxFloat64 returns -Inf. + subF64_2 := b.AllocateInstruction().AsFadd(minF64, minF64).Insert(b).Return() + // Fmul -MaxFloat64, -MaxFloat64 returns +Inf. 
+ mulMinF64 := b.AllocateInstruction().AsFmul(minF64, minF64).Insert(b).Return() + ret := b.AllocateInstruction() - ret.AsReturn([]Value{wrapI32, mulI32, wrapI64, mulI64, addF32, mulF32, addF64, mulF64}) + ret.AsReturn([]Value{ + wrapI32, mulI32, negWrapI32, + wrapI64, mulI64, negWrapI64, + addF32, addF32_2, mulF32, + subF32, subF32_2, mulMinF32, + addF64, addF64_2, mulF64, + subF64, subF64_2, mulMinF64, + }) b.InsertInstruction(ret) return nil }, @@ -543,31 +587,55 @@ blk0: () v1:i32 = Iconst_32 0x1 v2:i32 = Iadd v0, v1 v3:i32 = Imul v0, v0 - v4:i64 = Iconst_64 0x7fffffffffffffff - v5:i64 = Iconst_64 0x1 - v6:i64 = Iadd v4, v5 - v7:i64 = Imul v4, v4 - v8:f32 = F32const 340282346638528859811704183484516925440.000000 - v9:f32 = F32const 1.000000 - v10:f32 = Fadd v8, v9 - v11:f32 = Fmul v8, v8 - v12:f64 = F64const 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000 - v13:f64 = F64const 1.000000 - v14:f64 = Fadd v12, v13 - v15:f64 = Fmul v12, v12 - Return v2, v3, v6, v7, v10, v11, v14, v15 + v4:i32 = Iconst_32 0x80000000 + v5:i32 = Isub v4, v1 + v6:i64 = Iconst_64 0x7fffffffffffffff + v7:i64 = Iconst_64 0x1 + v8:i64 = Iadd v6, v7 + v9:i64 = Imul v6, v6 + v10:i64 = Iconst_64 0x8000000000000000 + v11:i64 = Isub v10, v7 + v12:f32 = F32const 3.4028235e+38 + v13:f32 = F32const 1 + v14:f32 = Fadd v12, v13 + v15:f32 = Fadd v12, v12 + v16:f32 = Fmul v12, v12 + v17:f32 = F32const -3.4028235e+38 + v18:f32 = Fsub v17, v13 + v19:f32 = Fadd v17, v17 + v20:f32 = Fmul v17, v17 + v21:f64 = F64const 1.7976931348623157e+308 + v22:f64 = F64const 1 + v23:f64 = Fadd v21, v22 + v24:f64 = Fadd v21, v21 + v25:f64 = Fmul v21, v21 + v26:f64 = F64const -1.7976931348623157e+308 + v27:f64 = Fsub v26, v22 + v28:f64 = Fadd v26, v26 + 
v29:f64 = Fmul v26, v26 + Return v2, v3, v5, v8, v9, v11, v14, v15, v16, v18, v19, v20, v23, v24, v25, v27, v28, v29 `, after: ` blk0: () v2:i32 = Iconst_32 0x80000000 v3:i32 = Iconst_32 0x1 - v6:i64 = Iconst_64 0x8000000000000000 - v7:i64 = Iconst_64 0x1 - v10:f32 = F32const 340282346638528859811704183484516925440.000000 - v11:f32 = F32const +Inf - v14:f64 = F64const 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000 - v15:f64 = F64const +Inf - Return v2, v3, v6, v7, v10, v11, v14, v15 + v5:i32 = Iconst_32 0x7fffffff + v8:i64 = Iconst_64 0x8000000000000000 + v9:i64 = Iconst_64 0x1 + v11:i64 = Iconst_64 0x7fffffffffffffff + v14:f32 = F32const 3.4028235e+38 + v15:f32 = F32const +Inf + v16:f32 = F32const +Inf + v18:f32 = F32const -3.4028235e+38 + v19:f32 = F32const -Inf + v20:f32 = F32const +Inf + v23:f64 = F64const 1.7976931348623157e+308 + v24:f64 = F64const +Inf + v25:f64 = F64const +Inf + v27:f64 = F64const -1.7976931348623157e+308 + v28:f64 = F64const -Inf + v29:f64 = F64const +Inf + Return v2, v3, v5, v8, v9, v11, v14, v15, v16, v18, v19, v20, v23, v24, v25, v27, v28, v29 `, }, } { From cddeb203786ba3e9fcf962bbbb0268958039b350 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Thu, 30 Nov 2023 10:53:25 +0100 Subject: [PATCH 15/22] use %g to format float constants (ssa) Signed-off-by: Edoardo Vacchi --- .../engine/wazevo/frontend/frontend_test.go | 60 +++++++++---------- internal/engine/wazevo/ssa/instructions.go | 4 +- internal/engine/wazevo/ssa/pass_test.go | 26 ++++---- 3 files changed, 45 insertions(+), 45 deletions(-) diff --git a/internal/engine/wazevo/frontend/frontend_test.go b/internal/engine/wazevo/frontend/frontend_test.go index f50d3218cc..7ddb821292 100644 --- 
a/internal/engine/wazevo/frontend/frontend_test.go +++ b/internal/engine/wazevo/frontend/frontend_test.go @@ -95,8 +95,8 @@ blk0: (exec_ctx:i64, module_ctx:i64, v2:i32, v3:i32) blk0: (exec_ctx:i64, module_ctx:i64) v2:i32 = Iconst_32 0x0 v3:i64 = Iconst_64 0x0 - v4:f32 = F32const 0.000000 - v5:f64 = F64const 0.000000 + v4:f32 = F32const 0 + v5:f64 = F64const 0 Jump blk_ret `, expAfterOpt: ` @@ -136,8 +136,8 @@ blk0: (exec_ctx:i64, module_ctx:i64, v2:i32) blk0: (exec_ctx:i64, module_ctx:i64, v2:i64, v3:f32, v4:f64) v5:i32 = Iconst_32 0x0 v6:i64 = Iconst_64 0x0 - v7:f32 = F32const 0.000000 - v8:f64 = F64const 0.000000 + v7:f32 = F32const 0 + v8:f64 = F64const 0 v9:i64 = Iadd v2, v2 v10:i64 = Isub v9, v2 v11:f32 = Fadd v3, v3 @@ -204,8 +204,8 @@ blk1: () <-- (blk0) blk0: (exec_ctx:i64, module_ctx:i64) v2:i32 = Iconst_32 0x0 v3:i64 = Iconst_64 0x0 - v4:f32 = F32const 0.000000 - v5:f64 = F64const 0.000000 + v4:f32 = F32const 0 + v5:f64 = F64const 0 Jump blk1 blk1: () <-- (blk0) @@ -311,8 +311,8 @@ blk3: () <-- (blk1) blk0: (exec_ctx:i64, module_ctx:i64) v2:i32 = Iconst_32 0x0 v3:i64 = Iconst_64 0x0 - v4:f32 = F32const 0.000000 - v5:f64 = F64const 0.000000 + v4:f32 = F32const 0 + v5:f64 = F64const 0 Jump blk1 blk1: () <-- (blk0) @@ -885,45 +885,45 @@ blk0: (exec_ctx:i64, module_ctx:i64, v2:i32, v3:f32) v60:i32 = Iadd v9, v59 v61:i32 = Iadd v7, v60 v62:i32 = Iadd v5, v61 - v63:f32 = F32const 1.000000 + v63:f32 = F32const 1 v64:f32 = Fmul v3, v63 - v65:f32 = F32const 2.000000 + v65:f32 = F32const 2 v66:f32 = Fmul v3, v65 - v67:f32 = F32const 3.000000 + v67:f32 = F32const 3 v68:f32 = Fmul v3, v67 - v69:f32 = F32const 4.000000 + v69:f32 = F32const 4 v70:f32 = Fmul v3, v69 - v71:f32 = F32const 5.000000 + v71:f32 = F32const 5 v72:f32 = Fmul v3, v71 - v73:f32 = F32const 6.000000 + v73:f32 = F32const 6 v74:f32 = Fmul v3, v73 - v75:f32 = F32const 7.000000 + v75:f32 = F32const 7 v76:f32 = Fmul v3, v75 - v77:f32 = F32const 8.000000 + v77:f32 = F32const 8 v78:f32 = Fmul v3, v77 - 
v79:f32 = F32const 9.000000 + v79:f32 = F32const 9 v80:f32 = Fmul v3, v79 - v81:f32 = F32const 10.000000 + v81:f32 = F32const 10 v82:f32 = Fmul v3, v81 - v83:f32 = F32const 11.000000 + v83:f32 = F32const 11 v84:f32 = Fmul v3, v83 - v85:f32 = F32const 12.000000 + v85:f32 = F32const 12 v86:f32 = Fmul v3, v85 - v87:f32 = F32const 13.000000 + v87:f32 = F32const 13 v88:f32 = Fmul v3, v87 - v89:f32 = F32const 14.000000 + v89:f32 = F32const 14 v90:f32 = Fmul v3, v89 - v91:f32 = F32const 15.000000 + v91:f32 = F32const 15 v92:f32 = Fmul v3, v91 - v93:f32 = F32const 16.000000 + v93:f32 = F32const 16 v94:f32 = Fmul v3, v93 - v95:f32 = F32const 17.000000 + v95:f32 = F32const 17 v96:f32 = Fmul v3, v95 - v97:f32 = F32const 18.000000 + v97:f32 = F32const 18 v98:f32 = Fmul v3, v97 - v99:f32 = F32const 19.000000 + v99:f32 = F32const 19 v100:f32 = Fmul v3, v99 - v101:f32 = F32const 20.000000 + v101:f32 = F32const 20 v102:f32 = Fmul v3, v101 v103:f32 = Fadd v100, v102 v104:f32 = Fadd v98, v103 @@ -1356,10 +1356,10 @@ blk0: (exec_ctx:i64, module_ctx:i64) v4:i64 = Iconst_64 0x2 v5:i64 = Load module_ctx, 0x10 Store v4, v5, 0x8 - v6:f32 = F32const 3.000000 + v6:f32 = F32const 3 v7:i64 = Load module_ctx, 0x18 Store v6, v7, 0x8 - v8:f64 = F64const 4.000000 + v8:f64 = F64const 4 v9:i64 = Load module_ctx, 0x20 Store v8, v9, 0x8 Jump blk_ret, v2, v4, v6, v8 diff --git a/internal/engine/wazevo/ssa/instructions.go b/internal/engine/wazevo/ssa/instructions.go index 97730e81a5..fa3df6e824 100644 --- a/internal/engine/wazevo/ssa/instructions.go +++ b/internal/engine/wazevo/ssa/instructions.go @@ -2394,9 +2394,9 @@ func (i *Instruction) Format(b Builder) string { case OpcodeVconst: instSuffix = fmt.Sprintf(" %016x %016x", i.u1, i.u2) case OpcodeF32const: - instSuffix = fmt.Sprintf(" %f", math.Float32frombits(uint32(i.u1))) + instSuffix = fmt.Sprintf(" %g", math.Float32frombits(uint32(i.u1))) case OpcodeF64const: - instSuffix = fmt.Sprintf(" %f", math.Float64frombits(i.u1)) + instSuffix = 
fmt.Sprintf(" %g", math.Float64frombits(i.u1)) case OpcodeReturn: if len(i.vs) == 0 { break diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 57dc4230f4..0528a9485c 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -472,20 +472,20 @@ blk0: () v13:i64 = Iconst_64 0x4 v14:i64 = Isub v13, v12 v15:i64 = Imul v14, v14 - v16:f32 = F32const 1.000000 - v17:f32 = F32const 2.000000 + v16:f32 = F32const 1 + v17:f32 = F32const 2 v18:f32 = Fadd v16, v17 - v19:f32 = F32const 3.000000 + v19:f32 = F32const 3 v20:f32 = Fadd v18, v19 - v21:f32 = F32const 4.000000 + v21:f32 = F32const 4 v22:f32 = Fsub v21, v20 v23:f32 = Fmul v22, v22 - v24:f64 = F64const 1.000000 - v25:f64 = F64const 2.000000 + v24:f64 = F64const 1 + v25:f64 = F64const 2 v26:f64 = Fadd v24, v25 - v27:f64 = F64const 3.000000 + v27:f64 = F64const 3 v28:f64 = Fadd v26, v27 - v29:f64 = F64const 4.000000 + v29:f64 = F64const 4 v30:f64 = Fsub v29, v28 v31:f64 = Fmul v30, v30 Return v7, v14, v15, v22, v23, v26, v30, v31 @@ -495,11 +495,11 @@ blk0: () v7:i32 = Iconst_32 0x4 v14:i64 = Iconst_64 0xfffffffffffffffe v15:i64 = Iconst_64 0x4 - v22:f32 = F32const -2.000000 - v23:f32 = F32const 4.000000 - v26:f64 = F64const 3.000000 - v30:f64 = F64const -2.000000 - v31:f64 = F64const 4.000000 + v22:f32 = F32const -2 + v23:f32 = F32const 4 + v26:f64 = F64const 3 + v30:f64 = F64const -2 + v31:f64 = F64const 4 Return v7, v14, v15, v22, v23, v26, v30, v31 `, }, From c8ec025267b64cba2706cecf46d15b7fa6213ec9 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Mon, 4 Dec 2023 17:08:46 +0100 Subject: [PATCH 16/22] simplify constFold Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 41 ++++++++++--------------- internal/engine/wazevo/ssa/pass_test.go | 1 + 2 files changed, 18 insertions(+), 24 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 24de30bb1b..a51be8bd45 
100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -369,15 +369,18 @@ func passCollectValueIdToInstructionMapping(b *builder) { } } +// passConstFoldingOptMaxIter controls the max number of iterations per-BB, before giving up. +const passConstFoldingOptMaxIter = 10 + // passConstFoldingOpt scans all instructions for arithmetic operations over constants, -// and replaces them with a const of their result. +// and replaces them with a const of their result. Repeats for each basic blocks until +// a fixed point is reached or num iter == passConstFoldingOptMaxIter. func passConstFoldingOpt(b *builder) { for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { for cur := blk.rootInstr; cur != nil; cur = cur.next { // The fixed point is reached through a simple iteration over the list of instructions. // Note: Instead of just an unbounded loop with a flag, we may also add an upper bound to the number of iterations. - isFixedPoint := false - for !isFixedPoint { + for iter, isFixedPoint := 0, false; iter < passConstFoldingOptMaxIter && !isFixedPoint; iter++ { isFixedPoint = true op := cur.Opcode() switch op { @@ -399,26 +402,16 @@ func passConstFoldingOpt(b *builder) { // Clear the references to operands. cur.v, cur.v2 = ValueInvalid, ValueInvalid // We assume all the types are consistent. - if x.Type().Bits() == 64 { - xc, yc := int64(xDef.ConstantVal()), int64(yDef.ConstantVal()) - switch op { - case OpcodeIadd: - cur.u1 = uint64(xc + yc) - case OpcodeIsub: - cur.u1 = uint64(xc - yc) - case OpcodeImul: - cur.u1 = uint64(xc * yc) - } - } else { - xc, yc := int32(xDef.ConstantVal()), int32(yDef.ConstantVal()) - switch op { - case OpcodeIadd: - cur.u1 = uint64(xc + yc) - case OpcodeIsub: - cur.u1 = uint64(xc - yc) - case OpcodeImul: - cur.u1 = uint64(xc * yc) - } + // Signed integers are 2 complement, so we can just apply the operations. 
+ // Operations are evaluated over uint64s and will be bitcasted at the use-sites. + xc, yc := xDef.ConstantVal(), yDef.ConstantVal() + switch op { + case OpcodeIadd: + cur.u1 = xc + yc + case OpcodeIsub: + cur.u1 = xc - yc + case OpcodeImul: + cur.u1 = xc * yc } } case OpcodeFadd, OpcodeFsub, OpcodeFmul: @@ -426,7 +419,7 @@ func passConstFoldingOpt(b *builder) { xDef := b.valueIDToInstruction[x.ID()] yDef := b.valueIDToInstruction[y.ID()] if xDef == nil || yDef == nil { - // If we are adding together some parameter, ignore. + // If we are composing some parameter, ignore. continue } if xDef.Constant() && yDef.Constant() { diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 0528a9485c..5090252da4 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -528,6 +528,7 @@ blk0: () oneI64 := b.AllocateInstruction().AsIconst64(1).Insert(b).Return() // Iadd MaxInt64, 1 overflows and wraps around to 0x8000000000000000 (min representable Int64) wrapI64 := b.AllocateInstruction().AsIadd(maxI64, oneI64).Insert(b).Return() + // Imul MaxInt64, MaxInt64 overflows and wraps around to 0x1. mulI64 := b.AllocateInstruction().AsImul(maxI64, maxI64).Insert(b).Return() // Explicitly using the constant because math.MinInt64 is not representable. 
From 2686b575570c3fad6faaea68ba2e0582deb44e05 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Mon, 4 Dec 2023 21:09:45 +0100 Subject: [PATCH 17/22] some work on algebraic simplification for Iadd Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 92 ++++++++++++++++++++++++- internal/engine/wazevo/ssa/pass_test.go | 50 +++++++++++++- 2 files changed, 140 insertions(+), 2 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index a51be8bd45..9e96ac2c15 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -38,6 +38,9 @@ func (b *builder) RunPasses() { // and more! passConstFoldingOpt(b) + passAlgebraicSimplification(b) + + passCollectValueIdToInstructionMapping(b) // passDeadCodeEliminationOpt could be more accurate if we do this after other optimizations. passDeadCodeEliminationOpt(b) @@ -369,7 +372,7 @@ func passCollectValueIdToInstructionMapping(b *builder) { } } -// passConstFoldingOptMaxIter controls the max number of iterations per-BB, before giving up. +// passConstFoldingOptMaxIter controls the max number of iterations per-BB in passConstFoldingOpt, before giving up. const passConstFoldingOptMaxIter = 10 // passConstFoldingOpt scans all instructions for arithmetic operations over constants, @@ -460,6 +463,93 @@ func passConstFoldingOpt(b *builder) { } } +// passAlgebraicSimplificationMaxIter controls the max number of iterations per-BB in passAlgebraicSimplification, before giving up. +const passAlgebraicSimplificationMaxIter = 10 + +// passAlgebraicSimplification performs algebraic simplification. +func passAlgebraicSimplification(b *builder) { + // isConstant is a utility for nil-safe check for Constant(). It can be moved into inst.Constant() if useful. + isConstant := func(inst *Instruction) bool { return inst != nil && inst.Constant() } + // isCanonical returns true when the given pair of instruction resolves to non-constant, constant. 
+ isCanonical := func(a, b *Instruction) bool { return !isConstant(a) && isConstant(b) } + makeAddConstant := func(yDef, wDef *Instruction) *Instruction { + // Create a const as wide as yDef (either 32 or 64-bits), sum the two consts in cur and xDef. + // We are assuming the types match. + instr := b.AllocateInstruction() + instr.opcode = OpcodeIconst + instr.typ = yDef.typ + instr.u1 = yDef.ConstantVal() + wDef.ConstantVal() + instr.rValue = b.allocateValue(yDef.typ) + return instr + } + // TODO: We should first canonicalize operations. E.g, Iadd const, v => Iadd v, const. + for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { + for iter, isFixedPoint := 0, false; iter < passAlgebraicSimplificationMaxIter && !isFixedPoint; iter++ { + isFixedPoint = true + for cur := blk.rootInstr; cur != nil; cur = cur.next { + // The fixed point is reached through a simple iteration over the list of instructions. + // Note: Instead of just an unbounded loop with a flag, we may also add an upper bound to the number of iterations. + op := cur.Opcode() + switch op { + // For a given sequence of instructions: + // C0 = Iconst_(32|64) ... + // C1 = Iconst_(32|64) ... + // V0 = ... + // V1 = Iadd V0, C0 + // Vn = Iadd V1, C1 + // Rewrites Vn to: + // Ck = Iconst_(32|64) C0+C1 + // Vn = Iadd V0, Ck + // C0, C1, V0, V1 might be deleted by passDeadCodeEliminationOpt + // if they are not referenced by other instructions. + case OpcodeIadd: + x, y := cur.Arg2() + xDef, yDef := b.valueIDToInstruction[x.ID()], b.valueIDToInstruction[y.ID()] + // Only apply if the referenced value was defined by an Iadd. 
+ if xDef == nil || xDef.Opcode() != OpcodeIadd { + continue + } + // Canonical representation is `Iadd Value, Const` + if !isCanonical(xDef, yDef) { + continue + } + // Verify the instruction xDef is in the form `Iadd Value, Const` + v, w := xDef.Arg2() + vDef, wDef := b.valueIDToInstruction[v.ID()], b.valueIDToInstruction[w.ID()] + if !isCanonical(vDef, wDef) { + continue + } + + isFixedPoint = false + + // Create a const as wide as yDef (either 32 or 64-bits), sum the two consts in cur and xDef. + // We are assuming the types match. + instr := makeAddConstant(yDef, wDef) + // Update the current instruction to point to the value referenced by xDef and the new const. + cur.v, cur.v2 = v, instr.Return() + + // Update or append the new valueId to the mapping slice. + if int(b.nextValueID) >= len(b.valueIDToInstruction) { + b.valueIDToInstruction = append(b.valueIDToInstruction, instr) + } else { + b.valueIDToInstruction[instr.Return().ID()] = instr + } + + // Insert the new instruction in the linked list between cur.prev and cur. + cur.prev.next = instr + instr.prev = cur.prev + cur.prev = instr + instr.next = cur + + if cur == blk.rootInstr { + blk.rootInstr = instr + } + } + } + } + } +} + // passSortSuccessors sorts the successors of each block in the natural program order. func passSortSuccessors(b *builder) { for i := 0; i < b.basicBlocksPool.Allocated(); i++ { diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 5090252da4..cf0fb8a8d4 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -528,7 +528,7 @@ blk0: () oneI64 := b.AllocateInstruction().AsIconst64(1).Insert(b).Return() // Iadd MaxInt64, 1 overflows and wraps around to 0x8000000000000000 (min representable Int64) wrapI64 := b.AllocateInstruction().AsIadd(maxI64, oneI64).Insert(b).Return() - // Imul MaxInt64, MaxInt64 overflows and wraps around to 0x1. 
+ // Imul MaxInt64, MaxInt64 overflows and wraps around to 0x1. mulI64 := b.AllocateInstruction().AsImul(maxI64, maxI64).Insert(b).Return() // Explicitly using the constant because math.MinInt64 is not representable. @@ -637,6 +637,54 @@ blk0: () v28:f64 = F64const -Inf v29:f64 = F64const +Inf Return v2, v3, v5, v8, v9, v11, v14, v15, v16, v18, v19, v20, v23, v24, v25, v27, v28, v29 +`, + }, + { + name: "algebraic simplification", + prePass: passCollectValueIdToInstructionMapping, + pass: passAlgebraicSimplification, + postPass: passDeadCodeEliminationOpt, + setup: func(b *builder) (verifier func(t *testing.T)) { + entry := b.AllocateBasicBlock() + b.SetCurrentBlock(entry) + + i32Param := entry.AddParam(b, TypeI32) + i64Param := entry.AddParam(b, TypeI64) + + oneI32 := b.AllocateInstruction().AsIconst32(1).Insert(b).Return() + twoI32 := b.AllocateInstruction().AsIconst32(2).Insert(b).Return() + res1I32 := b.AllocateInstruction().AsIadd(i32Param, oneI32).Insert(b).Return() + res2I32 := b.AllocateInstruction().AsIadd(res1I32, twoI32).Insert(b).Return() + + oneI64 := b.AllocateInstruction().AsIconst64(1).Insert(b).Return() + twoI64 := b.AllocateInstruction().AsIconst64(2).Insert(b).Return() + res1I64 := b.AllocateInstruction().AsIadd(i64Param, oneI64).Insert(b).Return() + res2I64 := b.AllocateInstruction().AsIadd(res1I64, twoI64).Insert(b).Return() + + ret := b.AllocateInstruction() + ret.AsReturn([]Value{res2I32, res2I64}) + b.InsertInstruction(ret) + return nil + }, + before: ` +blk0: (v0:i32, v1:i64) + v2:i32 = Iconst_32 0x1 + v3:i32 = Iconst_32 0x2 + v4:i32 = Iadd v0, v2 + v5:i32 = Iadd v4, v3 + v6:i64 = Iconst_64 0x1 + v7:i64 = Iconst_64 0x2 + v8:i64 = Iadd v1, v6 + v9:i64 = Iadd v8, v7 + Return v5, v9 +`, + after: ` +blk0: (v0:i32, v1:i64) + v10:i32 = Iconst_32 0x3 + v5:i32 = Iadd v0, v10 + v11:i64 = Iconst_64 0x3 + v9:i64 = Iadd v1, v11 + Return v5, v9 `, + }, + } { From 15901190536ce711c822385fe436c3c043f0ffc2 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: 
Tue, 12 Dec 2023 19:39:15 +0100 Subject: [PATCH 18/22] add more cases, perf still not particularly interesting Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 45 ++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 9e96ac2c15..955436b588 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -373,7 +373,7 @@ func passCollectValueIdToInstructionMapping(b *builder) { } // passConstFoldingOptMaxIter controls the max number of iterations per-BB in passConstFoldingOpt, before giving up. -const passConstFoldingOptMaxIter = 10 +const passConstFoldingOptMaxIter = math.MaxInt // passConstFoldingOpt scans all instructions for arithmetic operations over constants, // and replaces them with a const of their result. Repeats for each basic blocks until @@ -408,14 +408,7 @@ func passConstFoldingOpt(b *builder) { // Signed integers are 2 complement, so we can just apply the operations. // Operations are evaluated over uint64s and will be bitcasted at the use-sites. xc, yc := xDef.ConstantVal(), yDef.ConstantVal() - switch op { - case OpcodeIadd: - cur.u1 = xc + yc - case OpcodeIsub: - cur.u1 = xc - yc - case OpcodeImul: - cur.u1 = xc * yc - } + cur.u1 = eval(op, xc, yc) } case OpcodeFadd, OpcodeFsub, OpcodeFmul: x, y := cur.Arg2() @@ -463,6 +456,25 @@ func passConstFoldingOpt(b *builder) { } } +func eval(op Opcode, xc uint64, yc uint64) uint64 { + switch op { + case OpcodeIadd: + return xc + yc + case OpcodeIsub: + return xc - yc + case OpcodeImul: + return xc * yc + case OpcodeBor: + return xc | yc + case OpcodeBand: + return xc & yc + case OpcodeBxor: + return xc ^ yc + default: + panic("unhandled default case") + } +} + // passAlgebraicSimplificationMaxIter controls the max number of iterations per-BB in passAlgebraicSimplification, before giving up. 
const passAlgebraicSimplificationMaxIter = 10 @@ -472,14 +484,15 @@ func passAlgebraicSimplification(b *builder) { isConstant := func(inst *Instruction) bool { return inst != nil && inst.Constant() } // isCanonical returns true when the given pair of instruction resolves to non-constant, constant. isCanonical := func(a, b *Instruction) bool { return !isConstant(a) && isConstant(b) } - makeAddConstant := func(yDef, wDef *Instruction) *Instruction { + makeConstant := func(yDef, wDef *Instruction, op Opcode) *Instruction { // Create a const as wide as yDef (either 32 or 64-bits), sum the two consts in cur and xDef. // We are assuming the types match. instr := b.AllocateInstruction() instr.opcode = OpcodeIconst instr.typ = yDef.typ - instr.u1 = yDef.ConstantVal() + wDef.ConstantVal() instr.rValue = b.allocateValue(yDef.typ) + yc, wc := yDef.ConstantVal(), wDef.ConstantVal() + instr.u1 = eval(op, yc, wc) return instr } // TODO: We should first canonicalize operations. E.g, Iadd const, v => Iadd v, const. @@ -502,11 +515,11 @@ func passAlgebraicSimplification(b *builder) { // Vn = Iadd V0, Ck // C0, C1, V0, V1 might be deleted by passDeadCodeEliminationOpt // if they are not referenced by other instructions. - case OpcodeIadd: + case OpcodeIadd, OpcodeImul, OpcodeBor, OpcodeBand, OpcodeBxor: x, y := cur.Arg2() xDef, yDef := b.valueIDToInstruction[x.ID()], b.valueIDToInstruction[y.ID()] - // Only apply if the referenced value was defined by an Iadd. - if xDef == nil || xDef.Opcode() != OpcodeIadd { + // Only apply if the referenced value was defined by the same instruction. + if xDef == nil || xDef.Opcode() != op { continue } // Canonical representation is `Iadd Value, Const` @@ -524,7 +537,7 @@ func passAlgebraicSimplification(b *builder) { // Create a const as wide as yDef (either 32 or 64-bits), sum the two consts in cur and xDef. // We are assuming the types match. 
- instr := makeAddConstant(yDef, wDef) + instr := makeConstant(yDef, wDef, op) // Update the current instruction to point to the value referenced by xDef and the new const. cur.v, cur.v2 = v, instr.Return() @@ -544,6 +557,8 @@ func passAlgebraicSimplification(b *builder) { if cur == blk.rootInstr { blk.rootInstr = instr } + default: + continue } } } From 796b9697ad1867ac011fcf5811815fb85fbd403e Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Fri, 15 Dec 2023 15:53:02 +0100 Subject: [PATCH 19/22] wip Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 133 +++++++---------------------- 1 file changed, 32 insertions(+), 101 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 955436b588..e082acdbc6 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -23,8 +23,6 @@ func (b *builder) RunPasses() { passCalculateImmediateDominators(b) passCollectValueIdToInstructionMapping(b) - passNopInstElimination(b) - // TODO: implement either conversion of irreducible CFG into reducible one, or irreducible CFG detection where we panic. // WebAssembly program shouldn't result in irreducible CFG, but we should handle it properly in just in case. // See FixIrreducible pass in LLVM: https://llvm.org/doxygen/FixIrreducible_8cpp_source.html @@ -38,7 +36,7 @@ func (b *builder) RunPasses() { // and more! 
passConstFoldingOpt(b) - passAlgebraicSimplification(b) + passNopInstElimination(b) passCollectValueIdToInstructionMapping(b) @@ -333,10 +331,10 @@ func passNopInstElimination(b *builder) { b.alias(cur.Return(), x) } } - // Z := Const 0 - // (Iadd X, Z) => X - // (Iadd Z, Y) => Y - case OpcodeIadd: + // When Op is Iadd, Bor, Bxor, Rotl or Rotr, and Z is Iconst 0: + // (Op X, Z) => X + // (Op Z, Y) => Y + case OpcodeIadd, OpcodeBor, OpcodeBxor, OpcodeRotl, OpcodeRotr: x, y := cur.Arg2() definingInst := b.valueIDToInstruction[y.ID()] if definingInst == nil { @@ -349,6 +347,26 @@ func passNopInstElimination(b *builder) { if definingInst.Constant() && definingInst.ConstantVal() == 0 { b.alias(cur.Return(), x) } + // When Op is Imul and Z is Iconst 1: + // (Op X, Z) => X + // (Op Z, Y) => Y + // TODO: This is also valid for UDiv, SDiv, but they are trapping, so we would + // need to update passDeadCodeEliminationOpt to account for this case and mark them dead. + case OpcodeImul: + x, y := cur.Arg2() + definingInst := b.valueIDToInstruction[y.ID()] + if definingInst == nil { + if definingInst = b.valueIDToInstruction[x.ID()]; definingInst == nil { + continue + } else { + x = y + } + } + if definingInst.Constant() && definingInst.ConstantVal() == 1 { + b.alias(cur.Return(), x) + } + default: + continue } } } @@ -389,7 +407,7 @@ func passConstFoldingOpt(b *builder) { switch op { // X := Const xc // Y := Const yc - // - (Iadd X, Y) => Const (xc + yc) + // - (op X, Y) => Const (xc yc); e.g. if op is Iadd => xc + yc. case OpcodeIadd, OpcodeIsub, OpcodeImul: x, y := cur.Arg2() xDef := b.valueIDToInstruction[x.ID()] @@ -408,8 +426,11 @@ func passConstFoldingOpt(b *builder) { // Signed integers are 2 complement, so we can just apply the operations. // Operations are evaluated over uint64s and will be bitcasted at the use-sites. 
xc, yc := xDef.ConstantVal(), yDef.ConstantVal() - cur.u1 = eval(op, xc, yc) + cur.u1 = evalArithmeticOp(op, xc, yc) } + // X := Const xc + // Y := Const yc + // - (op X, Y) => Const (xc yc); e.g. if op is Fadd => xc + yc. case OpcodeFadd, OpcodeFsub, OpcodeFmul: x, y := cur.Arg2() xDef := b.valueIDToInstruction[x.ID()] @@ -456,7 +477,7 @@ func passConstFoldingOpt(b *builder) { } } -func eval(op Opcode, xc uint64, yc uint64) uint64 { +func evalArithmeticOp(op Opcode, xc uint64, yc uint64) uint64 { switch op { case OpcodeIadd: return xc + yc @@ -471,97 +492,7 @@ func eval(op Opcode, xc uint64, yc uint64) uint64 { case OpcodeBxor: return xc ^ yc default: - panic("unhandled default case") - } -} - -// passAlgebraicSimplificationMaxIter controls the max number of iterations per-BB in passAlgebraicSimplification, before giving up. -const passAlgebraicSimplificationMaxIter = 10 - -// passAlgebraicSimplification performs algebraic simplification. -func passAlgebraicSimplification(b *builder) { - // isConstant is a utility for nil-safe check for Constant(). It can be moved into inst.Constant() if useful. - isConstant := func(inst *Instruction) bool { return inst != nil && inst.Constant() } - // isCanonical returns true when the given pair of instruction resolves to non-constant, constant. - isCanonical := func(a, b *Instruction) bool { return !isConstant(a) && isConstant(b) } - makeConstant := func(yDef, wDef *Instruction, op Opcode) *Instruction { - // Create a const as wide as yDef (either 32 or 64-bits), sum the two consts in cur and xDef. - // We are assuming the types match. - instr := b.AllocateInstruction() - instr.opcode = OpcodeIconst - instr.typ = yDef.typ - instr.rValue = b.allocateValue(yDef.typ) - yc, wc := yDef.ConstantVal(), wDef.ConstantVal() - instr.u1 = eval(op, yc, wc) - return instr - } - // TODO: We should first canonicalize operations. E.g, Iadd const, v => Iadd v, const. 
- for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { - for iter, isFixedPoint := 0, false; iter < passAlgebraicSimplificationMaxIter && !isFixedPoint; iter++ { - isFixedPoint = true - for cur := blk.rootInstr; cur != nil; cur = cur.next { - // The fixed point is reached through a simple iteration over the list of instructions. - // Note: Instead of just an unbounded loop with a flag, we may also add an upper bound to the number of iterations. - op := cur.Opcode() - switch op { - // For a given sequence of instructions: - // C0 = Iconst_(32|64) ... - // C1 = Iconst_(32|64) ... - // V0 = ... - // V1 = Iadd V0, C0 - // Vn = Iadd V1, C1 - // Rewrites Vn to: - // Ck = Iconst_(32|64) C0+C1 - // Vn = Iadd V0, Ck - // C0, C1, V0, V1 might be deleted by passDeadCodeEliminationOpt - // if they are not referenced by other instructions. - case OpcodeIadd, OpcodeImul, OpcodeBor, OpcodeBand, OpcodeBxor: - x, y := cur.Arg2() - xDef, yDef := b.valueIDToInstruction[x.ID()], b.valueIDToInstruction[y.ID()] - // Only apply if the referenced value was defined by the same instruction. - if xDef == nil || xDef.Opcode() != op { - continue - } - // Canonical representation is `Iadd Value, Const` - if !isCanonical(xDef, yDef) { - continue - } - // Verify the instruction xDef is in the form `Iadd Value, Const` - v, w := xDef.Arg2() - vDef, wDef := b.valueIDToInstruction[v.ID()], b.valueIDToInstruction[w.ID()] - if !isCanonical(vDef, wDef) { - continue - } - - isFixedPoint = false - - // Create a const as wide as yDef (either 32 or 64-bits), sum the two consts in cur and xDef. - // We are assuming the types match. - instr := makeConstant(yDef, wDef, op) - // Update the current instruction to point to the value referenced by xDef and the new const. - cur.v, cur.v2 = v, instr.Return() - - // Update or append the new valueId to the mapping slice. 
- if int(b.nextValueID) >= len(b.valueIDToInstruction) { - b.valueIDToInstruction = append(b.valueIDToInstruction, instr) - } else { - b.valueIDToInstruction[instr.Return().ID()] = instr - } - - // Insert the new instruction in the linked list between cur.prev and cur. - cur.prev.next = instr - instr.prev = cur.prev - cur.prev = instr - instr.next = cur - - if cur == blk.rootInstr { - blk.rootInstr = instr - } - default: - continue - } - } - } + panic("unhandled default case " + op.String()) } } From a4b5549381bdc86ad8888996fdb17b79e4ec3d96 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Fri, 15 Dec 2023 15:54:00 +0100 Subject: [PATCH 20/22] wip Signed-off-by: Edoardo Vacchi --- .../engine/wazevo/backend/backend_test.go | 280 +++++++++--------- 1 file changed, 138 insertions(+), 142 deletions(-) diff --git a/internal/engine/wazevo/backend/backend_test.go b/internal/engine/wazevo/backend/backend_test.go index 592e20a8d7..0fe7d4be0a 100644 --- a/internal/engine/wazevo/backend/backend_test.go +++ b/internal/engine/wazevo/backend/backend_test.go @@ -1394,126 +1394,124 @@ L1 (SSA Block: blk0): L1 (SSA Block: blk0): mov x130?, x2 mov v131?.8b, v0.8b - orr w289?, wzr, #0x1 - madd w133?, w130?, w289?, wzr - orr w288?, wzr, #0x2 - madd w135?, w130?, w288?, wzr - orr w287?, wzr, #0x3 - madd w137?, w130?, w287?, wzr - orr w286?, wzr, #0x4 - madd w139?, w130?, w286?, wzr - movz w285?, #0x5, lsl 0 - madd w141?, w130?, w285?, wzr - orr w284?, wzr, #0x6 - madd w143?, w130?, w284?, wzr - orr w283?, wzr, #0x7 - madd w145?, w130?, w283?, wzr - orr w282?, wzr, #0x8 - madd w147?, w130?, w282?, wzr - movz w281?, #0x9, lsl 0 - madd w149?, w130?, w281?, wzr - movz w280?, #0xa, lsl 0 - madd w151?, w130?, w280?, wzr - movz w279?, #0xb, lsl 0 - madd w153?, w130?, w279?, wzr - orr w278?, wzr, #0xc - madd w155?, w130?, w278?, wzr - movz w277?, #0xd, lsl 0 - madd w157?, w130?, w277?, wzr - orr w276?, wzr, #0xe - madd w159?, w130?, w276?, wzr - orr w275?, wzr, #0xf - madd w161?, w130?, 
w275?, wzr - orr w274?, wzr, #0x10 - madd w163?, w130?, w274?, wzr - movz w273?, #0x11, lsl 0 - madd w165?, w130?, w273?, wzr - movz w272?, #0x12, lsl 0 - madd w167?, w130?, w272?, wzr - movz w271?, #0x13, lsl 0 - madd w169?, w130?, w271?, wzr - movz w270?, #0x14, lsl 0 - madd w171?, w130?, w270?, wzr - add w172?, w169?, w171? - add w173?, w167?, w172? - add w174?, w165?, w173? - add w175?, w163?, w174? - add w176?, w161?, w175? - add w177?, w159?, w176? - add w178?, w157?, w177? - add w179?, w155?, w178? - add w180?, w153?, w179? - add w181?, w151?, w180? - add w182?, w149?, w181? - add w183?, w147?, w182? - add w184?, w145?, w183? - add w185?, w143?, w184? - add w186?, w141?, w185? - add w187?, w139?, w186? - add w188?, w137?, w187? - add w189?, w135?, w188? - add w190?, w133?, w189? - ldr s269?, #8; b 8; data.f32 1.000000 - fmul s192?, s131?, s269? - ldr s268?, #8; b 8; data.f32 2.000000 - fmul s194?, s131?, s268? - ldr s267?, #8; b 8; data.f32 3.000000 - fmul s196?, s131?, s267? - ldr s266?, #8; b 8; data.f32 4.000000 - fmul s198?, s131?, s266? - ldr s265?, #8; b 8; data.f32 5.000000 - fmul s200?, s131?, s265? - ldr s264?, #8; b 8; data.f32 6.000000 - fmul s202?, s131?, s264? - ldr s263?, #8; b 8; data.f32 7.000000 - fmul s204?, s131?, s263? - ldr s262?, #8; b 8; data.f32 8.000000 - fmul s206?, s131?, s262? - ldr s261?, #8; b 8; data.f32 9.000000 - fmul s208?, s131?, s261? - ldr s260?, #8; b 8; data.f32 10.000000 - fmul s210?, s131?, s260? - ldr s259?, #8; b 8; data.f32 11.000000 - fmul s212?, s131?, s259? - ldr s258?, #8; b 8; data.f32 12.000000 - fmul s214?, s131?, s258? - ldr s257?, #8; b 8; data.f32 13.000000 - fmul s216?, s131?, s257? - ldr s256?, #8; b 8; data.f32 14.000000 - fmul s218?, s131?, s256? - ldr s255?, #8; b 8; data.f32 15.000000 - fmul s220?, s131?, s255? - ldr s254?, #8; b 8; data.f32 16.000000 - fmul s222?, s131?, s254? - ldr s253?, #8; b 8; data.f32 17.000000 - fmul s224?, s131?, s253? 
- ldr s252?, #8; b 8; data.f32 18.000000 - fmul s226?, s131?, s252? - ldr s251?, #8; b 8; data.f32 19.000000 - fmul s228?, s131?, s251? - ldr s250?, #8; b 8; data.f32 20.000000 - fmul s230?, s131?, s250? - fadd s231?, s228?, s230? - fadd s232?, s226?, s231? - fadd s233?, s224?, s232? - fadd s234?, s222?, s233? - fadd s235?, s220?, s234? - fadd s236?, s218?, s235? - fadd s237?, s216?, s236? - fadd s238?, s214?, s237? - fadd s239?, s212?, s238? - fadd s240?, s210?, s239? - fadd s241?, s208?, s240? - fadd s242?, s206?, s241? - fadd s243?, s204?, s242? - fadd s244?, s202?, s243? - fadd s245?, s200?, s244? - fadd s246?, s198?, s245? - fadd s247?, s196?, s246? - fadd s248?, s194?, s247? - fadd s249?, s192?, s248? - mov v0.8b, v249?.8b - mov x0, x190? + orr w286?, wzr, #0x2 + madd w133?, w130?, w286?, wzr + orr w285?, wzr, #0x3 + madd w135?, w130?, w285?, wzr + orr w284?, wzr, #0x4 + madd w137?, w130?, w284?, wzr + movz w283?, #0x5, lsl 0 + madd w139?, w130?, w283?, wzr + orr w282?, wzr, #0x6 + madd w141?, w130?, w282?, wzr + orr w281?, wzr, #0x7 + madd w143?, w130?, w281?, wzr + orr w280?, wzr, #0x8 + madd w145?, w130?, w280?, wzr + movz w279?, #0x9, lsl 0 + madd w147?, w130?, w279?, wzr + movz w278?, #0xa, lsl 0 + madd w149?, w130?, w278?, wzr + movz w277?, #0xb, lsl 0 + madd w151?, w130?, w277?, wzr + orr w276?, wzr, #0xc + madd w153?, w130?, w276?, wzr + movz w275?, #0xd, lsl 0 + madd w155?, w130?, w275?, wzr + orr w274?, wzr, #0xe + madd w157?, w130?, w274?, wzr + orr w273?, wzr, #0xf + madd w159?, w130?, w273?, wzr + orr w272?, wzr, #0x10 + madd w161?, w130?, w272?, wzr + movz w271?, #0x11, lsl 0 + madd w163?, w130?, w271?, wzr + movz w270?, #0x12, lsl 0 + madd w165?, w130?, w270?, wzr + movz w269?, #0x13, lsl 0 + madd w167?, w130?, w269?, wzr + movz w268?, #0x14, lsl 0 + madd w169?, w130?, w268?, wzr + add w170?, w167?, w169? + add w171?, w165?, w170? + add w172?, w163?, w171? + add w173?, w161?, w172? + add w174?, w159?, w173? + add w175?, w157?, w174? 
+ add w176?, w155?, w175? + add w177?, w153?, w176? + add w178?, w151?, w177? + add w179?, w149?, w178? + add w180?, w147?, w179? + add w181?, w145?, w180? + add w182?, w143?, w181? + add w183?, w141?, w182? + add w184?, w139?, w183? + add w185?, w137?, w184? + add w186?, w135?, w185? + add w187?, w133?, w186? + add w188?, w130?, w187? + ldr s267?, #8; b 8; data.f32 1.000000 + fmul s190?, s131?, s267? + ldr s266?, #8; b 8; data.f32 2.000000 + fmul s192?, s131?, s266? + ldr s265?, #8; b 8; data.f32 3.000000 + fmul s194?, s131?, s265? + ldr s264?, #8; b 8; data.f32 4.000000 + fmul s196?, s131?, s264? + ldr s263?, #8; b 8; data.f32 5.000000 + fmul s198?, s131?, s263? + ldr s262?, #8; b 8; data.f32 6.000000 + fmul s200?, s131?, s262? + ldr s261?, #8; b 8; data.f32 7.000000 + fmul s202?, s131?, s261? + ldr s260?, #8; b 8; data.f32 8.000000 + fmul s204?, s131?, s260? + ldr s259?, #8; b 8; data.f32 9.000000 + fmul s206?, s131?, s259? + ldr s258?, #8; b 8; data.f32 10.000000 + fmul s208?, s131?, s258? + ldr s257?, #8; b 8; data.f32 11.000000 + fmul s210?, s131?, s257? + ldr s256?, #8; b 8; data.f32 12.000000 + fmul s212?, s131?, s256? + ldr s255?, #8; b 8; data.f32 13.000000 + fmul s214?, s131?, s255? + ldr s254?, #8; b 8; data.f32 14.000000 + fmul s216?, s131?, s254? + ldr s253?, #8; b 8; data.f32 15.000000 + fmul s218?, s131?, s253? + ldr s252?, #8; b 8; data.f32 16.000000 + fmul s220?, s131?, s252? + ldr s251?, #8; b 8; data.f32 17.000000 + fmul s222?, s131?, s251? + ldr s250?, #8; b 8; data.f32 18.000000 + fmul s224?, s131?, s250? + ldr s249?, #8; b 8; data.f32 19.000000 + fmul s226?, s131?, s249? + ldr s248?, #8; b 8; data.f32 20.000000 + fmul s228?, s131?, s248? + fadd s229?, s226?, s228? + fadd s230?, s224?, s229? + fadd s231?, s222?, s230? + fadd s232?, s220?, s231? + fadd s233?, s218?, s232? + fadd s234?, s216?, s233? + fadd s235?, s214?, s234? + fadd s236?, s212?, s235? + fadd s237?, s210?, s236? + fadd s238?, s208?, s237? + fadd s239?, s206?, s238? 
+ fadd s240?, s204?, s239? + fadd s241?, s202?, s240? + fadd s242?, s200?, s241? + fadd s243?, s198?, s242? + fadd s244?, s196?, s243? + fadd s245?, s194?, s244? + fadd s246?, s192?, s245? + fadd s247?, s190?, s246? + mov v0.8b, v247?.8b + mov x0, x188? ret `, afterFinalizeARM64: ` @@ -1539,47 +1537,44 @@ L1 (SSA Block: blk0): str q27, [sp, #-0x10]! movz x27, #0x120, lsl 0 str x27, [sp, #-0x10]! - orr w8, wzr, #0x1 + orr w8, wzr, #0x2 madd w8, w2, w8, wzr - orr w9, wzr, #0x2 + orr w9, wzr, #0x3 madd w9, w2, w9, wzr - orr w10, wzr, #0x3 + orr w10, wzr, #0x4 madd w10, w2, w10, wzr - orr w11, wzr, #0x4 + movz w11, #0x5, lsl 0 madd w11, w2, w11, wzr - movz w12, #0x5, lsl 0 + orr w12, wzr, #0x6 madd w12, w2, w12, wzr - orr w13, wzr, #0x6 + orr w13, wzr, #0x7 madd w13, w2, w13, wzr - orr w14, wzr, #0x7 + orr w14, wzr, #0x8 madd w14, w2, w14, wzr - orr w15, wzr, #0x8 + movz w15, #0x9, lsl 0 madd w15, w2, w15, wzr - movz w16, #0x9, lsl 0 + movz w16, #0xa, lsl 0 madd w16, w2, w16, wzr - movz w17, #0xa, lsl 0 + movz w17, #0xb, lsl 0 madd w17, w2, w17, wzr - movz w19, #0xb, lsl 0 + orr w19, wzr, #0xc madd w19, w2, w19, wzr - orr w20, wzr, #0xc + movz w20, #0xd, lsl 0 madd w20, w2, w20, wzr - movz w21, #0xd, lsl 0 + orr w21, wzr, #0xe madd w21, w2, w21, wzr - orr w22, wzr, #0xe + orr w22, wzr, #0xf madd w22, w2, w22, wzr - orr w23, wzr, #0xf + orr w23, wzr, #0x10 madd w23, w2, w23, wzr - orr w24, wzr, #0x10 + movz w24, #0x11, lsl 0 madd w24, w2, w24, wzr - movz w25, #0x11, lsl 0 + movz w25, #0x12, lsl 0 madd w25, w2, w25, wzr - movz w26, #0x12, lsl 0 + movz w26, #0x13, lsl 0 madd w26, w2, w26, wzr - movz w29, #0x13, lsl 0 + movz w29, #0x14, lsl 0 madd w29, w2, w29, wzr - movz w30, #0x14, lsl 0 - madd w30, w2, w30, wzr - add w29, w29, w30 add w26, w26, w29 add w25, w25, w26 add w24, w24, w25 @@ -1598,6 +1593,7 @@ L1 (SSA Block: blk0): add w10, w10, w11 add w9, w9, w10 add w8, w8, w9 + add w8, w2, w8 ldr s8, #8; b 8; data.f32 1.000000 fmul s8, s0, s8 ldr s9, #8; b 8; data.f32 
2.000000 From d9a81d34913a5a05e9cb415b1443cd6d4cf3807c Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Fri, 15 Dec 2023 15:57:07 +0100 Subject: [PATCH 21/22] format Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass_test.go | 48 ------------------------- 1 file changed, 48 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index cf0fb8a8d4..f5e46b7636 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -637,54 +637,6 @@ blk0: () v28:f64 = F64const -Inf v29:f64 = F64const +Inf Return v2, v3, v5, v8, v9, v11, v14, v15, v16, v18, v19, v20, v23, v24, v25, v27, v28, v29 -`, - }, - { - name: "algebraic simplification", - prePass: passCollectValueIdToInstructionMapping, - pass: passAlgebraicSimplification, - postPass: passDeadCodeEliminationOpt, - setup: func(b *builder) (verifier func(t *testing.T)) { - entry := b.AllocateBasicBlock() - b.SetCurrentBlock(entry) - - i32Param := entry.AddParam(b, TypeI32) - i64Param := entry.AddParam(b, TypeI64) - - oneI32 := b.AllocateInstruction().AsIconst32(1).Insert(b).Return() - twoI32 := b.AllocateInstruction().AsIconst32(2).Insert(b).Return() - res1I32 := b.AllocateInstruction().AsIadd(i32Param, oneI32).Insert(b).Return() - res2I32 := b.AllocateInstruction().AsIadd(res1I32, twoI32).Insert(b).Return() - - oneI64 := b.AllocateInstruction().AsIconst64(1).Insert(b).Return() - twoI64 := b.AllocateInstruction().AsIconst64(2).Insert(b).Return() - res1I64 := b.AllocateInstruction().AsIadd(i64Param, oneI64).Insert(b).Return() - res2I64 := b.AllocateInstruction().AsIadd(res1I64, twoI64).Insert(b).Return() - - ret := b.AllocateInstruction() - ret.AsReturn([]Value{res2I32, res2I64}) - b.InsertInstruction(ret) - return nil - }, - before: ` -blk0: (v0:i32, v1:i64) - v2:i32 = Iconst_32 0x1 - v3:i32 = Iconst_32 0x2 - v4:i32 = Iadd v0, v2 - v5:i32 = Iadd v4, v3 - v6:i64 = Iconst_64 0x1 - v7:i64 = Iconst_64 0x2 - v8:i64 = Iadd 
v1, v6 - v9:i64 = Iadd v8, v7 - Return v5, v9 -`, - after: ` -blk0: (v0:i32, v1:i64) - v10:i32 = Iconst_32 0x3 - v5:i32 = Iadd v0, v10 - v11:i64 = Iconst_64 0x3 - v9:i64 = Iadd v1, v11 - Return v5, v9 `, }, } { From 7ff515583914e2df435924512e4f1abbdfdfe789 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Fri, 15 Dec 2023 16:03:47 +0100 Subject: [PATCH 22/22] remove useless extra pass Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index e082acdbc6..3446311e11 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -38,8 +38,6 @@ func (b *builder) RunPasses() { passConstFoldingOpt(b) passNopInstElimination(b) - passCollectValueIdToInstructionMapping(b) - // passDeadCodeEliminationOpt could be more accurate if we do this after other optimizations. passDeadCodeEliminationOpt(b) b.donePasses = true