From 362a7302702f801dbc61e100a327706e8bc7db9c Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Tue, 28 Nov 2023 14:17:38 +0100 Subject: [PATCH 01/22] wip: constant folding Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 18 ++++++++++++++++++ internal/engine/wazevo/ssa/pass_test.go | 24 +++++++++++++++++++++--- 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 17c8486f60..f7f78e13a3 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -329,6 +329,8 @@ func passNopInstElimination(b *builder) { for cur := blk.rootInstr; cur != nil; cur = cur.next { switch cur.Opcode() { // TODO: add more logics here. + // Amount := (Const $someValue) + // (Shift X, Amount) where Amount == x.Type.Bits() => X case OpcodeIshl, OpcodeSshr, OpcodeUshr: x, amount := cur.Arg2() definingInst := b.valueIDToInstruction[amount.ID()] @@ -348,6 +350,22 @@ func passNopInstElimination(b *builder) { b.alias(cur.Return(), x) } } + // Z := Const 0 + // (Iadd X, Z) => X + // (Iadd Z, Y) => Y + case OpcodeIadd: + x, y := cur.Arg2() + definingInst := b.valueIDToInstruction[y.ID()] + if definingInst == nil { + if definingInst = b.valueIDToInstruction[x.ID()]; definingInst == nil { + continue + } else { + x = y + } + } + if definingInst.Constant() && definingInst.ConstantVal() == 0 { + b.alias(cur.Return(), x) + } } } } diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 2a831a67f1..a30a61e7ca 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -310,8 +310,20 @@ blk1: () <-- (blk0) nonZeroI64 := b.AllocateInstruction().AsIconst64(64*245 + 1).Insert(b).Return() nonZeroSshr := b.AllocateInstruction().AsSshr(i64Param, nonZeroI64).Insert(b).Return() + // Iadd32. 
+ zero32 := b.AllocateInstruction().AsIconst32(0).Insert(b).Return() + nopIadd32 := b.AllocateInstruction().AsIadd(i32Param, zero32).Insert(b).Return() + + // Iadd32. + zero32_2 := b.AllocateInstruction().AsIconst32(0).Insert(b).Return() + nopIadd32_2 := b.AllocateInstruction().AsIadd(zero32_2, i32Param).Insert(b).Return() + + // Iadd64. + zero64 := b.AllocateInstruction().AsIconst64(0).Insert(b).Return() + nopIadd64 := b.AllocateInstruction().AsIadd(i64Param, zero64).Insert(b).Return() + ret := b.AllocateInstruction() - ret.AsReturn([]Value{nopIshl, nopUshr, nonZeroIshl, nonZeroSshr}) + ret.AsReturn([]Value{nopIshl, nopUshr, nonZeroIshl, nonZeroSshr, nopIadd32, nopIadd32_2, nopIadd64}) b.InsertInstruction(ret) return nil }, @@ -325,7 +337,13 @@ blk0: (v0:i32, v1:i64) v7:i32 = Ishl v0, v6 v8:i64 = Iconst_64 0x3d41 v9:i64 = Sshr v1, v8 - Return v3, v5, v7, v9 + v10:i32 = Iconst_32 0x0 + v11:i32 = Iadd v0, v10 + v12:i32 = Iconst_32 0x0 + v13:i32 = Iadd v12, v0 + v14:i64 = Iconst_64 0x0 + v15:i64 = Iadd v1, v14 + Return v3, v5, v7, v9, v11, v13, v15 `, after: ` blk0: (v0:i32, v1:i64) @@ -333,7 +351,7 @@ blk0: (v0:i32, v1:i64) v7:i32 = Ishl v0, v6 v8:i64 = Iconst_64 0x3d41 v9:i64 = Sshr v1, v8 - Return v0, v1, v7, v9 + Return v0, v1, v7, v9, v0, v0, v1 `, }, } { From fe56392beb9940bd8bb3b7e1496208d825b70972 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Tue, 28 Nov 2023 17:04:01 +0100 Subject: [PATCH 02/22] wip Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 36 ++++++++++++++++------- internal/engine/wazevo/ssa/pass_test.go | 38 +++++++++++++++++-------- 2 files changed, 52 insertions(+), 22 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index f7f78e13a3..6cfb6689ec 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -327,7 +327,8 @@ func passNopInstElimination(b *builder) { for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { 
for cur := blk.rootInstr; cur != nil; cur = cur.next { - switch cur.Opcode() { + op := cur.Opcode() + switch op { // TODO: add more logics here. // Amount := (Const $someValue) // (Shift X, Amount) where Amount == x.Type.Bits() => X @@ -351,20 +352,35 @@ func passNopInstElimination(b *builder) { } } // Z := Const 0 - // (Iadd X, Z) => X - // (Iadd Z, Y) => Y - case OpcodeIadd: + // - (Iadd|Isub X, Z) => X + // - (Iadd Z, Y) => Y + case OpcodeIadd: //, OpcodeIsub: x, y := cur.Arg2() - definingInst := b.valueIDToInstruction[y.ID()] - if definingInst == nil { - if definingInst = b.valueIDToInstruction[x.ID()]; definingInst == nil { + xDef := b.valueIDToInstruction[x.ID()] + yDef := b.valueIDToInstruction[y.ID()] + if yDef == nil { + // If there's no defining instruction, that means the amount is coming from the parameter. + if xDef == nil { + // If we are adding the two parameters, ignore. continue } else { - x = y + // Add is commutative, normalize (param, y) => (y, param). + x, y = y, x + xDef, yDef = yDef, xDef } } - if definingInst.Constant() && definingInst.ConstantVal() == 0 { - b.alias(cur.Return(), x) + if yDef.Constant() { + yc := yDef.ConstantVal() + if yc == 0 { + b.alias(cur.Return(), x) + } else if xDef.Constant() { + xc := xDef.ConstantVal() + cur.opcode = OpcodeIconst + cur.u1 = xc + yc + cur.u2 = 0 + cur.v = 0 + cur.v2 = 0 + } } } } diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index a30a61e7ca..97971d30b9 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -310,20 +310,29 @@ blk1: () <-- (blk0) nonZeroI64 := b.AllocateInstruction().AsIconst64(64*245 + 1).Insert(b).Return() nonZeroSshr := b.AllocateInstruction().AsSshr(i64Param, nonZeroI64).Insert(b).Return() - // Iadd32. 
- zero32 := b.AllocateInstruction().AsIconst32(0).Insert(b).Return() - nopIadd32 := b.AllocateInstruction().AsIadd(i32Param, zero32).Insert(b).Return() + // Iadd32 x + 0 should resolve to const. + zeroI32 := b.AllocateInstruction().AsIconst32(0).Insert(b).Return() + nopIadd32 := b.AllocateInstruction().AsIadd(i32Param, zeroI32).Insert(b).Return() - // Iadd32. - zero32_2 := b.AllocateInstruction().AsIconst32(0).Insert(b).Return() - nopIadd32_2 := b.AllocateInstruction().AsIadd(zero32_2, i32Param).Insert(b).Return() + // Iadd32 0 + x should resolve to const. + zeroI32_2 := b.AllocateInstruction().AsIconst32(0).Insert(b).Return() + nopIadd32_2 := b.AllocateInstruction().AsIadd(zeroI32_2, i32Param).Insert(b).Return() - // Iadd64. - zero64 := b.AllocateInstruction().AsIconst64(0).Insert(b).Return() - nopIadd64 := b.AllocateInstruction().AsIadd(i64Param, zero64).Insert(b).Return() + // Iadd64 x + 0 should resolve to const. + zeroI64 := b.AllocateInstruction().AsIconst64(0).Insert(b).Return() + nopIadd64 := b.AllocateInstruction().AsIadd(i64Param, zeroI64).Insert(b).Return() + + // Iadd64 0 + x should resolve to const. + zeroI64_2 := b.AllocateInstruction().AsIconst64(0).Insert(b).Return() + nopIadd64_2 := b.AllocateInstruction().AsIadd(zeroI64_2, i64Param).Insert(b).Return() + + // Iadd32 const1 + const2 should resolve to Const (const1 + const2). 
+ nonZeroI32_3 := b.AllocateInstruction().AsIconst32(1234).Insert(b).Return() + nonZeroI32_4 := b.AllocateInstruction().AsIconst32(5678).Insert(b).Return() + foldIaddI32_3 := b.AllocateInstruction().AsIadd(nonZeroI32_3, nonZeroI32_4).Insert(b).Return() ret := b.AllocateInstruction() - ret.AsReturn([]Value{nopIshl, nopUshr, nonZeroIshl, nonZeroSshr, nopIadd32, nopIadd32_2, nopIadd64}) + ret.AsReturn([]Value{nopIshl, nopUshr, nonZeroIshl, nonZeroSshr, nopIadd32, nopIadd32_2, nopIadd64, nopIadd64_2, foldIaddI32_3}) b.InsertInstruction(ret) return nil }, @@ -343,7 +352,12 @@ blk0: (v0:i32, v1:i64) v13:i32 = Iadd v12, v0 v14:i64 = Iconst_64 0x0 v15:i64 = Iadd v1, v14 - Return v3, v5, v7, v9, v11, v13, v15 + v16:i64 = Iconst_64 0x0 + v17:i64 = Iadd v16, v1 + v18:i32 = Iconst_32 0x4d2 + v19:i32 = Iconst_32 0x162e + v20:i32 = Iadd v18, v19 + Return v3, v5, v7, v9, v11, v13, v15, v17, v20 `, after: ` blk0: (v0:i32, v1:i64) @@ -351,7 +365,7 @@ blk0: (v0:i32, v1:i64) v7:i32 = Ishl v0, v6 v8:i64 = Iconst_64 0x3d41 v9:i64 = Sshr v1, v8 - Return v0, v1, v7, v9, v0, v0, v1 + Return v0, v1, v7, v9, v0, v0, v1, v1 `, }, } { From d68ac6bf8e58f9fca064ee7f21fbaf48d760a28b Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Tue, 28 Nov 2023 17:42:18 +0100 Subject: [PATCH 03/22] wip Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 106 ++++++++++++++++-------- internal/engine/wazevo/ssa/pass_test.go | 57 ++++++++++--- 2 files changed, 120 insertions(+), 43 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 6cfb6689ec..af71d0d5f4 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -33,6 +33,8 @@ func (b *builder) RunPasses() { // Arithmetic simplifications. // and more! + passConstFoldingOpt(b) + // passDeadCodeEliminationOpt could be more accurate if we do this after other optimizations. 
passDeadCodeEliminationOpt(b) b.donePasses = true @@ -309,21 +311,7 @@ func (b *builder) clearBlkVisited() { // passNopInstElimination eliminates the instructions which is essentially a no-op. func passNopInstElimination(b *builder) { - if int(b.nextValueID) >= len(b.valueIDToInstruction) { - b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...) - } - - for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { - for cur := blk.rootInstr; cur != nil; cur = cur.next { - r1, rs := cur.Returns() - if r1.Valid() { - b.valueIDToInstruction[r1.ID()] = cur - } - for _, r := range rs { - b.valueIDToInstruction[r.ID()] = cur - } - } - } + ensureValueIdToInstructionInit(b) for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { for cur := blk.rootInstr; cur != nil; cur = cur.next { @@ -352,32 +340,84 @@ func passNopInstElimination(b *builder) { } } // Z := Const 0 - // - (Iadd|Isub X, Z) => X - // - (Iadd Z, Y) => Y - case OpcodeIadd: //, OpcodeIsub: + // (Iadd X, Z) => X + // (Iadd Z, Y) => Y + case OpcodeIadd: x, y := cur.Arg2() - xDef := b.valueIDToInstruction[x.ID()] - yDef := b.valueIDToInstruction[y.ID()] - if yDef == nil { - // If there's no defining instruction, that means the amount is coming from the parameter. - if xDef == nil { - // If we are adding the two parameters, ignore. + definingInst := b.valueIDToInstruction[y.ID()] + if definingInst == nil { + if definingInst = b.valueIDToInstruction[x.ID()]; definingInst == nil { continue } else { - // Add is commutative, normalize (param, y) => (y, param). 
- x, y = y, x - xDef, yDef = yDef, xDef + x = y } } - if yDef.Constant() { - yc := yDef.ConstantVal() - if yc == 0 { - b.alias(cur.Return(), x) - } else if xDef.Constant() { + if definingInst.Constant() && definingInst.ConstantVal() == 0 { + b.alias(cur.Return(), x) + } + } + } + } +} + +func ensureValueIdToInstructionInit(b *builder) { + if len(b.valueIDToInstruction) != 0 { + return + } + + if int(b.nextValueID) >= len(b.valueIDToInstruction) { + b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...) + } + + for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { + for cur := blk.rootInstr; cur != nil; cur = cur.next { + r1, rs := cur.Returns() + if r1.Valid() { + b.valueIDToInstruction[r1.ID()] = cur + } + for _, r := range rs { + b.valueIDToInstruction[r.ID()] = cur + } + } + } +} + +// passNopInstElimination eliminates the instructions which is essentially a no-op. +func passConstFoldingOpt(b *builder) { + ensureValueIdToInstructionInit(b) + + isFixedPoint := false + for !isFixedPoint { + isFixedPoint = true + for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { + for cur := blk.rootInstr; cur != nil; cur = cur.next { + op := cur.Opcode() + switch op { + // X := Const xc + // Y := Const yc + // - (Iadd X, Y) => Const (xc + yc) + case OpcodeIadd, OpcodeIsub: + isFixedPoint = false + x, y := cur.Arg2() + xDef := b.valueIDToInstruction[x.ID()] + yDef := b.valueIDToInstruction[y.ID()] + if xDef == nil || yDef == nil { + // If we are adding some parameter, ignore. + continue + } + if xDef.Constant() || yDef.Constant() { + yc := yDef.ConstantVal() xc := xDef.ConstantVal() + // Mutate the instruction to an Iconst. cur.opcode = OpcodeIconst - cur.u1 = xc + yc + switch op { + case OpcodeIadd: + cur.u1 = xc + yc + case OpcodeIsub: + cur.u1 = xc - yc + } cur.u2 = 0 + // Clear the references to operands. 
cur.v = 0 cur.v2 = 0 } diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 97971d30b9..2bdb14335d 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -326,13 +326,8 @@ blk1: () <-- (blk0) zeroI64_2 := b.AllocateInstruction().AsIconst64(0).Insert(b).Return() nopIadd64_2 := b.AllocateInstruction().AsIadd(zeroI64_2, i64Param).Insert(b).Return() - // Iadd32 const1 + const2 should resolve to Const (const1 + const2). - nonZeroI32_3 := b.AllocateInstruction().AsIconst32(1234).Insert(b).Return() - nonZeroI32_4 := b.AllocateInstruction().AsIconst32(5678).Insert(b).Return() - foldIaddI32_3 := b.AllocateInstruction().AsIadd(nonZeroI32_3, nonZeroI32_4).Insert(b).Return() - ret := b.AllocateInstruction() - ret.AsReturn([]Value{nopIshl, nopUshr, nonZeroIshl, nonZeroSshr, nopIadd32, nopIadd32_2, nopIadd64, nopIadd64_2, foldIaddI32_3}) + ret.AsReturn([]Value{nopIshl, nopUshr, nonZeroIshl, nonZeroSshr, nopIadd32, nopIadd32_2, nopIadd64, nopIadd64_2}) b.InsertInstruction(ret) return nil }, @@ -354,10 +349,7 @@ blk0: (v0:i32, v1:i64) v15:i64 = Iadd v1, v14 v16:i64 = Iconst_64 0x0 v17:i64 = Iadd v16, v1 - v18:i32 = Iconst_32 0x4d2 - v19:i32 = Iconst_32 0x162e - v20:i32 = Iadd v18, v19 - Return v3, v5, v7, v9, v11, v13, v15, v17, v20 + Return v3, v5, v7, v9, v11, v13, v15, v17 `, after: ` blk0: (v0:i32, v1:i64) @@ -366,6 +358,51 @@ blk0: (v0:i32, v1:i64) v8:i64 = Iconst_64 0x3d41 v9:i64 = Sshr v1, v8 Return v0, v1, v7, v9, v0, v0, v1, v1 +`, + }, + { + name: "const folding", + pass: passConstFoldingOpt, + postPass: passDeadCodeEliminationOpt, + setup: func(b *builder) (verifier func(t *testing.T)) { + entry := b.AllocateBasicBlock() + b.SetCurrentBlock(entry) + + // Iadd32 const1 + const2 should resolve to Const (const1 + const2). 
+ nonZeroI32_1 := b.AllocateInstruction().AsIconst32(0x1).Insert(b).Return() + nonZeroI32_2 := b.AllocateInstruction().AsIconst32(0x2).Insert(b).Return() + foldIaddI32_1 := b.AllocateInstruction().AsIadd(nonZeroI32_1, nonZeroI32_2).Insert(b).Return() + + // Iadd32 foldedConst1, const3 should resolve to Const (foldedConst1, const3). + nonZeroI32_3 := b.AllocateInstruction().AsIconst32(0x3).Insert(b).Return() + foldIaddI32_2 := b.AllocateInstruction().AsIadd(foldIaddI32_1, nonZeroI32_3).Insert(b).Return() + + // Isub32 foldedConst1, const3 should resolve to Const (const4, foldedConst2). + nonZeroI32_4 := b.AllocateInstruction().AsIconst32(0x4).Insert(b).Return() + foldIsubI32_1 := b.AllocateInstruction().AsIsub(nonZeroI32_4, foldIaddI32_2).Insert(b).Return() + + ret := b.AllocateInstruction() + ret.AsReturn([]Value{foldIsubI32_1}) + b.InsertInstruction(ret) + return nil + }, + before: ` +blk0: () + v0:i32 = Iconst_32 0x1 + v1:i32 = Iconst_32 0x2 + v2:i32 = Iadd v0, v1 + v3:i32 = Iconst_32 0x3 + v4:i32 = Iadd v2, v3 + v5:i32 = Iconst_32 0x4 + v6:i32 = Isub v5, v4 + Return v6 +`, + // FIXME: the first `Iconst_32 0x1` should be dead code, and should not be present in the output. + after: ` +blk0: () + v0:i32 = Iconst_32 0x1 + v6:i32 = Iconst_32 0xfffffffe + Return v6 `, }, } { From 10f0f22be893b2d543eab02e3ef6553910d224ec Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Tue, 28 Nov 2023 17:54:01 +0100 Subject: [PATCH 04/22] wip Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index af71d0d5f4..8d6ce0e504 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -382,7 +382,7 @@ func ensureValueIdToInstructionInit(b *builder) { } } -// passNopInstElimination eliminates the instructions which is essentially a no-op. 
+// passConstFoldingOpt folds constant arithmetic ops into constant ops. func passConstFoldingOpt(b *builder) { ensureValueIdToInstructionInit(b) @@ -397,7 +397,6 @@ func passConstFoldingOpt(b *builder) { // Y := Const yc // - (Iadd X, Y) => Const (xc + yc) case OpcodeIadd, OpcodeIsub: - isFixedPoint = false x, y := cur.Arg2() xDef := b.valueIDToInstruction[x.ID()] yDef := b.valueIDToInstruction[y.ID()] @@ -405,7 +404,9 @@ func passConstFoldingOpt(b *builder) { // If we are adding some parameter, ignore. continue } - if xDef.Constant() || yDef.Constant() { + if xDef.Constant() && yDef.Constant() { + isFixedPoint = false + yc := yDef.ConstantVal() xc := xDef.ConstantVal() // Mutate the instruction to an Iconst. From 87e44539219f2411d4fa0e4729983e82b6481172 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Wed, 29 Nov 2023 10:39:07 +0100 Subject: [PATCH 05/22] valueIDToInstruction should be always re-inited Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 8d6ce0e504..60823f721e 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -361,9 +361,7 @@ func passNopInstElimination(b *builder) { } func ensureValueIdToInstructionInit(b *builder) { - if len(b.valueIDToInstruction) != 0 { - return - } + b.valueIDToInstruction = b.valueIDToInstruction[:0] if int(b.nextValueID) >= len(b.valueIDToInstruction) { b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...) 
From 48bf12da94b3052ca595a3dad1a2d15a4affcbca Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Wed, 29 Nov 2023 11:35:29 +0100 Subject: [PATCH 06/22] valueIDToInstruction does not have to be cleared explicitly Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 60823f721e..5e8ae74caa 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -361,8 +361,6 @@ func passNopInstElimination(b *builder) { } func ensureValueIdToInstructionInit(b *builder) { - b.valueIDToInstruction = b.valueIDToInstruction[:0] - if int(b.nextValueID) >= len(b.valueIDToInstruction) { b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...) } From 82d6f04e14cdbb2488b3a95ead3f333432c8e273 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Wed, 29 Nov 2023 11:55:26 +0100 Subject: [PATCH 07/22] wip Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 46 ++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 5e8ae74caa..9b633b1208 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -2,6 +2,7 @@ package ssa import ( "fmt" + "math" "sort" "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" @@ -392,7 +393,7 @@ func passConstFoldingOpt(b *builder) { // X := Const xc // Y := Const yc // - (Iadd X, Y) => Const (xc + yc) - case OpcodeIadd, OpcodeIsub: + case OpcodeIadd, OpcodeIsub, OpcodeImul: x, y := cur.Arg2() xDef := b.valueIDToInstruction[x.ID()] yDef := b.valueIDToInstruction[y.ID()] @@ -402,21 +403,56 @@ func passConstFoldingOpt(b *builder) { } if xDef.Constant() && yDef.Constant() { isFixedPoint = false - yc := yDef.ConstantVal() xc := xDef.ConstantVal() // Mutate the instruction to an Iconst. 
+ // We assume all the types are consistent. cur.opcode = OpcodeIconst + // Clear the references to operands. + cur.v, cur.v2 = 0, 0 switch op { case OpcodeIadd: cur.u1 = xc + yc case OpcodeIsub: cur.u1 = xc - yc + case OpcodeImul: + cur.u1 = xc * yc } - cur.u2 = 0 + } + case OpcodeFadd, OpcodeFsub: + x, y := cur.Arg2() + xDef := b.valueIDToInstruction[x.ID()] + yDef := b.valueIDToInstruction[y.ID()] + if xDef == nil || yDef == nil { + // If we are adding some parameter, ignore. + continue + } + if xDef.Constant() && yDef.Constant() { + isFixedPoint = false + // Mutate the instruction to an Iconst. + // We assume all the types are consistent. + cur.opcode = OpcodeIconst // Clear the references to operands. - cur.v = 0 - cur.v2 = 0 + cur.v, cur.v2 = 0, 0 + if x.Type().Bits() == 64 { + yc := math.Float64frombits(yDef.ConstantVal()) + xc := math.Float64frombits(xDef.ConstantVal()) + switch op { + case OpcodeFadd: + cur.u1 = uint64(math.Float64bits(xc + yc)) + case OpcodeFsub: + cur.u1 = uint64(math.Float64bits(xc - yc)) + } + } else { + yc := math.Float32frombits(uint32(yDef.ConstantVal())) + xc := math.Float32frombits(uint32(xDef.ConstantVal())) + switch op { + case OpcodeFadd: + cur.u1 = uint64(math.Float32bits(xc + yc)) + case OpcodeFsub: + cur.u1 = uint64(math.Float32bits(xc - yc)) + } + } } } } From 500aa5457ecaf05b2a017465e6fb7c3524c199d3 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Wed, 29 Nov 2023 13:44:38 +0100 Subject: [PATCH 08/22] operands were not actually cleared! 
(not reset to ValueInvalid) Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 9 +++++---- internal/engine/wazevo/ssa/pass_test.go | 1 - 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 5e8ae74caa..70b9056b28 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -405,6 +405,11 @@ func passConstFoldingOpt(b *builder) { yc := yDef.ConstantVal() xc := xDef.ConstantVal() + + // Clear the references to operands. + cur.v = ValueInvalid + cur.v2 = ValueInvalid + // Mutate the instruction to an Iconst. cur.opcode = OpcodeIconst switch op { @@ -413,10 +418,6 @@ func passConstFoldingOpt(b *builder) { case OpcodeIsub: cur.u1 = xc - yc } - cur.u2 = 0 - // Clear the references to operands. - cur.v = 0 - cur.v2 = 0 } } } diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 2bdb14335d..3692dd2cd1 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -400,7 +400,6 @@ blk0: () // FIXME: the first `Iconst_32 0x1` should be dead code, and should not be present in the output. 
after: ` blk0: () - v0:i32 = Iconst_32 0x1 v6:i32 = Iconst_32 0xfffffffe Return v6 `, From 6c10833ecfb52123df2682b547afe2bccadea5d2 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Wed, 29 Nov 2023 16:14:11 +0100 Subject: [PATCH 09/22] add tests for mul and floats Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/instructions.go | 9 +- internal/engine/wazevo/ssa/pass.go | 12 +-- internal/engine/wazevo/ssa/pass_test.go | 102 ++++++++++++++++++++- 3 files changed, 109 insertions(+), 14 deletions(-) diff --git a/internal/engine/wazevo/ssa/instructions.go b/internal/engine/wazevo/ssa/instructions.go index 4dbe0883af..97730e81a5 100644 --- a/internal/engine/wazevo/ssa/instructions.go +++ b/internal/engine/wazevo/ssa/instructions.go @@ -1732,27 +1732,30 @@ func (i *Instruction) InsertlaneData() (x, y Value, index byte, l VecLane) { } // AsFadd initializes this instruction as a floating-point addition instruction with OpcodeFadd. -func (i *Instruction) AsFadd(x, y Value) { +func (i *Instruction) AsFadd(x, y Value) *Instruction { i.opcode = OpcodeFadd i.v = x i.v2 = y i.typ = x.Type() + return i } // AsFsub initializes this instruction as a floating-point subtraction instruction with OpcodeFsub. -func (i *Instruction) AsFsub(x, y Value) { +func (i *Instruction) AsFsub(x, y Value) *Instruction { i.opcode = OpcodeFsub i.v = x i.v2 = y i.typ = x.Type() + return i } // AsFmul initializes this instruction as a floating-point multiplication instruction with OpcodeFmul. -func (i *Instruction) AsFmul(x, y Value) { +func (i *Instruction) AsFmul(x, y Value) *Instruction { i.opcode = OpcodeFmul i.v = x i.v2 = y i.typ = x.Type() + return i } // AsFdiv initializes this instruction as a floating-point division instruction with OpcodeFdiv. 
diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 77636c34e5..005da0bb3f 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -379,7 +379,8 @@ func ensureValueIdToInstructionInit(b *builder) { } } -// passConstFoldingOpt folds constant arithmetic ops into constant ops. +// passConstFoldingOpt scans all instructions for arithmetic operations over constants, +// and replaces them with a const of their result. func passConstFoldingOpt(b *builder) { ensureValueIdToInstructionInit(b) @@ -404,11 +405,10 @@ func passConstFoldingOpt(b *builder) { if xDef.Constant() && yDef.Constant() { isFixedPoint = false // Mutate the instruction to an Iconst. - // We assume all the types are consistent. cur.opcode = OpcodeIconst // Clear the references to operands. cur.v, cur.v2 = ValueInvalid, ValueInvalid - + // We assume all the types are consistent. xc, yc := xDef.ConstantVal(), yDef.ConstantVal() switch op { case OpcodeIadd: @@ -430,12 +430,11 @@ func passConstFoldingOpt(b *builder) { if xDef.Constant() && yDef.Constant() { isFixedPoint = false // Mutate the instruction to an Iconst. - // We assume all the types are consistent. - cur.opcode = OpcodeIconst // Clear the references to operands. cur.v, cur.v2 = ValueInvalid, ValueInvalid - + // We assume all the types are consistent. 
if x.Type().Bits() == 64 { + cur.opcode = OpcodeF64const yc := math.Float64frombits(yDef.ConstantVal()) xc := math.Float64frombits(xDef.ConstantVal()) switch op { @@ -447,6 +446,7 @@ func passConstFoldingOpt(b *builder) { cur.u1 = math.Float64bits(xc * yc) } } else { + cur.opcode = OpcodeF32const yc := math.Float32frombits(uint32(yDef.ConstantVal())) xc := math.Float32frombits(uint32(xDef.ConstantVal())) switch op { diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 3692dd2cd1..4e8635ea53 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -381,8 +381,69 @@ blk0: (v0:i32, v1:i64) nonZeroI32_4 := b.AllocateInstruction().AsIconst32(0x4).Insert(b).Return() foldIsubI32_1 := b.AllocateInstruction().AsIsub(nonZeroI32_4, foldIaddI32_2).Insert(b).Return() + // Imul32 foldedConst, foldedConst should resolve to IConst32 (foldedConst * foldedConst). + foldImulI32_1 := b.AllocateInstruction().AsImul(foldIsubI32_1, foldIsubI32_1).Insert(b).Return() + + // Iadd64 const1 + const2 should resolve to Const (const1 + const2). + nonZeroI64_1 := b.AllocateInstruction().AsIconst64(0x1).Insert(b).Return() + nonZeroI64_2 := b.AllocateInstruction().AsIconst64(0x2).Insert(b).Return() + foldIaddI64_1 := b.AllocateInstruction().AsIadd(nonZeroI64_1, nonZeroI64_2).Insert(b).Return() + + // Iadd64 foldedConst1, const3 should resolve to Const (foldedConst1, const3). + nonZeroI64_3 := b.AllocateInstruction().AsIconst64(0x3).Insert(b).Return() + foldIaddI64_2 := b.AllocateInstruction().AsIadd(foldIaddI64_1, nonZeroI64_3).Insert(b).Return() + + // Isub64 const4, foldedConst1 should resolve to Const (const4, foldedConst2). + nonZeroI64_4 := b.AllocateInstruction().AsIconst64(0x4).Insert(b).Return() + foldIsubI64_1 := b.AllocateInstruction().AsIsub(nonZeroI64_4, foldIaddI64_2).Insert(b).Return() + + // Imul64 foldedConst, foldedConst should resolve to IConst64 (foldedConst * foldedConst). 
+ foldImulI64_1 := b.AllocateInstruction().AsImul(foldIsubI64_1, foldIsubI64_1).Insert(b).Return() + + // Fadd32 const1 + const2 should resolve to Const (const1 + const2). + nonZeroF32_1 := b.AllocateInstruction().AsF32const(1.0).Insert(b).Return() + nonZeroF32_2 := b.AllocateInstruction().AsF32const(2.0).Insert(b).Return() + foldFaddF32_1 := b.AllocateInstruction().AsFadd(nonZeroF32_1, nonZeroF32_2).Insert(b).Return() + + // Fadd32 foldedConst1, const3 should resolve to Const (foldedConst1 + const3). + nonZeroF32_3 := b.AllocateInstruction().AsF32const(3.0).Insert(b).Return() + foldIaddF32_2 := b.AllocateInstruction().AsFadd(foldFaddF32_1, nonZeroF32_3).Insert(b).Return() + + // Fsub32 const4, foldedConst1 should resolve to Const (const4 - foldedConst2). + nonZeroF32_4 := b.AllocateInstruction().AsF32const(4.0).Insert(b).Return() + foldIsubF32_1 := b.AllocateInstruction().AsFsub(nonZeroF32_4, foldIaddF32_2).Insert(b).Return() + + // Fmul32 foldedConst, foldedConst should resolve to FConst32 (foldedConst * foldedConst). + foldFmulF32_1 := b.AllocateInstruction().AsFmul(foldIsubF32_1, foldIsubF32_1).Insert(b).Return() + + // Fadd64 const1 + const2 should resolve to FConst64 (const1 + const2). + nonZeroF64_1 := b.AllocateInstruction().AsF64const(1.0).Insert(b).Return() + nonZeroF64_2 := b.AllocateInstruction().AsF64const(2.0).Insert(b).Return() + // This intermediate value won't be dropped because it is referenced in the result. + foldFaddF64_1 := b.AllocateInstruction().AsFadd(nonZeroF64_1, nonZeroF64_2).Insert(b).Return() + + // Fadd64 foldedConst1, const3 should resolve to FConst64 (foldedConst1 + const3). + nonZeroF64_3 := b.AllocateInstruction().AsF64const(3.0).Insert(b).Return() + foldFaddF64_2 := b.AllocateInstruction().AsFadd(foldFaddF64_1, nonZeroF64_3).Insert(b).Return() + + // Fsub64 const4, foldedConst1 should resolve to FConst64 (const4 - foldedConst2). 
+ nonZeroF64_4 := b.AllocateInstruction().AsF64const(4.0).Insert(b).Return() + foldFsubF64_1 := b.AllocateInstruction().AsFsub(nonZeroF64_4, foldFaddF64_2).Insert(b).Return() + + // Fmul64 foldedConst, foldedConst should resolve to FConst64 (foldedConst * foldedConst). + foldFmulF64_1 := b.AllocateInstruction().AsFmul(foldFsubF64_1, foldFsubF64_1).Insert(b).Return() + ret := b.AllocateInstruction() - ret.AsReturn([]Value{foldIsubI32_1}) + ret.AsReturn([]Value{ + foldImulI32_1, + foldIsubI64_1, + foldImulI64_1, + foldIsubF32_1, + foldFmulF32_1, + foldFaddF64_1, + foldFsubF64_1, + foldFmulF64_1, + }) b.InsertInstruction(ret) return nil }, @@ -395,13 +456,44 @@ blk0: () v4:i32 = Iadd v2, v3 v5:i32 = Iconst_32 0x4 v6:i32 = Isub v5, v4 - Return v6 + v7:i32 = Imul v6, v6 + v8:i64 = Iconst_64 0x1 + v9:i64 = Iconst_64 0x2 + v10:i64 = Iadd v8, v9 + v11:i64 = Iconst_64 0x3 + v12:i64 = Iadd v10, v11 + v13:i64 = Iconst_64 0x4 + v14:i64 = Isub v13, v12 + v15:i64 = Imul v14, v14 + v16:f32 = F32const 1.000000 + v17:f32 = F32const 2.000000 + v18:f32 = Fadd v16, v17 + v19:f32 = F32const 3.000000 + v20:f32 = Fadd v18, v19 + v21:f32 = F32const 4.000000 + v22:f32 = Fsub v21, v20 + v23:f32 = Fmul v22, v22 + v24:f64 = F64const 1.000000 + v25:f64 = F64const 2.000000 + v26:f64 = Fadd v24, v25 + v27:f64 = F64const 3.000000 + v28:f64 = Fadd v26, v27 + v29:f64 = F64const 4.000000 + v30:f64 = Fsub v29, v28 + v31:f64 = Fmul v30, v30 + Return v7, v14, v15, v22, v23, v26, v30, v31 `, - // FIXME: the first `Iconst_32 0x1` should be dead code, and should not be present in the output. 
after: ` blk0: () - v6:i32 = Iconst_32 0xfffffffe - Return v6 + v7:i32 = Iconst_32 0x4 + v14:i64 = Iconst_64 0xfffffffffffffffe + v15:i64 = Iconst_64 0x4 + v22:f32 = F32const -2.000000 + v23:f32 = F32const 4.000000 + v26:f64 = F64const 3.000000 + v30:f64 = F64const -2.000000 + v31:f64 = F64const 4.000000 + Return v7, v14, v15, v22, v23, v26, v30, v31 `, }, } { From 073789ca3329c001d420d2295c9740133e2b7156 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Wed, 29 Nov 2023 16:30:49 +0100 Subject: [PATCH 10/22] passCollectValueIdToInstructionMapping is its own pass Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 8 +++----- internal/engine/wazevo/ssa/pass_test.go | 14 ++++++++++---- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 005da0bb3f..3cef6b169b 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -20,6 +20,8 @@ func (b *builder) RunPasses() { passRedundantPhiEliminationOpt(b) // The result of passCalculateImmediateDominators will be used by various passes below. passCalculateImmediateDominators(b) + + passCollectValueIdToInstructionMapping(b) passNopInstElimination(b) // TODO: implement either conversion of irreducible CFG into reducible one, or irreducible CFG detection where we panic. @@ -312,8 +314,6 @@ func (b *builder) clearBlkVisited() { // passNopInstElimination eliminates the instructions which is essentially a no-op. 
func passNopInstElimination(b *builder) { - ensureValueIdToInstructionInit(b) - for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { for cur := blk.rootInstr; cur != nil; cur = cur.next { op := cur.Opcode() @@ -361,7 +361,7 @@ func passNopInstElimination(b *builder) { } } -func ensureValueIdToInstructionInit(b *builder) { +func passCollectValueIdToInstructionMapping(b *builder) { if int(b.nextValueID) >= len(b.valueIDToInstruction) { b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...) } @@ -382,8 +382,6 @@ func ensureValueIdToInstructionInit(b *builder) { // passConstFoldingOpt scans all instructions for arithmetic operations over constants, // and replaces them with a const of their result. func passConstFoldingOpt(b *builder) { - ensureValueIdToInstructionInit(b) - isFixedPoint := false for !isFixedPoint { isFixedPoint = true diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 4e8635ea53..28f602c8b8 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -285,8 +285,11 @@ blk1: () <-- (blk0) `, }, { - name: "nop elimination", - pass: passNopInstElimination, + name: "nop elimination", + pass: func(b *builder) { + passCollectValueIdToInstructionMapping(b) + passNopInstElimination(b) + }, postPass: passDeadCodeEliminationOpt, setup: func(b *builder) (verifier func(t *testing.T)) { entry := b.AllocateBasicBlock() @@ -361,8 +364,11 @@ blk0: (v0:i32, v1:i64) `, }, { - name: "const folding", - pass: passConstFoldingOpt, + name: "const folding", + pass: func(b *builder) { + passCollectValueIdToInstructionMapping(b) + passConstFoldingOpt(b) + }, postPass: passDeadCodeEliminationOpt, setup: func(b *builder) (verifier func(t *testing.T)) { entry := b.AllocateBasicBlock() From e01691ca4a9791f49791e445c2036f377c4dbe05 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Wed, 29 Nov 2023 16:33:23 +0100 Subject: [PATCH 
11/22] deduplicate code in pass deadcode elim Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 11 ----------- internal/engine/wazevo/ssa/pass_test.go | 5 ++++- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 3cef6b169b..1c7376b5c8 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -179,9 +179,6 @@ func passDeadCodeEliminationOpt(b *builder) { if nvid >= len(b.valueRefCounts) { b.valueRefCounts = append(b.valueRefCounts, make([]int, b.nextValueID)...) } - if nvid >= len(b.valueIDToInstruction) { - b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...) - } // First, we gather all the instructions with side effects. liveInstructions := b.instStack[:0] @@ -200,14 +197,6 @@ func passDeadCodeEliminationOpt(b *builder) { // The strict side effect should create different instruction groups. gid++ } - - r1, rs := cur.Returns() - if r1.Valid() { - b.valueIDToInstruction[r1.ID()] = cur - } - for _, r := range rs { - b.valueIDToInstruction[r.ID()] = cur - } } } diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 28f602c8b8..402931af27 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -194,7 +194,10 @@ blk2: () <-- (blk1) }, { name: "dead code", - pass: passDeadCodeEliminationOpt, + pass: func(b *builder) { + passCollectValueIdToInstructionMapping(b) + passDeadCodeEliminationOpt(b) + }, setup: func(b *builder) func(*testing.T) { entry, end := b.AllocateBasicBlock(), b.AllocateBasicBlock() From fdcc3c726f89a17922bc60774e628124c3d73b81 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Wed, 29 Nov 2023 16:56:52 +0100 Subject: [PATCH 12/22] fixed point is local to each basic block Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 17 ++++++++------ 
internal/engine/wazevo/ssa/pass_test.go | 30 ++++++++++++------------- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 1c7376b5c8..046a1c70d8 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -18,10 +18,11 @@ func (b *builder) RunPasses() { passSortSuccessors(b) passDeadBlockEliminationOpt(b) passRedundantPhiEliminationOpt(b) - // The result of passCalculateImmediateDominators will be used by various passes below. + // The result of passCalculateImmediateDominators and passCollectValueIdToInstructionMapping + // will be used by various passes below. passCalculateImmediateDominators(b) - passCollectValueIdToInstructionMapping(b) + passNopInstElimination(b) // TODO: implement either conversion of irreducible CFG into reducible one, or irreducible CFG detection where we panic. @@ -371,11 +372,13 @@ func passCollectValueIdToInstructionMapping(b *builder) { // passConstFoldingOpt scans all instructions for arithmetic operations over constants, // and replaces them with a const of their result. func passConstFoldingOpt(b *builder) { - isFixedPoint := false - for !isFixedPoint { - isFixedPoint = true - for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { - for cur := blk.rootInstr; cur != nil; cur = cur.next { + for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { + for cur := blk.rootInstr; cur != nil; cur = cur.next { + // The fixed point is reached through a simple iteration over the list of instructions. + // Note: Instead of just an unbounded loop with a flag, we may also add an upper bound to the number of iterations. 
+ isFixedPoint := false + for !isFixedPoint { + isFixedPoint = true op := cur.Opcode() switch op { // X := Const xc diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 402931af27..853382a33b 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -9,6 +9,9 @@ import ( func TestBuilder_passes(t *testing.T) { for _, tc := range []struct { name string + // prePass is run before the pass is executed, and can be used to configure the environment + // (e.g. init `*builder` fields). + prePass, // pass is the optimization pass to run. pass, // postPass is run after the pass is executed, and can be used to test a pass that depends on another pass. @@ -193,11 +196,9 @@ blk2: () <-- (blk1) `, }, { - name: "dead code", - pass: func(b *builder) { - passCollectValueIdToInstructionMapping(b) - passDeadCodeEliminationOpt(b) - }, + name: "dead code", + prePass: passCollectValueIdToInstructionMapping, + pass: passDeadCodeEliminationOpt, setup: func(b *builder) func(*testing.T) { entry, end := b.AllocateBasicBlock(), b.AllocateBasicBlock() @@ -288,11 +289,9 @@ blk1: () <-- (blk0) `, }, { - name: "nop elimination", - pass: func(b *builder) { - passCollectValueIdToInstructionMapping(b) - passNopInstElimination(b) - }, + name: "nop elimination", + prePass: passCollectValueIdToInstructionMapping, + pass: passNopInstElimination, postPass: passDeadCodeEliminationOpt, setup: func(b *builder) (verifier func(t *testing.T)) { entry := b.AllocateBasicBlock() @@ -367,11 +366,9 @@ blk0: (v0:i32, v1:i64) `, }, { - name: "const folding", - pass: func(b *builder) { - passCollectValueIdToInstructionMapping(b) - passConstFoldingOpt(b) - }, + name: "const folding", + prePass: passCollectValueIdToInstructionMapping, + pass: passConstFoldingOpt, postPass: passDeadCodeEliminationOpt, setup: func(b *builder) (verifier func(t *testing.T)) { entry := b.AllocateBasicBlock() @@ -511,6 +508,9 @@ blk0: () b := 
NewBuilder().(*builder) verifier := tc.setup(b) require.Equal(t, tc.before, b.Format()) + if tc.prePass != nil { + tc.prePass(b) + } tc.pass(b) if verifier != nil { verifier(t) From 2842b95e135a7924a54e6da8eeffd9cbad45021a Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Wed, 29 Nov 2023 23:00:27 +0100 Subject: [PATCH 13/22] more test cases Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 28 +++++++--- internal/engine/wazevo/ssa/pass_test.go | 68 +++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 8 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 046a1c70d8..24de30bb1b 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -399,14 +399,26 @@ func passConstFoldingOpt(b *builder) { // Clear the references to operands. cur.v, cur.v2 = ValueInvalid, ValueInvalid // We assume all the types are consistent. - xc, yc := xDef.ConstantVal(), yDef.ConstantVal() - switch op { - case OpcodeIadd: - cur.u1 = xc + yc - case OpcodeIsub: - cur.u1 = xc - yc - case OpcodeImul: - cur.u1 = xc * yc + if x.Type().Bits() == 64 { + xc, yc := int64(xDef.ConstantVal()), int64(yDef.ConstantVal()) + switch op { + case OpcodeIadd: + cur.u1 = uint64(xc + yc) + case OpcodeIsub: + cur.u1 = uint64(xc - yc) + case OpcodeImul: + cur.u1 = uint64(xc * yc) + } + } else { + xc, yc := int32(xDef.ConstantVal()), int32(yDef.ConstantVal()) + switch op { + case OpcodeIadd: + cur.u1 = uint64(xc + yc) + case OpcodeIsub: + cur.u1 = uint64(xc - yc) + case OpcodeImul: + cur.u1 = uint64(xc * yc) + } } } case OpcodeFadd, OpcodeFsub, OpcodeFmul: diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 853382a33b..1b786eceaf 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -1,6 +1,7 @@ package ssa import ( + "math" "testing" "github.com/tetratelabs/wazero/internal/testing/require" @@ -500,6 +501,73 
@@ blk0: () v30:f64 = F64const -2.000000 v31:f64 = F64const 4.000000 Return v7, v14, v15, v22, v23, v26, v30, v31 +`, + }, + { + name: "const folding (overflow)", + prePass: passCollectValueIdToInstructionMapping, + pass: passConstFoldingOpt, + postPass: passDeadCodeEliminationOpt, + setup: func(b *builder) (verifier func(t *testing.T)) { + entry := b.AllocateBasicBlock() + b.SetCurrentBlock(entry) + + maxI32 := b.AllocateInstruction().AsIconst32(math.MaxInt32).Insert(b).Return() + oneI32 := b.AllocateInstruction().AsIconst32(1).Insert(b).Return() + wrapI32 := b.AllocateInstruction().AsIadd(maxI32, oneI32).Insert(b).Return() + mulI32 := b.AllocateInstruction().AsImul(maxI32, maxI32).Insert(b).Return() + + maxI64 := b.AllocateInstruction().AsIconst64(math.MaxInt64).Insert(b).Return() + oneI64 := b.AllocateInstruction().AsIconst64(1).Insert(b).Return() + wrapI64 := b.AllocateInstruction().AsIadd(maxI64, oneI64).Insert(b).Return() + mulI64 := b.AllocateInstruction().AsImul(maxI64, maxI64).Insert(b).Return() + + maxF32 := b.AllocateInstruction().AsF32const(math.MaxFloat32).Insert(b).Return() + oneF32 := b.AllocateInstruction().AsF32const(1.0).Insert(b).Return() + addF32 := b.AllocateInstruction().AsFadd(maxF32, oneF32).Insert(b).Return() + mulF32 := b.AllocateInstruction().AsFmul(maxF32, maxF32).Insert(b).Return() + + maxF64 := b.AllocateInstruction().AsF64const(math.MaxFloat64).Insert(b).Return() + oneF64 := b.AllocateInstruction().AsF64const(1.0).Insert(b).Return() + addF64 := b.AllocateInstruction().AsFadd(maxF64, oneF64).Insert(b).Return() + mulF64 := b.AllocateInstruction().AsFmul(maxF64, maxF64).Insert(b).Return() + + ret := b.AllocateInstruction() + ret.AsReturn([]Value{wrapI32, mulI32, wrapI64, mulI64, addF32, mulF32, addF64, mulF64}) + b.InsertInstruction(ret) + return nil + }, + before: ` +blk0: () + v0:i32 = Iconst_32 0x7fffffff + v1:i32 = Iconst_32 0x1 + v2:i32 = Iadd v0, v1 + v3:i32 = Imul v0, v0 + v4:i64 = Iconst_64 0x7fffffffffffffff + v5:i64 = Iconst_64 
0x1 + v6:i64 = Iadd v4, v5 + v7:i64 = Imul v4, v4 + v8:f32 = F32const 340282346638528859811704183484516925440.000000 + v9:f32 = F32const 1.000000 + v10:f32 = Fadd v8, v9 + v11:f32 = Fmul v8, v8 + v12:f64 = F64const 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000 + v13:f64 = F64const 1.000000 + v14:f64 = Fadd v12, v13 + v15:f64 = Fmul v12, v12 + Return v2, v3, v6, v7, v10, v11, v14, v15 +`, + after: ` +blk0: () + v2:i32 = Iconst_32 0x80000000 + v3:i32 = Iconst_32 0x1 + v6:i64 = Iconst_64 0x8000000000000000 + v7:i64 = Iconst_64 0x1 + v10:f32 = F32const 340282346638528859811704183484516925440.000000 + v11:f32 = F32const +Inf + v14:f64 = F64const 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000 + v15:f64 = F64const +Inf + Return v2, v3, v6, v7, v10, v11, v14, v15 `, }, } { From 2f76dadcb3e1d52963ca8ae0c130ffef63843c88 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Thu, 30 Nov 2023 10:48:03 +0100 Subject: [PATCH 14/22] float cases Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass_test.go | 110 +++++++++++++++++++----- 1 file changed, 89 insertions(+), 21 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 1b786eceaf..57dc4230f4 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -514,26 +514,70 @@ blk0: () maxI32 := b.AllocateInstruction().AsIconst32(math.MaxInt32).Insert(b).Return() oneI32 := 
b.AllocateInstruction().AsIconst32(1).Insert(b).Return() + // Iadd MaxInt32, 1 overflows and wraps around to 0x80000000 (min representable Int32) wrapI32 := b.AllocateInstruction().AsIadd(maxI32, oneI32).Insert(b).Return() + // Imul MaxInt32, MaxInt32 overflows and wraps around to 0x1. mulI32 := b.AllocateInstruction().AsImul(maxI32, maxI32).Insert(b).Return() + // Explicitly using the constant because math.MinInt32 is not representable. + minI32 := b.AllocateInstruction().AsIconst32(0x80000000).Insert(b).Return() + // Isub 0x80000000, 1 overflows and wraps around to 0x7fffffff (max representable Int32) + negWrapI32 := b.AllocateInstruction().AsIsub(minI32, oneI32).Insert(b).Return() + maxI64 := b.AllocateInstruction().AsIconst64(math.MaxInt64).Insert(b).Return() oneI64 := b.AllocateInstruction().AsIconst64(1).Insert(b).Return() + // Iadd MaxInt64, 1 overflows and wraps around to 0x8000000000000000 (min representable Int64) wrapI64 := b.AllocateInstruction().AsIadd(maxI64, oneI64).Insert(b).Return() mulI64 := b.AllocateInstruction().AsImul(maxI64, maxI64).Insert(b).Return() + // Explicitly using the constant because math.MinInt64 is not representable. + minI64 := b.AllocateInstruction().AsIconst64(0x8000000000000000).Insert(b).Return() + // Isub 0x8000000000000000, 1 overflows and wraps around to 0x7fffffffffffffff (max representable Int64) + negWrapI64 := b.AllocateInstruction().AsIsub(minI64, oneI64).Insert(b).Return() + maxF32 := b.AllocateInstruction().AsF32const(math.MaxFloat32).Insert(b).Return() oneF32 := b.AllocateInstruction().AsF32const(1.0).Insert(b).Return() + // Fadd MaxFloat32, 1 absorbs the value and returns MaxFloat32. addF32 := b.AllocateInstruction().AsFadd(maxF32, oneF32).Insert(b).Return() + // Fadd MaxFloat32, MaxFloat32 returns +Inf. + addF32_2 := b.AllocateInstruction().AsFadd(maxF32, maxF32).Insert(b).Return() + // Fmul MaxFloat32, MaxFloat32 returns +Inf. 
mulF32 := b.AllocateInstruction().AsFmul(maxF32, maxF32).Insert(b).Return() + minF32 := b.AllocateInstruction().AsF32const(-math.MaxFloat32).Insert(b).Return() + // Fsub -MaxFloat32, 1 absorbs the value and returns -MaxFloat32. + subF32 := b.AllocateInstruction().AsFsub(minF32, oneF32).Insert(b).Return() + // Fsub -MaxFloat32, -MaxFloat32 returns ?? + subF32_2 := b.AllocateInstruction().AsFadd(minF32, minF32).Insert(b).Return() + // Fmul returns +Inf. + mulMinF32 := b.AllocateInstruction().AsFmul(minF32, minF32).Insert(b).Return() + maxF64 := b.AllocateInstruction().AsF64const(math.MaxFloat64).Insert(b).Return() oneF64 := b.AllocateInstruction().AsF64const(1.0).Insert(b).Return() + // Fadd MaxFloat64, 1 absorbs the value and returns MaxFloat64. addF64 := b.AllocateInstruction().AsFadd(maxF64, oneF64).Insert(b).Return() + // Fadd MaxFloat64, MaxFloat64 returns +Inf. + addF64_2 := b.AllocateInstruction().AsFadd(maxF64, maxF64).Insert(b).Return() + // Fmul MaxFloat64, MaxFloat64 returns +Inf. mulF64 := b.AllocateInstruction().AsFmul(maxF64, maxF64).Insert(b).Return() + minF64 := b.AllocateInstruction().AsF64const(-math.MaxFloat64).Insert(b).Return() + // Fsub -MaxFloat64, 1 absorbs the value and returns -MaxFloat64. + subF64 := b.AllocateInstruction().AsFsub(minF64, oneF64).Insert(b).Return() + // Fsub -MaxFloat64, -MaxFloat64 returns -Inf. + subF64_2 := b.AllocateInstruction().AsFadd(minF64, minF64).Insert(b).Return() + // Fmul -MaxFloat64, -MaxFloat64 returns +Inf. 
+ mulMinF64 := b.AllocateInstruction().AsFmul(minF64, minF64).Insert(b).Return() + ret := b.AllocateInstruction() - ret.AsReturn([]Value{wrapI32, mulI32, wrapI64, mulI64, addF32, mulF32, addF64, mulF64}) + ret.AsReturn([]Value{ + wrapI32, mulI32, negWrapI32, + wrapI64, mulI64, negWrapI64, + addF32, addF32_2, mulF32, + subF32, subF32_2, mulMinF32, + addF64, addF64_2, mulF64, + subF64, subF64_2, mulMinF64, + }) b.InsertInstruction(ret) return nil }, @@ -543,31 +587,55 @@ blk0: () v1:i32 = Iconst_32 0x1 v2:i32 = Iadd v0, v1 v3:i32 = Imul v0, v0 - v4:i64 = Iconst_64 0x7fffffffffffffff - v5:i64 = Iconst_64 0x1 - v6:i64 = Iadd v4, v5 - v7:i64 = Imul v4, v4 - v8:f32 = F32const 340282346638528859811704183484516925440.000000 - v9:f32 = F32const 1.000000 - v10:f32 = Fadd v8, v9 - v11:f32 = Fmul v8, v8 - v12:f64 = F64const 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000 - v13:f64 = F64const 1.000000 - v14:f64 = Fadd v12, v13 - v15:f64 = Fmul v12, v12 - Return v2, v3, v6, v7, v10, v11, v14, v15 + v4:i32 = Iconst_32 0x80000000 + v5:i32 = Isub v4, v1 + v6:i64 = Iconst_64 0x7fffffffffffffff + v7:i64 = Iconst_64 0x1 + v8:i64 = Iadd v6, v7 + v9:i64 = Imul v6, v6 + v10:i64 = Iconst_64 0x8000000000000000 + v11:i64 = Isub v10, v7 + v12:f32 = F32const 3.4028235e+38 + v13:f32 = F32const 1 + v14:f32 = Fadd v12, v13 + v15:f32 = Fadd v12, v12 + v16:f32 = Fmul v12, v12 + v17:f32 = F32const -3.4028235e+38 + v18:f32 = Fsub v17, v13 + v19:f32 = Fadd v17, v17 + v20:f32 = Fmul v17, v17 + v21:f64 = F64const 1.7976931348623157e+308 + v22:f64 = F64const 1 + v23:f64 = Fadd v21, v22 + v24:f64 = Fadd v21, v21 + v25:f64 = Fmul v21, v21 + v26:f64 = F64const -1.7976931348623157e+308 + v27:f64 = Fsub v26, v22 + v28:f64 = Fadd v26, v26 + 
v29:f64 = Fmul v26, v26 + Return v2, v3, v5, v8, v9, v11, v14, v15, v16, v18, v19, v20, v23, v24, v25, v27, v28, v29 `, after: ` blk0: () v2:i32 = Iconst_32 0x80000000 v3:i32 = Iconst_32 0x1 - v6:i64 = Iconst_64 0x8000000000000000 - v7:i64 = Iconst_64 0x1 - v10:f32 = F32const 340282346638528859811704183484516925440.000000 - v11:f32 = F32const +Inf - v14:f64 = F64const 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000 - v15:f64 = F64const +Inf - Return v2, v3, v6, v7, v10, v11, v14, v15 + v5:i32 = Iconst_32 0x7fffffff + v8:i64 = Iconst_64 0x8000000000000000 + v9:i64 = Iconst_64 0x1 + v11:i64 = Iconst_64 0x7fffffffffffffff + v14:f32 = F32const 3.4028235e+38 + v15:f32 = F32const +Inf + v16:f32 = F32const +Inf + v18:f32 = F32const -3.4028235e+38 + v19:f32 = F32const -Inf + v20:f32 = F32const +Inf + v23:f64 = F64const 1.7976931348623157e+308 + v24:f64 = F64const +Inf + v25:f64 = F64const +Inf + v27:f64 = F64const -1.7976931348623157e+308 + v28:f64 = F64const -Inf + v29:f64 = F64const +Inf + Return v2, v3, v5, v8, v9, v11, v14, v15, v16, v18, v19, v20, v23, v24, v25, v27, v28, v29 `, }, } { From cddeb203786ba3e9fcf962bbbb0268958039b350 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Thu, 30 Nov 2023 10:53:25 +0100 Subject: [PATCH 15/22] use %g to format float constants (ssa) Signed-off-by: Edoardo Vacchi --- .../engine/wazevo/frontend/frontend_test.go | 60 +++++++++---------- internal/engine/wazevo/ssa/instructions.go | 4 +- internal/engine/wazevo/ssa/pass_test.go | 26 ++++---- 3 files changed, 45 insertions(+), 45 deletions(-) diff --git a/internal/engine/wazevo/frontend/frontend_test.go b/internal/engine/wazevo/frontend/frontend_test.go index f50d3218cc..7ddb821292 100644 --- 
a/internal/engine/wazevo/frontend/frontend_test.go +++ b/internal/engine/wazevo/frontend/frontend_test.go @@ -95,8 +95,8 @@ blk0: (exec_ctx:i64, module_ctx:i64, v2:i32, v3:i32) blk0: (exec_ctx:i64, module_ctx:i64) v2:i32 = Iconst_32 0x0 v3:i64 = Iconst_64 0x0 - v4:f32 = F32const 0.000000 - v5:f64 = F64const 0.000000 + v4:f32 = F32const 0 + v5:f64 = F64const 0 Jump blk_ret `, expAfterOpt: ` @@ -136,8 +136,8 @@ blk0: (exec_ctx:i64, module_ctx:i64, v2:i32) blk0: (exec_ctx:i64, module_ctx:i64, v2:i64, v3:f32, v4:f64) v5:i32 = Iconst_32 0x0 v6:i64 = Iconst_64 0x0 - v7:f32 = F32const 0.000000 - v8:f64 = F64const 0.000000 + v7:f32 = F32const 0 + v8:f64 = F64const 0 v9:i64 = Iadd v2, v2 v10:i64 = Isub v9, v2 v11:f32 = Fadd v3, v3 @@ -204,8 +204,8 @@ blk1: () <-- (blk0) blk0: (exec_ctx:i64, module_ctx:i64) v2:i32 = Iconst_32 0x0 v3:i64 = Iconst_64 0x0 - v4:f32 = F32const 0.000000 - v5:f64 = F64const 0.000000 + v4:f32 = F32const 0 + v5:f64 = F64const 0 Jump blk1 blk1: () <-- (blk0) @@ -311,8 +311,8 @@ blk3: () <-- (blk1) blk0: (exec_ctx:i64, module_ctx:i64) v2:i32 = Iconst_32 0x0 v3:i64 = Iconst_64 0x0 - v4:f32 = F32const 0.000000 - v5:f64 = F64const 0.000000 + v4:f32 = F32const 0 + v5:f64 = F64const 0 Jump blk1 blk1: () <-- (blk0) @@ -885,45 +885,45 @@ blk0: (exec_ctx:i64, module_ctx:i64, v2:i32, v3:f32) v60:i32 = Iadd v9, v59 v61:i32 = Iadd v7, v60 v62:i32 = Iadd v5, v61 - v63:f32 = F32const 1.000000 + v63:f32 = F32const 1 v64:f32 = Fmul v3, v63 - v65:f32 = F32const 2.000000 + v65:f32 = F32const 2 v66:f32 = Fmul v3, v65 - v67:f32 = F32const 3.000000 + v67:f32 = F32const 3 v68:f32 = Fmul v3, v67 - v69:f32 = F32const 4.000000 + v69:f32 = F32const 4 v70:f32 = Fmul v3, v69 - v71:f32 = F32const 5.000000 + v71:f32 = F32const 5 v72:f32 = Fmul v3, v71 - v73:f32 = F32const 6.000000 + v73:f32 = F32const 6 v74:f32 = Fmul v3, v73 - v75:f32 = F32const 7.000000 + v75:f32 = F32const 7 v76:f32 = Fmul v3, v75 - v77:f32 = F32const 8.000000 + v77:f32 = F32const 8 v78:f32 = Fmul v3, v77 - 
v79:f32 = F32const 9.000000 + v79:f32 = F32const 9 v80:f32 = Fmul v3, v79 - v81:f32 = F32const 10.000000 + v81:f32 = F32const 10 v82:f32 = Fmul v3, v81 - v83:f32 = F32const 11.000000 + v83:f32 = F32const 11 v84:f32 = Fmul v3, v83 - v85:f32 = F32const 12.000000 + v85:f32 = F32const 12 v86:f32 = Fmul v3, v85 - v87:f32 = F32const 13.000000 + v87:f32 = F32const 13 v88:f32 = Fmul v3, v87 - v89:f32 = F32const 14.000000 + v89:f32 = F32const 14 v90:f32 = Fmul v3, v89 - v91:f32 = F32const 15.000000 + v91:f32 = F32const 15 v92:f32 = Fmul v3, v91 - v93:f32 = F32const 16.000000 + v93:f32 = F32const 16 v94:f32 = Fmul v3, v93 - v95:f32 = F32const 17.000000 + v95:f32 = F32const 17 v96:f32 = Fmul v3, v95 - v97:f32 = F32const 18.000000 + v97:f32 = F32const 18 v98:f32 = Fmul v3, v97 - v99:f32 = F32const 19.000000 + v99:f32 = F32const 19 v100:f32 = Fmul v3, v99 - v101:f32 = F32const 20.000000 + v101:f32 = F32const 20 v102:f32 = Fmul v3, v101 v103:f32 = Fadd v100, v102 v104:f32 = Fadd v98, v103 @@ -1356,10 +1356,10 @@ blk0: (exec_ctx:i64, module_ctx:i64) v4:i64 = Iconst_64 0x2 v5:i64 = Load module_ctx, 0x10 Store v4, v5, 0x8 - v6:f32 = F32const 3.000000 + v6:f32 = F32const 3 v7:i64 = Load module_ctx, 0x18 Store v6, v7, 0x8 - v8:f64 = F64const 4.000000 + v8:f64 = F64const 4 v9:i64 = Load module_ctx, 0x20 Store v8, v9, 0x8 Jump blk_ret, v2, v4, v6, v8 diff --git a/internal/engine/wazevo/ssa/instructions.go b/internal/engine/wazevo/ssa/instructions.go index 97730e81a5..fa3df6e824 100644 --- a/internal/engine/wazevo/ssa/instructions.go +++ b/internal/engine/wazevo/ssa/instructions.go @@ -2394,9 +2394,9 @@ func (i *Instruction) Format(b Builder) string { case OpcodeVconst: instSuffix = fmt.Sprintf(" %016x %016x", i.u1, i.u2) case OpcodeF32const: - instSuffix = fmt.Sprintf(" %f", math.Float32frombits(uint32(i.u1))) + instSuffix = fmt.Sprintf(" %g", math.Float32frombits(uint32(i.u1))) case OpcodeF64const: - instSuffix = fmt.Sprintf(" %f", math.Float64frombits(i.u1)) + instSuffix = 
fmt.Sprintf(" %g", math.Float64frombits(i.u1)) case OpcodeReturn: if len(i.vs) == 0 { break diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 57dc4230f4..0528a9485c 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -472,20 +472,20 @@ blk0: () v13:i64 = Iconst_64 0x4 v14:i64 = Isub v13, v12 v15:i64 = Imul v14, v14 - v16:f32 = F32const 1.000000 - v17:f32 = F32const 2.000000 + v16:f32 = F32const 1 + v17:f32 = F32const 2 v18:f32 = Fadd v16, v17 - v19:f32 = F32const 3.000000 + v19:f32 = F32const 3 v20:f32 = Fadd v18, v19 - v21:f32 = F32const 4.000000 + v21:f32 = F32const 4 v22:f32 = Fsub v21, v20 v23:f32 = Fmul v22, v22 - v24:f64 = F64const 1.000000 - v25:f64 = F64const 2.000000 + v24:f64 = F64const 1 + v25:f64 = F64const 2 v26:f64 = Fadd v24, v25 - v27:f64 = F64const 3.000000 + v27:f64 = F64const 3 v28:f64 = Fadd v26, v27 - v29:f64 = F64const 4.000000 + v29:f64 = F64const 4 v30:f64 = Fsub v29, v28 v31:f64 = Fmul v30, v30 Return v7, v14, v15, v22, v23, v26, v30, v31 @@ -495,11 +495,11 @@ blk0: () v7:i32 = Iconst_32 0x4 v14:i64 = Iconst_64 0xfffffffffffffffe v15:i64 = Iconst_64 0x4 - v22:f32 = F32const -2.000000 - v23:f32 = F32const 4.000000 - v26:f64 = F64const 3.000000 - v30:f64 = F64const -2.000000 - v31:f64 = F64const 4.000000 + v22:f32 = F32const -2 + v23:f32 = F32const 4 + v26:f64 = F64const 3 + v30:f64 = F64const -2 + v31:f64 = F64const 4 Return v7, v14, v15, v22, v23, v26, v30, v31 `, }, From c8ec025267b64cba2706cecf46d15b7fa6213ec9 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Mon, 4 Dec 2023 17:08:46 +0100 Subject: [PATCH 16/22] simplify constFold Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 41 ++++++++++--------------- internal/engine/wazevo/ssa/pass_test.go | 1 + 2 files changed, 18 insertions(+), 24 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 24de30bb1b..a51be8bd45 
100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -369,15 +369,18 @@ func passCollectValueIdToInstructionMapping(b *builder) { } } +// passConstFoldingOptMaxIter controls the max number of iterations per-BB, before giving up. +const passConstFoldingOptMaxIter = 10 + // passConstFoldingOpt scans all instructions for arithmetic operations over constants, -// and replaces them with a const of their result. +// and replaces them with a const of their result. Repeats for each basic blocks until +// a fixed point is reached or num iter == passConstFoldingOptMaxIter. func passConstFoldingOpt(b *builder) { for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { for cur := blk.rootInstr; cur != nil; cur = cur.next { // The fixed point is reached through a simple iteration over the list of instructions. // Note: Instead of just an unbounded loop with a flag, we may also add an upper bound to the number of iterations. - isFixedPoint := false - for !isFixedPoint { + for iter, isFixedPoint := 0, false; iter < passConstFoldingOptMaxIter && !isFixedPoint; iter++ { isFixedPoint = true op := cur.Opcode() switch op { @@ -399,26 +402,16 @@ func passConstFoldingOpt(b *builder) { // Clear the references to operands. cur.v, cur.v2 = ValueInvalid, ValueInvalid // We assume all the types are consistent. - if x.Type().Bits() == 64 { - xc, yc := int64(xDef.ConstantVal()), int64(yDef.ConstantVal()) - switch op { - case OpcodeIadd: - cur.u1 = uint64(xc + yc) - case OpcodeIsub: - cur.u1 = uint64(xc - yc) - case OpcodeImul: - cur.u1 = uint64(xc * yc) - } - } else { - xc, yc := int32(xDef.ConstantVal()), int32(yDef.ConstantVal()) - switch op { - case OpcodeIadd: - cur.u1 = uint64(xc + yc) - case OpcodeIsub: - cur.u1 = uint64(xc - yc) - case OpcodeImul: - cur.u1 = uint64(xc * yc) - } + // Signed integers are 2 complement, so we can just apply the operations. 
+ // Operations are evaluated over uint64s and will be bitcasted at the use-sites. + xc, yc := xDef.ConstantVal(), yDef.ConstantVal() + switch op { + case OpcodeIadd: + cur.u1 = xc + yc + case OpcodeIsub: + cur.u1 = xc - yc + case OpcodeImul: + cur.u1 = xc * yc } } case OpcodeFadd, OpcodeFsub, OpcodeFmul: @@ -426,7 +419,7 @@ func passConstFoldingOpt(b *builder) { xDef := b.valueIDToInstruction[x.ID()] yDef := b.valueIDToInstruction[y.ID()] if xDef == nil || yDef == nil { - // If we are adding together some parameter, ignore. + // If we are composing some parameter, ignore. continue } if xDef.Constant() && yDef.Constant() { diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 0528a9485c..5090252da4 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -528,6 +528,7 @@ blk0: () oneI64 := b.AllocateInstruction().AsIconst64(1).Insert(b).Return() // Iadd MaxInt64, 1 overflows and wraps around to 0x8000000000000000 (min representable Int64) wrapI64 := b.AllocateInstruction().AsIadd(maxI64, oneI64).Insert(b).Return() + // Imul MaxInt64, MaxInt64 overflows and wraps around to 0x1. mulI64 := b.AllocateInstruction().AsImul(maxI64, maxI64).Insert(b).Return() // Explicitly using the constant because math.MinInt64 is not representable. 
From 2686b575570c3fad6faaea68ba2e0582deb44e05 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Mon, 4 Dec 2023 21:09:45 +0100 Subject: [PATCH 17/22] some work on algebraic simplification for Iadd Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 92 ++++++++++++++++++++++++- internal/engine/wazevo/ssa/pass_test.go | 50 +++++++++++++- 2 files changed, 140 insertions(+), 2 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index a51be8bd45..9e96ac2c15 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -38,6 +38,9 @@ func (b *builder) RunPasses() { // and more! passConstFoldingOpt(b) + passAlgebraicSimplification(b) + + passCollectValueIdToInstructionMapping(b) // passDeadCodeEliminationOpt could be more accurate if we do this after other optimizations. passDeadCodeEliminationOpt(b) @@ -369,7 +372,7 @@ func passCollectValueIdToInstructionMapping(b *builder) { } } -// passConstFoldingOptMaxIter controls the max number of iterations per-BB, before giving up. +// passConstFoldingOptMaxIter controls the max number of iterations per-BB in passConstFoldingOpt, before giving up. const passConstFoldingOptMaxIter = 10 // passConstFoldingOpt scans all instructions for arithmetic operations over constants, @@ -460,6 +463,93 @@ func passConstFoldingOpt(b *builder) { } } +// passAlgebraicSimplificationMaxIter controls the max number of iterations per-BB in passAlgebraicSimplification, before giving up. +const passAlgebraicSimplificationMaxIter = 10 + +// passAlgebraicSimplification performs algebraic simplification. +func passAlgebraicSimplification(b *builder) { + // isConstant is a utility for nil-safe check for Constant(). It can be moved into inst.Constant() if useful. + isConstant := func(inst *Instruction) bool { return inst != nil && inst.Constant() } + // isCanonical returns true when the given pair of instruction resolves to non-constant, constant. 
+ isCanonical := func(a, b *Instruction) bool { return !isConstant(a) && isConstant(b) } + makeAddConstant := func(yDef, wDef *Instruction) *Instruction { + // Create a const as wide as yDef (either 32 or 64-bits), sum the two consts in cur and xDef. + // We are assuming the types match. + instr := b.AllocateInstruction() + instr.opcode = OpcodeIconst + instr.typ = yDef.typ + instr.u1 = yDef.ConstantVal() + wDef.ConstantVal() + instr.rValue = b.allocateValue(yDef.typ) + return instr + } + // TODO: We should first canonicalize operations. E.g, Iadd const, v => Iadd v, const. + for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { + for iter, isFixedPoint := 0, false; iter < passAlgebraicSimplificationMaxIter && !isFixedPoint; iter++ { + isFixedPoint = true + for cur := blk.rootInstr; cur != nil; cur = cur.next { + // The fixed point is reached through a simple iteration over the list of instructions. + // Note: Instead of just an unbounded loop with a flag, we may also add an upper bound to the number of iterations. + op := cur.Opcode() + switch op { + // For a given sequence of instructions: + // C0 = Iconst_(32|64) ... + // C1 = Iconst_(32|64) ... + // V0 = ... + // V1 = Iadd V0, C0 + // Vn = Iadd V1, C1 + // Rewrites Vn to: + // Ck = Iconst_(32|64) C0+C1 + // Vn = Iadd V0, Ck + // C0, C1, V0, V1 might be deleted by passDeadCodeEliminationOpt + // if they are not referenced by other instructions. + case OpcodeIadd: + x, y := cur.Arg2() + xDef, yDef := b.valueIDToInstruction[x.ID()], b.valueIDToInstruction[y.ID()] + // Only apply if the referenced value was defined by an Iadd. 
+ if xDef == nil || xDef.Opcode() != OpcodeIadd { + continue + } + // Canonical representation is `Iadd Value, Const` + if !isCanonical(xDef, yDef) { + continue + } + // Verify the instruction xDef is in the form `Iadd Value, Const` + v, w := xDef.Arg2() + vDef, wDef := b.valueIDToInstruction[v.ID()], b.valueIDToInstruction[w.ID()] + if !isCanonical(vDef, wDef) { + continue + } + + isFixedPoint = false + + // Create a const as wide as yDef (either 32 or 64-bits), sum the two consts in cur and xDef. + // We are assuming the types match. + instr := makeAddConstant(yDef, wDef) + // Update the current instruction to point to the value referenced by xDef and the new const. + cur.v, cur.v2 = v, instr.Return() + + // Update or append the new valueId to the mapping slice. + if int(b.nextValueID) >= len(b.valueIDToInstruction) { + b.valueIDToInstruction = append(b.valueIDToInstruction, instr) + } else { + b.valueIDToInstruction[instr.Return().ID()] = instr + } + + // Insert the new instruction in the linked list between cur.prev and cur. + cur.prev.next = instr + instr.prev = cur.prev + cur.prev = instr + instr.next = cur + + if cur == blk.rootInstr { + blk.rootInstr = instr + } + } + } + } + } +} + // passSortSuccessors sorts the successors of each block in the natural program order. func passSortSuccessors(b *builder) { for i := 0; i < b.basicBlocksPool.Allocated(); i++ { diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index 5090252da4..cf0fb8a8d4 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -528,7 +528,7 @@ blk0: () oneI64 := b.AllocateInstruction().AsIconst64(1).Insert(b).Return() // Iadd MaxInt64, 1 overflows and wraps around to 0x8000000000000000 (min representable Int64) wrapI64 := b.AllocateInstruction().AsIadd(maxI64, oneI64).Insert(b).Return() - // Imul MaxInt64, MaxInt64 overflows and wraps around to 0x1. 
+ // Imul MaxInt64, MaxInt64 overflows and wraps around to 0x1. mulI64 := b.AllocateInstruction().AsImul(maxI64, maxI64).Insert(b).Return() // Explicitly using the constant because math.MinInt64 is not representable. @@ -637,6 +637,54 @@ blk0: () v28:f64 = F64const -Inf v29:f64 = F64const +Inf Return v2, v3, v5, v8, v9, v11, v14, v15, v16, v18, v19, v20, v23, v24, v25, v27, v28, v29 +`, + }, + { + name: "algebraic simplification", + prePass: passCollectValueIdToInstructionMapping, + pass: passAlgebraicSimplification, + postPass: passDeadCodeEliminationOpt, + setup: func(b *builder) (verifier func(t *testing.T)) { + entry := b.AllocateBasicBlock() + b.SetCurrentBlock(entry) + + i32Param := entry.AddParam(b, TypeI32) + i64Param := entry.AddParam(b, TypeI64) + + oneI32 := b.AllocateInstruction().AsIconst32(1).Insert(b).Return() + twoI32 := b.AllocateInstruction().AsIconst32(2).Insert(b).Return() + res1I32 := b.AllocateInstruction().AsIadd(i32Param, oneI32).Insert(b).Return() + res2I32 := b.AllocateInstruction().AsIadd(res1I32, twoI32).Insert(b).Return() + + oneI64 := b.AllocateInstruction().AsIconst64(1).Insert(b).Return() + twoI64 := b.AllocateInstruction().AsIconst64(2).Insert(b).Return() + res1I64 := b.AllocateInstruction().AsIadd(i64Param, oneI64).Insert(b).Return() + res2I64 := b.AllocateInstruction().AsIadd(res1I64, twoI64).Insert(b).Return() + + ret := b.AllocateInstruction() + ret.AsReturn([]Value{res2I32, res2I64}) + b.InsertInstruction(ret) + return nil + }, + before: ` +blk0: (v0:i32, v1:i64) + v2:i32 = Iconst_32 0x1 + v3:i32 = Iconst_32 0x2 + v4:i32 = Iadd v0, v2 + v5:i32 = Iadd v4, v3 + v6:i64 = Iconst_64 0x1 + v7:i64 = Iconst_64 0x2 + v8:i64 = Iadd v1, v6 + v9:i64 = Iadd v8, v7 + Return v5, v9 +`, + after: ` +blk0: (v0:i32, v1:i64) + v10:i32 = Iconst_32 0x3 + v5:i32 = Iadd v0, v10 + v11:i64 = Iconst_64 0x3 + v9:i64 = Iadd v1, v11 + Return v5, v9 `, + }, + } { From 15901190536ce711c822385fe436c3c043f0ffc2 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: 
Tue, 12 Dec 2023 19:39:15 +0100 Subject: [PATCH 18/22] add more cases, perf still not particularly interesting Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 45 ++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 9e96ac2c15..955436b588 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -373,7 +373,7 @@ func passCollectValueIdToInstructionMapping(b *builder) { } // passConstFoldingOptMaxIter controls the max number of iterations per-BB in passConstFoldingOpt, before giving up. -const passConstFoldingOptMaxIter = 10 +const passConstFoldingOptMaxIter = math.MaxInt // passConstFoldingOpt scans all instructions for arithmetic operations over constants, // and replaces them with a const of their result. Repeats for each basic blocks until @@ -408,14 +408,7 @@ func passConstFoldingOpt(b *builder) { // Signed integers are 2 complement, so we can just apply the operations. // Operations are evaluated over uint64s and will be bitcasted at the use-sites. xc, yc := xDef.ConstantVal(), yDef.ConstantVal() - switch op { - case OpcodeIadd: - cur.u1 = xc + yc - case OpcodeIsub: - cur.u1 = xc - yc - case OpcodeImul: - cur.u1 = xc * yc - } + cur.u1 = eval(op, xc, yc) } case OpcodeFadd, OpcodeFsub, OpcodeFmul: x, y := cur.Arg2() @@ -463,6 +456,25 @@ func passConstFoldingOpt(b *builder) { } } +func eval(op Opcode, xc uint64, yc uint64) uint64 { + switch op { + case OpcodeIadd: + return xc + yc + case OpcodeIsub: + return xc - yc + case OpcodeImul: + return xc * yc + case OpcodeBor: + return xc | yc + case OpcodeBand: + return xc & yc + case OpcodeBxor: + return xc ^ yc + default: + panic("unhandled default case") + } +} + // passAlgebraicSimplificationMaxIter controls the max number of iterations per-BB in passAlgebraicSimplification, before giving up. 
const passAlgebraicSimplificationMaxIter = 10 @@ -472,14 +484,15 @@ func passAlgebraicSimplification(b *builder) { isConstant := func(inst *Instruction) bool { return inst != nil && inst.Constant() } // isCanonical returns true when the given pair of instruction resolves to non-constant, constant. isCanonical := func(a, b *Instruction) bool { return !isConstant(a) && isConstant(b) } - makeAddConstant := func(yDef, wDef *Instruction) *Instruction { + makeConstant := func(yDef, wDef *Instruction, op Opcode) *Instruction { // Create a const as wide as yDef (either 32 or 64-bits), sum the two consts in cur and xDef. // We are assuming the types match. instr := b.AllocateInstruction() instr.opcode = OpcodeIconst instr.typ = yDef.typ - instr.u1 = yDef.ConstantVal() + wDef.ConstantVal() instr.rValue = b.allocateValue(yDef.typ) + yc, wc := yDef.ConstantVal(), wDef.ConstantVal() + instr.u1 = eval(op, yc, wc) return instr } // TODO: We should first canonicalize operations. E.g, Iadd const, v => Iadd v, const. @@ -502,11 +515,11 @@ func passAlgebraicSimplification(b *builder) { // Vn = Iadd V0, Ck // C0, C1, V0, V1 might be deleted by passDeadCodeEliminationOpt // if they are not referenced by other instructions. - case OpcodeIadd: + case OpcodeIadd, OpcodeImul, OpcodeBor, OpcodeBand, OpcodeBxor: x, y := cur.Arg2() xDef, yDef := b.valueIDToInstruction[x.ID()], b.valueIDToInstruction[y.ID()] - // Only apply if the referenced value was defined by an Iadd. - if xDef == nil || xDef.Opcode() != OpcodeIadd { + // Only apply if the referenced value was defined by the same instruction. + if xDef == nil || xDef.Opcode() != op { continue } // Canonical representation is `Iadd Value, Const` @@ -524,7 +537,7 @@ func passAlgebraicSimplification(b *builder) { // Create a const as wide as yDef (either 32 or 64-bits), sum the two consts in cur and xDef. // We are assuming the types match. 
- instr := makeAddConstant(yDef, wDef) + instr := makeConstant(yDef, wDef, op) // Update the current instruction to point to the value referenced by xDef and the new const. cur.v, cur.v2 = v, instr.Return() @@ -544,6 +557,8 @@ func passAlgebraicSimplification(b *builder) { if cur == blk.rootInstr { blk.rootInstr = instr } + default: + continue } } } From 796b9697ad1867ac011fcf5811815fb85fbd403e Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Fri, 15 Dec 2023 15:53:02 +0100 Subject: [PATCH 19/22] wip Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 133 +++++++---------------------- 1 file changed, 32 insertions(+), 101 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index 955436b588..e082acdbc6 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -23,8 +23,6 @@ func (b *builder) RunPasses() { passCalculateImmediateDominators(b) passCollectValueIdToInstructionMapping(b) - passNopInstElimination(b) - // TODO: implement either conversion of irreducible CFG into reducible one, or irreducible CFG detection where we panic. // WebAssembly program shouldn't result in irreducible CFG, but we should handle it properly in just in case. // See FixIrreducible pass in LLVM: https://llvm.org/doxygen/FixIrreducible_8cpp_source.html @@ -38,7 +36,7 @@ func (b *builder) RunPasses() { // and more! 
passConstFoldingOpt(b) - passAlgebraicSimplification(b) + passNopInstElimination(b) passCollectValueIdToInstructionMapping(b) @@ -333,10 +331,10 @@ func passNopInstElimination(b *builder) { b.alias(cur.Return(), x) } } - // Z := Const 0 - // (Iadd X, Z) => X - // (Iadd Z, Y) => Y - case OpcodeIadd: + // When Op is Iadd, Bor, Bxor, Rotl or Rotr, and Z is Iconst 0: + // (Op X, Z) => X + // (Op Z, Y) => Y + case OpcodeIadd, OpcodeBor, OpcodeBxor, OpcodeRotl, OpcodeRotr: x, y := cur.Arg2() definingInst := b.valueIDToInstruction[y.ID()] if definingInst == nil { @@ -349,6 +347,26 @@ func passNopInstElimination(b *builder) { if definingInst.Constant() && definingInst.ConstantVal() == 0 { b.alias(cur.Return(), x) } + // When Op is Imul and Z is Iconst 1: + // (Op X, Z) => X + // (Op Z, Y) => Y + // TODO: This is also valid for UDiv, SDiv, but they are trapping, so we would + // need to update passDeadCodeEliminationOpt to account for this case and mark them dead. + case OpcodeImul: + x, y := cur.Arg2() + definingInst := b.valueIDToInstruction[y.ID()] + if definingInst == nil { + if definingInst = b.valueIDToInstruction[x.ID()]; definingInst == nil { + continue + } else { + x = y + } + } + if definingInst.Constant() && definingInst.ConstantVal() == 1 { + b.alias(cur.Return(), x) + } + default: + continue } } } @@ -389,7 +407,7 @@ func passConstFoldingOpt(b *builder) { switch op { // X := Const xc // Y := Const yc - // - (Iadd X, Y) => Const (xc + yc) + // - (op X, Y) => Const (xc yc); e.g. if op is Iadd => xc + yc. case OpcodeIadd, OpcodeIsub, OpcodeImul: x, y := cur.Arg2() xDef := b.valueIDToInstruction[x.ID()] @@ -408,8 +426,11 @@ func passConstFoldingOpt(b *builder) { // Signed integers are 2 complement, so we can just apply the operations. // Operations are evaluated over uint64s and will be bitcasted at the use-sites. 
xc, yc := xDef.ConstantVal(), yDef.ConstantVal() - cur.u1 = eval(op, xc, yc) + cur.u1 = evalArithmeticOp(op, xc, yc) } + // X := Const xc + // Y := Const yc + // - (op X, Y) => Const (xc yc); e.g. if op is Fadd => xc + yc. case OpcodeFadd, OpcodeFsub, OpcodeFmul: x, y := cur.Arg2() xDef := b.valueIDToInstruction[x.ID()] @@ -456,7 +477,7 @@ func passConstFoldingOpt(b *builder) { } } -func eval(op Opcode, xc uint64, yc uint64) uint64 { +func evalArithmeticOp(op Opcode, xc uint64, yc uint64) uint64 { switch op { case OpcodeIadd: return xc + yc @@ -471,97 +492,7 @@ func eval(op Opcode, xc uint64, yc uint64) uint64 { case OpcodeBxor: return xc ^ yc default: - panic("unhandled default case") - } -} - -// passAlgebraicSimplificationMaxIter controls the max number of iterations per-BB in passAlgebraicSimplification, before giving up. -const passAlgebraicSimplificationMaxIter = 10 - -// passAlgebraicSimplification performs algebraic simplification. -func passAlgebraicSimplification(b *builder) { - // isConstant is a utility for nil-safe check for Constant(). It can be moved into inst.Constant() if useful. - isConstant := func(inst *Instruction) bool { return inst != nil && inst.Constant() } - // isCanonical returns true when the given pair of instruction resolves to non-constant, constant. - isCanonical := func(a, b *Instruction) bool { return !isConstant(a) && isConstant(b) } - makeConstant := func(yDef, wDef *Instruction, op Opcode) *Instruction { - // Create a const as wide as yDef (either 32 or 64-bits), sum the two consts in cur and xDef. - // We are assuming the types match. - instr := b.AllocateInstruction() - instr.opcode = OpcodeIconst - instr.typ = yDef.typ - instr.rValue = b.allocateValue(yDef.typ) - yc, wc := yDef.ConstantVal(), wDef.ConstantVal() - instr.u1 = eval(op, yc, wc) - return instr - } - // TODO: We should first canonicalize operations. E.g, Iadd const, v => Iadd v, const. 
- for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { - for iter, isFixedPoint := 0, false; iter < passAlgebraicSimplificationMaxIter && !isFixedPoint; iter++ { - isFixedPoint = true - for cur := blk.rootInstr; cur != nil; cur = cur.next { - // The fixed point is reached through a simple iteration over the list of instructions. - // Note: Instead of just an unbounded loop with a flag, we may also add an upper bound to the number of iterations. - op := cur.Opcode() - switch op { - // For a given sequence of instructions: - // C0 = Iconst_(32|64) ... - // C1 = Iconst_(32|64) ... - // V0 = ... - // V1 = Iadd V0, C0 - // Vn = Iadd V1, C1 - // Rewrites Vn to: - // Ck = Iconst_(32|64) C0+C1 - // Vn = Iadd V0, Ck - // C0, C1, V0, V1 might be deleted by passDeadCodeEliminationOpt - // if they are not referenced by other instructions. - case OpcodeIadd, OpcodeImul, OpcodeBor, OpcodeBand, OpcodeBxor: - x, y := cur.Arg2() - xDef, yDef := b.valueIDToInstruction[x.ID()], b.valueIDToInstruction[y.ID()] - // Only apply if the referenced value was defined by the same instruction. - if xDef == nil || xDef.Opcode() != op { - continue - } - // Canonical representation is `Iadd Value, Const` - if !isCanonical(xDef, yDef) { - continue - } - // Verify the instruction xDef is in the form `Iadd Value, Const` - v, w := xDef.Arg2() - vDef, wDef := b.valueIDToInstruction[v.ID()], b.valueIDToInstruction[w.ID()] - if !isCanonical(vDef, wDef) { - continue - } - - isFixedPoint = false - - // Create a const as wide as yDef (either 32 or 64-bits), sum the two consts in cur and xDef. - // We are assuming the types match. - instr := makeConstant(yDef, wDef, op) - // Update the current instruction to point to the value referenced by xDef and the new const. - cur.v, cur.v2 = v, instr.Return() - - // Update or append the new valueId to the mapping slice. 
- if int(b.nextValueID) >= len(b.valueIDToInstruction) { - b.valueIDToInstruction = append(b.valueIDToInstruction, instr) - } else { - b.valueIDToInstruction[instr.Return().ID()] = instr - } - - // Insert the new instruction in the linked list between cur.prev and cur. - cur.prev.next = instr - instr.prev = cur.prev - cur.prev = instr - instr.next = cur - - if cur == blk.rootInstr { - blk.rootInstr = instr - } - default: - continue - } - } - } + panic("unhandled default case " + op.String()) } } From a4b5549381bdc86ad8888996fdb17b79e4ec3d96 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Fri, 15 Dec 2023 15:54:00 +0100 Subject: [PATCH 20/22] wip Signed-off-by: Edoardo Vacchi --- .../engine/wazevo/backend/backend_test.go | 280 +++++++++--------- 1 file changed, 138 insertions(+), 142 deletions(-) diff --git a/internal/engine/wazevo/backend/backend_test.go b/internal/engine/wazevo/backend/backend_test.go index 592e20a8d7..0fe7d4be0a 100644 --- a/internal/engine/wazevo/backend/backend_test.go +++ b/internal/engine/wazevo/backend/backend_test.go @@ -1394,126 +1394,124 @@ L1 (SSA Block: blk0): L1 (SSA Block: blk0): mov x130?, x2 mov v131?.8b, v0.8b - orr w289?, wzr, #0x1 - madd w133?, w130?, w289?, wzr - orr w288?, wzr, #0x2 - madd w135?, w130?, w288?, wzr - orr w287?, wzr, #0x3 - madd w137?, w130?, w287?, wzr - orr w286?, wzr, #0x4 - madd w139?, w130?, w286?, wzr - movz w285?, #0x5, lsl 0 - madd w141?, w130?, w285?, wzr - orr w284?, wzr, #0x6 - madd w143?, w130?, w284?, wzr - orr w283?, wzr, #0x7 - madd w145?, w130?, w283?, wzr - orr w282?, wzr, #0x8 - madd w147?, w130?, w282?, wzr - movz w281?, #0x9, lsl 0 - madd w149?, w130?, w281?, wzr - movz w280?, #0xa, lsl 0 - madd w151?, w130?, w280?, wzr - movz w279?, #0xb, lsl 0 - madd w153?, w130?, w279?, wzr - orr w278?, wzr, #0xc - madd w155?, w130?, w278?, wzr - movz w277?, #0xd, lsl 0 - madd w157?, w130?, w277?, wzr - orr w276?, wzr, #0xe - madd w159?, w130?, w276?, wzr - orr w275?, wzr, #0xf - madd w161?, w130?, 
w275?, wzr - orr w274?, wzr, #0x10 - madd w163?, w130?, w274?, wzr - movz w273?, #0x11, lsl 0 - madd w165?, w130?, w273?, wzr - movz w272?, #0x12, lsl 0 - madd w167?, w130?, w272?, wzr - movz w271?, #0x13, lsl 0 - madd w169?, w130?, w271?, wzr - movz w270?, #0x14, lsl 0 - madd w171?, w130?, w270?, wzr - add w172?, w169?, w171? - add w173?, w167?, w172? - add w174?, w165?, w173? - add w175?, w163?, w174? - add w176?, w161?, w175? - add w177?, w159?, w176? - add w178?, w157?, w177? - add w179?, w155?, w178? - add w180?, w153?, w179? - add w181?, w151?, w180? - add w182?, w149?, w181? - add w183?, w147?, w182? - add w184?, w145?, w183? - add w185?, w143?, w184? - add w186?, w141?, w185? - add w187?, w139?, w186? - add w188?, w137?, w187? - add w189?, w135?, w188? - add w190?, w133?, w189? - ldr s269?, #8; b 8; data.f32 1.000000 - fmul s192?, s131?, s269? - ldr s268?, #8; b 8; data.f32 2.000000 - fmul s194?, s131?, s268? - ldr s267?, #8; b 8; data.f32 3.000000 - fmul s196?, s131?, s267? - ldr s266?, #8; b 8; data.f32 4.000000 - fmul s198?, s131?, s266? - ldr s265?, #8; b 8; data.f32 5.000000 - fmul s200?, s131?, s265? - ldr s264?, #8; b 8; data.f32 6.000000 - fmul s202?, s131?, s264? - ldr s263?, #8; b 8; data.f32 7.000000 - fmul s204?, s131?, s263? - ldr s262?, #8; b 8; data.f32 8.000000 - fmul s206?, s131?, s262? - ldr s261?, #8; b 8; data.f32 9.000000 - fmul s208?, s131?, s261? - ldr s260?, #8; b 8; data.f32 10.000000 - fmul s210?, s131?, s260? - ldr s259?, #8; b 8; data.f32 11.000000 - fmul s212?, s131?, s259? - ldr s258?, #8; b 8; data.f32 12.000000 - fmul s214?, s131?, s258? - ldr s257?, #8; b 8; data.f32 13.000000 - fmul s216?, s131?, s257? - ldr s256?, #8; b 8; data.f32 14.000000 - fmul s218?, s131?, s256? - ldr s255?, #8; b 8; data.f32 15.000000 - fmul s220?, s131?, s255? - ldr s254?, #8; b 8; data.f32 16.000000 - fmul s222?, s131?, s254? - ldr s253?, #8; b 8; data.f32 17.000000 - fmul s224?, s131?, s253? 
- ldr s252?, #8; b 8; data.f32 18.000000 - fmul s226?, s131?, s252? - ldr s251?, #8; b 8; data.f32 19.000000 - fmul s228?, s131?, s251? - ldr s250?, #8; b 8; data.f32 20.000000 - fmul s230?, s131?, s250? - fadd s231?, s228?, s230? - fadd s232?, s226?, s231? - fadd s233?, s224?, s232? - fadd s234?, s222?, s233? - fadd s235?, s220?, s234? - fadd s236?, s218?, s235? - fadd s237?, s216?, s236? - fadd s238?, s214?, s237? - fadd s239?, s212?, s238? - fadd s240?, s210?, s239? - fadd s241?, s208?, s240? - fadd s242?, s206?, s241? - fadd s243?, s204?, s242? - fadd s244?, s202?, s243? - fadd s245?, s200?, s244? - fadd s246?, s198?, s245? - fadd s247?, s196?, s246? - fadd s248?, s194?, s247? - fadd s249?, s192?, s248? - mov v0.8b, v249?.8b - mov x0, x190? + orr w286?, wzr, #0x2 + madd w133?, w130?, w286?, wzr + orr w285?, wzr, #0x3 + madd w135?, w130?, w285?, wzr + orr w284?, wzr, #0x4 + madd w137?, w130?, w284?, wzr + movz w283?, #0x5, lsl 0 + madd w139?, w130?, w283?, wzr + orr w282?, wzr, #0x6 + madd w141?, w130?, w282?, wzr + orr w281?, wzr, #0x7 + madd w143?, w130?, w281?, wzr + orr w280?, wzr, #0x8 + madd w145?, w130?, w280?, wzr + movz w279?, #0x9, lsl 0 + madd w147?, w130?, w279?, wzr + movz w278?, #0xa, lsl 0 + madd w149?, w130?, w278?, wzr + movz w277?, #0xb, lsl 0 + madd w151?, w130?, w277?, wzr + orr w276?, wzr, #0xc + madd w153?, w130?, w276?, wzr + movz w275?, #0xd, lsl 0 + madd w155?, w130?, w275?, wzr + orr w274?, wzr, #0xe + madd w157?, w130?, w274?, wzr + orr w273?, wzr, #0xf + madd w159?, w130?, w273?, wzr + orr w272?, wzr, #0x10 + madd w161?, w130?, w272?, wzr + movz w271?, #0x11, lsl 0 + madd w163?, w130?, w271?, wzr + movz w270?, #0x12, lsl 0 + madd w165?, w130?, w270?, wzr + movz w269?, #0x13, lsl 0 + madd w167?, w130?, w269?, wzr + movz w268?, #0x14, lsl 0 + madd w169?, w130?, w268?, wzr + add w170?, w167?, w169? + add w171?, w165?, w170? + add w172?, w163?, w171? + add w173?, w161?, w172? + add w174?, w159?, w173? + add w175?, w157?, w174? 
+ add w176?, w155?, w175? + add w177?, w153?, w176? + add w178?, w151?, w177? + add w179?, w149?, w178? + add w180?, w147?, w179? + add w181?, w145?, w180? + add w182?, w143?, w181? + add w183?, w141?, w182? + add w184?, w139?, w183? + add w185?, w137?, w184? + add w186?, w135?, w185? + add w187?, w133?, w186? + add w188?, w130?, w187? + ldr s267?, #8; b 8; data.f32 1.000000 + fmul s190?, s131?, s267? + ldr s266?, #8; b 8; data.f32 2.000000 + fmul s192?, s131?, s266? + ldr s265?, #8; b 8; data.f32 3.000000 + fmul s194?, s131?, s265? + ldr s264?, #8; b 8; data.f32 4.000000 + fmul s196?, s131?, s264? + ldr s263?, #8; b 8; data.f32 5.000000 + fmul s198?, s131?, s263? + ldr s262?, #8; b 8; data.f32 6.000000 + fmul s200?, s131?, s262? + ldr s261?, #8; b 8; data.f32 7.000000 + fmul s202?, s131?, s261? + ldr s260?, #8; b 8; data.f32 8.000000 + fmul s204?, s131?, s260? + ldr s259?, #8; b 8; data.f32 9.000000 + fmul s206?, s131?, s259? + ldr s258?, #8; b 8; data.f32 10.000000 + fmul s208?, s131?, s258? + ldr s257?, #8; b 8; data.f32 11.000000 + fmul s210?, s131?, s257? + ldr s256?, #8; b 8; data.f32 12.000000 + fmul s212?, s131?, s256? + ldr s255?, #8; b 8; data.f32 13.000000 + fmul s214?, s131?, s255? + ldr s254?, #8; b 8; data.f32 14.000000 + fmul s216?, s131?, s254? + ldr s253?, #8; b 8; data.f32 15.000000 + fmul s218?, s131?, s253? + ldr s252?, #8; b 8; data.f32 16.000000 + fmul s220?, s131?, s252? + ldr s251?, #8; b 8; data.f32 17.000000 + fmul s222?, s131?, s251? + ldr s250?, #8; b 8; data.f32 18.000000 + fmul s224?, s131?, s250? + ldr s249?, #8; b 8; data.f32 19.000000 + fmul s226?, s131?, s249? + ldr s248?, #8; b 8; data.f32 20.000000 + fmul s228?, s131?, s248? + fadd s229?, s226?, s228? + fadd s230?, s224?, s229? + fadd s231?, s222?, s230? + fadd s232?, s220?, s231? + fadd s233?, s218?, s232? + fadd s234?, s216?, s233? + fadd s235?, s214?, s234? + fadd s236?, s212?, s235? + fadd s237?, s210?, s236? + fadd s238?, s208?, s237? + fadd s239?, s206?, s238? 
+ fadd s240?, s204?, s239? + fadd s241?, s202?, s240? + fadd s242?, s200?, s241? + fadd s243?, s198?, s242? + fadd s244?, s196?, s243? + fadd s245?, s194?, s244? + fadd s246?, s192?, s245? + fadd s247?, s190?, s246? + mov v0.8b, v247?.8b + mov x0, x188? ret `, afterFinalizeARM64: ` @@ -1539,47 +1537,44 @@ L1 (SSA Block: blk0): str q27, [sp, #-0x10]! movz x27, #0x120, lsl 0 str x27, [sp, #-0x10]! - orr w8, wzr, #0x1 + orr w8, wzr, #0x2 madd w8, w2, w8, wzr - orr w9, wzr, #0x2 + orr w9, wzr, #0x3 madd w9, w2, w9, wzr - orr w10, wzr, #0x3 + orr w10, wzr, #0x4 madd w10, w2, w10, wzr - orr w11, wzr, #0x4 + movz w11, #0x5, lsl 0 madd w11, w2, w11, wzr - movz w12, #0x5, lsl 0 + orr w12, wzr, #0x6 madd w12, w2, w12, wzr - orr w13, wzr, #0x6 + orr w13, wzr, #0x7 madd w13, w2, w13, wzr - orr w14, wzr, #0x7 + orr w14, wzr, #0x8 madd w14, w2, w14, wzr - orr w15, wzr, #0x8 + movz w15, #0x9, lsl 0 madd w15, w2, w15, wzr - movz w16, #0x9, lsl 0 + movz w16, #0xa, lsl 0 madd w16, w2, w16, wzr - movz w17, #0xa, lsl 0 + movz w17, #0xb, lsl 0 madd w17, w2, w17, wzr - movz w19, #0xb, lsl 0 + orr w19, wzr, #0xc madd w19, w2, w19, wzr - orr w20, wzr, #0xc + movz w20, #0xd, lsl 0 madd w20, w2, w20, wzr - movz w21, #0xd, lsl 0 + orr w21, wzr, #0xe madd w21, w2, w21, wzr - orr w22, wzr, #0xe + orr w22, wzr, #0xf madd w22, w2, w22, wzr - orr w23, wzr, #0xf + orr w23, wzr, #0x10 madd w23, w2, w23, wzr - orr w24, wzr, #0x10 + movz w24, #0x11, lsl 0 madd w24, w2, w24, wzr - movz w25, #0x11, lsl 0 + movz w25, #0x12, lsl 0 madd w25, w2, w25, wzr - movz w26, #0x12, lsl 0 + movz w26, #0x13, lsl 0 madd w26, w2, w26, wzr - movz w29, #0x13, lsl 0 + movz w29, #0x14, lsl 0 madd w29, w2, w29, wzr - movz w30, #0x14, lsl 0 - madd w30, w2, w30, wzr - add w29, w29, w30 add w26, w26, w29 add w25, w25, w26 add w24, w24, w25 @@ -1598,6 +1593,7 @@ L1 (SSA Block: blk0): add w10, w10, w11 add w9, w9, w10 add w8, w8, w9 + add w8, w2, w8 ldr s8, #8; b 8; data.f32 1.000000 fmul s8, s0, s8 ldr s9, #8; b 8; data.f32 
2.000000 From d9a81d34913a5a05e9cb415b1443cd6d4cf3807c Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Fri, 15 Dec 2023 15:57:07 +0100 Subject: [PATCH 21/22] format Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass_test.go | 48 ------------------------- 1 file changed, 48 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass_test.go b/internal/engine/wazevo/ssa/pass_test.go index cf0fb8a8d4..f5e46b7636 100644 --- a/internal/engine/wazevo/ssa/pass_test.go +++ b/internal/engine/wazevo/ssa/pass_test.go @@ -637,54 +637,6 @@ blk0: () v28:f64 = F64const -Inf v29:f64 = F64const +Inf Return v2, v3, v5, v8, v9, v11, v14, v15, v16, v18, v19, v20, v23, v24, v25, v27, v28, v29 -`, - }, - { - name: "algebraic simplification", - prePass: passCollectValueIdToInstructionMapping, - pass: passAlgebraicSimplification, - postPass: passDeadCodeEliminationOpt, - setup: func(b *builder) (verifier func(t *testing.T)) { - entry := b.AllocateBasicBlock() - b.SetCurrentBlock(entry) - - i32Param := entry.AddParam(b, TypeI32) - i64Param := entry.AddParam(b, TypeI64) - - oneI32 := b.AllocateInstruction().AsIconst32(1).Insert(b).Return() - twoI32 := b.AllocateInstruction().AsIconst32(2).Insert(b).Return() - res1I32 := b.AllocateInstruction().AsIadd(i32Param, oneI32).Insert(b).Return() - res2I32 := b.AllocateInstruction().AsIadd(res1I32, twoI32).Insert(b).Return() - - oneI64 := b.AllocateInstruction().AsIconst64(1).Insert(b).Return() - twoI64 := b.AllocateInstruction().AsIconst64(2).Insert(b).Return() - res1I64 := b.AllocateInstruction().AsIadd(i64Param, oneI64).Insert(b).Return() - res2I64 := b.AllocateInstruction().AsIadd(res1I64, twoI64).Insert(b).Return() - - ret := b.AllocateInstruction() - ret.AsReturn([]Value{res2I32, res2I64}) - b.InsertInstruction(ret) - return nil - }, - before: ` -blk0: (v0:i32, v1:i64) - v2:i32 = Iconst_32 0x1 - v3:i32 = Iconst_32 0x2 - v4:i32 = Iadd v0, v2 - v5:i32 = Iadd v4, v3 - v6:i64 = Iconst_64 0x1 - v7:i64 = Iconst_64 0x2 - v8:i64 = Iadd 
v1, v6 - v9:i64 = Iadd v8, v7 - Return v5, v9 -`, - after: ` -blk0: (v0:i32, v1:i64) - v10:i32 = Iconst_32 0x3 - v5:i32 = Iadd v0, v10 - v11:i64 = Iconst_64 0x3 - v9:i64 = Iadd v1, v11 - Return v5, v9 `, }, } { From 7ff515583914e2df435924512e4f1abbdfdfe789 Mon Sep 17 00:00:00 2001 From: Edoardo Vacchi Date: Fri, 15 Dec 2023 16:03:47 +0100 Subject: [PATCH 22/22] remove useless extra pass Signed-off-by: Edoardo Vacchi --- internal/engine/wazevo/ssa/pass.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/internal/engine/wazevo/ssa/pass.go b/internal/engine/wazevo/ssa/pass.go index e082acdbc6..3446311e11 100644 --- a/internal/engine/wazevo/ssa/pass.go +++ b/internal/engine/wazevo/ssa/pass.go @@ -38,8 +38,6 @@ func (b *builder) RunPasses() { passConstFoldingOpt(b) passNopInstElimination(b) - passCollectValueIdToInstructionMapping(b) - // passDeadCodeEliminationOpt could be more accurate if we do this after other optimizations. passDeadCodeEliminationOpt(b) b.donePasses = true