Skip to content

Commit

Permalink
#368 GBA conditional mul in rasterizer
Browse files Browse the repository at this point in the history
  • Loading branch information
XProger committed Dec 4, 2022
1 parent 3ba5ec3 commit 5c13524
Show file tree
Hide file tree
Showing 7 changed files with 67 additions and 71 deletions.
6 changes: 3 additions & 3 deletions src/platform/gba/asm/common_asm.inc
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,11 @@
.endm

.macro scaleUV uv, tmp, tmp2, f
smull \tmp2, \tmp, \uv, \f // u = (f * uv) >> 32
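smullne \tmp2, \tmp, \uv, \f // u = (f * uv) >> 32, only executed when flags on entry are NE (callers use subs); NOTE(review): when uv == 0 tmp keeps its old value and is still merged by the final orr - confirm this is intended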

lsl \uv, #16
asr \uv, #16
mul \uv, \f // v = f * int16(uv)
asrs \uv, #16
mulne \uv, \f // v = f * int16(uv)
lsr \uv, #16

orr \uv, \uv, \tmp, lsl #16 // uv = (u & 0xFFFF0000) | (v >> 16)
Expand Down
14 changes: 6 additions & 8 deletions src/platform/gba/asm/rasterizeF.s
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ Lxy .req tmp
Ly2 .req Lh
LMAP .req Lx
ptr .req tmp
Ltmp .req N
Rtmp .req N

.global rasterizeF_asm
rasterizeF_asm:
Expand Down Expand Up @@ -57,9 +55,9 @@ rasterizeF_asm:

divLUT tmp, Lh // tmp = FixedInvU(Lh)

ldrsh Ltmp, [L, #VERTEX_X]
sub Ltmp, Lx, asr #16
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - L->v.x)
ldrsh Ldx, [L, #VERTEX_X]
subs Ldx, Lx, asr #16
mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)
.calc_left_end:

cmp Rh, #0
Expand All @@ -81,9 +79,9 @@ rasterizeF_asm:

divLUT tmp, Rh // tmp = FixedInvU(Rh)

ldrsh Rtmp, [R, #VERTEX_X]
sub Rtmp, Rx, asr #16
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
ldrsh Rdx, [R, #VERTEX_X]
subs Rdx, Rx, asr #16
mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
.calc_right_end:

cmp Rh, Lh // if (Rh < Lh)
Expand Down
18 changes: 9 additions & 9 deletions src/platform/gba/asm/rasterizeFT.s
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,12 @@ rasterizeFT_asm:

divLUT tmp, Lh // tmp = FixedInvU(Lh)

ldrsh Ltmp, [L, #VERTEX_X]
sub Ltmp, Lx, asr #16
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
ldrsh Ldx, [L, #VERTEX_X]
subs Ldx, Lx, asr #16
mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)

ldr Ldt, [L, #VERTEX_T]
sub Ldt, Lt // Ldt = N->v.t - Lt
subs Ldt, Lt // Ldt = N->v.t - Lt
scaleUV Ldt, Ltmp, Ltmp2, tmp
.calc_left_end:

Expand All @@ -125,12 +125,12 @@ rasterizeFT_asm:

divLUT tmp, Rh // tmp = FixedInvU(Rh)

ldrsh Rtmp, [R, #VERTEX_X]
sub Rtmp, Rx, asr #16
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
ldrsh Rdx, [R, #VERTEX_X]
subs Rdx, Rx, asr #16
mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)

ldr Rdt, [R, #VERTEX_T]
sub Rdt, Rt // Rdt = N->v.t - Rt
subs Rdt, Rt // Rdt = N->v.t - Rt
scaleUV Rdt, Rtmp, Rtmp2, tmp
.calc_right_end:

Expand All @@ -153,7 +153,7 @@ rasterizeFT_asm:

divLUT inv, width // inv = FixedInvU(width)

sub dtdx, Rt, Lt // duv = Rt - Lt
subs dtdx, Rt, Lt // dtdx = Rt - Lt
scaleUV dtdx, dtmp, dtmp2, inv

mov t, Lt // t = Lt
Expand Down
18 changes: 9 additions & 9 deletions src/platform/gba/asm/rasterizeFTA.s
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,12 @@ rasterizeFTA_asm:

divLUT tmp, Lh // tmp = FixedInvU(Lh)

ldrsh Ltmp, [L, #VERTEX_X]
sub Ltmp, Lx, asr #16
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
ldrsh Ldx, [L, #VERTEX_X]
subs Ldx, Lx, asr #16
mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)

ldr Ldt, [L, #VERTEX_T]
sub Ldt, Lt // Ldt = N->v.t - Lt
subs Ldt, Lt // Ldt = N->v.t - Lt
scaleUV Ldt, Ltmp, Ltmp2, tmp
.calc_left_end:

Expand All @@ -126,12 +126,12 @@ rasterizeFTA_asm:

divLUT tmp, Rh // tmp = FixedInvU(Rh)

ldrsh Rtmp, [R, #VERTEX_X]
sub Rtmp, Rx, asr #16
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
ldrsh Rdx, [R, #VERTEX_X]
subs Rdx, Rx, asr #16
mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)

ldr Rdt, [R, #VERTEX_T]
sub Rdt, Rt // Rdt = N->v.t - Rt
subs Rdt, Rt // Rdt = N->v.t - Rt
scaleUV Rdt, Rtmp, Rtmp2, tmp
.calc_right_end:

Expand All @@ -154,7 +154,7 @@ rasterizeFTA_asm:

divLUT inv, width // inv = FixedInvU(width)

sub dtdx, Rt, Lt // duv = Rt - Lt
subs dtdx, Rt, Lt // dtdx = Rt - Lt
scaleUV dtdx, dtmp, dtmp2, inv

mov t, Lt // t = Lt
Expand Down
34 changes: 17 additions & 17 deletions src/platform/gba/asm/rasterizeGT.s
Original file line number Diff line number Diff line change
Expand Up @@ -107,17 +107,17 @@ rasterizeGT_asm:
divLUT tmp, Lh // tmp = FixedInvU(Lh)

fiq_on
ldrsh Ltmp, [N, #VERTEX_X]
sub Ltmp, Lx, asr #16
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
ldrsh Ldx, [N, #VERTEX_X]
subs Ldx, Lx, asr #16
mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)

ldrb Ltmp, [N, #VERTEX_G]
sub Ltmp, Lg, lsr #(8 + G_EXTRA)
mul Ldg, tmp, Ltmp // Ldg = tmp * (N->v.g - Lg)
ldrb Ldg, [N, #VERTEX_G]
subs Ldg, Lg, lsr #(8 + G_EXTRA)
mulne Ldg, tmp, Ldg // Ldg = tmp * (N->v.g - Lg)
asr Ldg, #(8 - G_EXTRA) // (8 + G_EXTRA)-bit for fractional part

ldr Ldt, [N, #VERTEX_T]
sub Ldt, Lt // Ldt = N->v.t - Lt
subs Ldt, Lt // Ldt = N->v.t - Lt
scaleUV Ldt, Ltmp, Ltmp2, tmp
fiq_off
.calc_left_end:
Expand Down Expand Up @@ -146,17 +146,17 @@ rasterizeGT_asm:
divLUT tmp, Rh // tmp = FixedInvU(Rh)

fiq_on
ldrsh Rtmp, [N, #VERTEX_X]
sub Rtmp, Rx, asr #16
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
ldrsh Rdx, [N, #VERTEX_X]
subs Rdx, Rx, asr #16
mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)

ldrb Rtmp, [N, #VERTEX_G]
sub Rtmp, Rg, lsr #(8 + G_EXTRA)
mul Rdg, tmp, Rtmp // Rdg = tmp * (N->v.g - Rg)
ldrb Rdg, [N, #VERTEX_G]
subs Rdg, Rg, lsr #(8 + G_EXTRA)
mulne Rdg, tmp, Rdg // Rdg = tmp * (N->v.g - Rg)
asr Rdg, #(8 - G_EXTRA) // (8 + G_EXTRA)-bit for fractional part

ldr Rdt, [N, #VERTEX_T]
sub Rdt, Rt // Rdt = N->v.t - Rt
subs Rdt, Rt // Rdt = N->v.t - Rt
scaleUV Rdt, Rtmp, Rtmp2, tmp
fiq_off
.calc_right_end:
Expand All @@ -183,12 +183,12 @@ rasterizeGT_asm:

divLUT inv, width // inv = FixedInvU(width)

sub dtdx, Rt, Lt // dtdx = Rt - Lt
subs dtdx, Rt, Lt // dtdx = Rt - Lt
scaleUV dtdx, dtmp, dtmp2, inv
// t == Lt (alias)

sub dgdx, Rg, Lg // dgdx = Rg - Lg
mul dgdx, inv // dgdx *= FixedInvU(width)
subs dgdx, Rg, Lg // dgdx = Rg - Lg
mulne dgdx, inv // dgdx *= FixedInvU(width)
asr dgdx, #16 // dgdx >>= 16
// g == Lg (alias)

Expand Down
34 changes: 17 additions & 17 deletions src/platform/gba/asm/rasterizeGTA.s
Original file line number Diff line number Diff line change
Expand Up @@ -106,17 +106,17 @@ rasterizeGTA_asm:
divLUT tmp, Lh // tmp = FixedInvU(Lh)

fiq_on
ldrsh Ltmp, [N, #VERTEX_X]
sub Ltmp, Lx, asr #16
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
ldrsh Ldx, [N, #VERTEX_X]
subs Ldx, Lx, asr #16
mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)

ldrb Ltmp, [N, #VERTEX_G]
sub Ltmp, Lg, lsr #8
mul Ldg, tmp, Ltmp // Ldg = tmp * (N->v.g - Lg)
ldrb Ldg, [N, #VERTEX_G]
subs Ldg, Lg, lsr #8
mulne Ldg, tmp, Ldg // Ldg = tmp * (N->v.g - Lg)
asr Ldg, #8 // 8-bit for fractional part

ldr Ldt, [N, #VERTEX_T]
sub Ldt, Lt // Ldt = N->v.t - Lt
subs Ldt, Lt // Ldt = N->v.t - Lt
scaleUV Ldt, Ltmp, Ltmp2, tmp
fiq_off
.calc_left_end:
Expand Down Expand Up @@ -145,17 +145,17 @@ rasterizeGTA_asm:
divLUT tmp, Rh // tmp = FixedInvU(Rh)

fiq_on
ldrsh Rtmp, [N, #VERTEX_X]
sub Rtmp, Rx, asr #16
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
ldrsh Rdx, [N, #VERTEX_X]
subs Rdx, Rx, asr #16
mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)

ldrb Rtmp, [N, #VERTEX_G]
sub Rtmp, Rg, lsr #8
mul Rdg, tmp, Rtmp // Rdg = tmp * (N->v.g - Rg)
ldrb Rdg, [N, #VERTEX_G]
subs Rdg, Rg, lsr #8
mulne Rdg, tmp, Rdg // Rdg = tmp * (N->v.g - Rg)
asr Rdg, #8 // 8-bit for fractional part

ldr Rdt, [N, #VERTEX_T]
sub Rdt, Rt // Rdt = N->v.t - Rt
subs Rdt, Rt // Rdt = N->v.t - Rt
scaleUV Rdt, Rtmp, Rtmp2, tmp
fiq_off
.calc_right_end:
Expand All @@ -182,12 +182,12 @@ rasterizeGTA_asm:

divLUT inv, width // inv = FixedInvU(width)

sub dtdx, Rt, Lt // dtdx = Rt - Lt
subs dtdx, Rt, Lt // dtdx = Rt - Lt
scaleUV dtdx, dtmp, dtmp2, inv
// t == Lt (alias)

sub dgdx, Rg, Lg // dgdx = Rg - Lg
mul dgdx, inv // dgdx *= FixedInvU(width)
subs dgdx, Rg, Lg // dgdx = Rg - Lg
mulne dgdx, inv // dgdx *= FixedInvU(width)
asr dgdx, #16 // dgdx >>= 16
// g == Lg (alias)

Expand Down
14 changes: 6 additions & 8 deletions src/platform/gba/asm/rasterizeS.s
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ Ry2 .req Rh
Lxy .req tmp
Ly2 .req Lh
indexB .req pair
Ltmp .req N
Rtmp .req N

.global rasterizeS_asm
rasterizeS_asm:
Expand Down Expand Up @@ -52,9 +50,9 @@ rasterizeS_asm:

divLUT tmp, Lh // tmp = FixedInvU(Lh)

ldrsh Ltmp, [L, #VERTEX_X]
sub Ltmp, Lx, asr #16
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
ldrsh Ldx, [L, #VERTEX_X]
subs Ldx, Lx, asr #16
mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)
.calc_left_end:

cmp Rh, #0
Expand All @@ -76,9 +74,9 @@ rasterizeS_asm:

divLUT tmp, Rh // tmp = FixedInvU(Rh)

ldrsh Rtmp, [R, #VERTEX_X]
sub Rtmp, Rx, asr #16
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
ldrsh Rdx, [R, #VERTEX_X]
subs Rdx, Rx, asr #16
mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
.calc_right_end:

cmp Rh, Lh // if (Rh < Lh)
Expand Down

0 comments on commit 5c13524

Please sign in to comment.