Skip to content

Commit

Permalink
#368 GBA audio processing optimizations, enable VRAM meshes, cleanup …
Browse files Browse the repository at this point in the history
…rendering constants
  • Loading branch information
XProger committed Dec 1, 2022
1 parent b70eade commit b1a5590
Show file tree
Hide file tree
Showing 7 changed files with 149 additions and 107 deletions.
11 changes: 5 additions & 6 deletions src/fixed/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
#define FRAME_HEIGHT 160

#define USE_FMT (LVL_FMT_PKD)
#define USE_VRAM_MESH // experimental

#include <tonc.h>
#elif defined(__NDS__)
Expand Down Expand Up @@ -189,8 +190,6 @@
#include <math.h>
#include <limits.h>

#define VRAM_WIDTH (FRAME_WIDTH/2) // in shorts

#ifndef USE_FMT
#define USE_FMT (LVL_FMT_PHD | LVL_FMT_PSX | LVL_FMT_SAT | LVL_FMT_TR2 | LVL_FMT_TR4)
#endif
Expand Down Expand Up @@ -343,13 +342,13 @@ X_INLINE int32 abs(int32 x) {
#endif

#if defined(__GBA_WIN__)
extern uint16 fb[VRAM_WIDTH * FRAME_HEIGHT];
extern uint16 fb[FRAME_WIDTH * FRAME_HEIGHT];
#elif defined(__GBA__)
extern uint32 fb;
#elif defined(__TNS__)
extern uint16 fb[VRAM_WIDTH * FRAME_HEIGHT];
extern uint16 fb[FRAME_WIDTH * FRAME_HEIGHT];
#elif defined(__DOS__)
extern uint16 fb[VRAM_WIDTH * FRAME_HEIGHT];
extern uint16 fb[FRAME_WIDTH * FRAME_HEIGHT];
#endif

#define STATIC_MESH_FLAG_NO_COLLISION 1
Expand Down Expand Up @@ -2839,7 +2838,7 @@ int32 doTutorial(ItemObj* lara, int32 track);
void sndInit();
void sndInitSamples();
void sndFreeSamples();
void sndFill(int8* buffer, int32 count);
void sndFill(int8* buffer);
void* sndPlaySample(int32 index, int32 volume, int32 pitch, int32 mode);
void sndPlayTrack(int32 track);
bool sndTrackIsPlaying();
Expand Down
46 changes: 25 additions & 21 deletions src/platform/gba/asm/sndIMA.s
Original file line number Diff line number Diff line change
Expand Up @@ -10,49 +10,53 @@ stepLUT .req r6
step .req r7
n .req r8
index .req r9
mask .req r10
out .req r12
tmp .req out
diff .req step

IMA_STEP_SIZE = 88

.macro decode4 n, out
.macro ima_decode
ldr step, [stepLUT, idx, lsl #2]

and index, \n, #7
mov tmp, step, lsl #1
mla step, index, tmp, step
tst \n, #8
subne smp, smp, step, lsr #3
addeq smp, smp, step, lsr #3
mul tmp, step, index
add diff, tmp, lsl #1

subne smp, diff, lsr #3
addeq smp, diff, lsr #3

subs index, #3
suble idx, idx, #1
bicle idx, idx, idx, asr #31
addgt idx, idx, index, lsl #1
cmpgt idx, #IMA_STEP_SIZE
suble idx, #1
addgt idx, index, lsl #1

// clamp 0..88
bic idx, idx, asr #31
cmp idx, #IMA_STEP_SIZE
movgt idx, #IMA_STEP_SIZE

mov \out, smp, asr #(2 + SND_VOL_SHIFT)
mov out, smp, asr #(2 + SND_VOL_SHIFT)
strb out, [buffer], #1
.endm

.global sndIMA_asm
sndIMA_asm:
// sndIMA_fill_asm: entry point of the IMA decoder. Loads the running decoder
// state (smp, idx) from *state and the step table address, then prepares the
// nibble mask used by the decode loop that follows.
.global sndIMA_fill_asm
sndIMA_fill_asm:
// NOTE(review): `mask` is aliased to r10 in the .req list and is written a few
// lines below, but this push only covers r4-r9. r10 is callee-saved under the
// ARM procedure call standard, so unless the (not visible here) epilogue and
// this push are both widened to {r4-r10}, the caller's r10 is clobbered.
// TODO confirm against the matching ldmfd at the end of the function.
stmfd sp!, {r4-r9}

// smp = state[0], idx = state[1] (two consecutive words at *state)
ldmia state, {smp, idx}

// literal-pool load of the address of the IMA_STEP table
ldr stepLUT, =IMA_STEP

// 3-bit mask applied to each nibble to form `index` in the loop
mov mask, #7
.loop:
ldrb n, [data], #1

decode4 n, out
strb out, [buffer], #1

mov n, n, lsr #4
and index, mask, n
tst n, #8
ima_decode

decode4 n, out
strb out, [buffer], #1
and index, mask, n, lsr #4
tst n, #(8 << 4)
ima_decode

subs size, #1
bne .loop
Expand Down
106 changes: 66 additions & 40 deletions src/platform/gba/asm/sndPCM.s
Original file line number Diff line number Diff line change
Expand Up @@ -7,61 +7,87 @@ volume .req r3

data .req r4
buffer .req r5
count .req r6
ampA .req r7
ampB .req r8
outA .req r9
outB .req r12
last .req count
tmpSP .req outB
tmp .req ampA

.macro clamp amp
tmp .req r6
last .req r12
tmpSP .req last
out .req size

.macro clamp
// Vanadium's clamp trick (-128..127)
mov tmp, \amp, asr #31 // tmp <- 0xffffffff
cmp tmp, \amp, asr #7 // not equal
eorne \amp, tmp, #0x7F // amp <- 0xffffff80
mov tmp, out, asr #31 // tmp <- 0xffffffff
cmp tmp, out, asr #7 // not equal
eorne out, tmp, #0x7F // out <- 0xffffff80
.endm

// calc_last: compute the fixed-point end position for one output block,
// last = pos + inc * 176, using three shifted adds (x5 -> x11 -> x176)
// instead of a multiply. Reads pos and inc; writes only last.
// NOTE: tmpSP is declared as an alias of last, so expand this macro only after
// the stack arguments have been loaded through tmpSP.
.macro calc_last
// last = pos + inc * SND_SAMPLES (176)
add last, inc, inc, lsl #2 // last = inc * 5
add last, inc, last, lsl #1 // last = inc * 11
add last, pos, last, lsl #4 // last = pos + (inc * 11) * 16
.endm

// pcm_sample_fetch: read one source byte at data[pos >> SND_FIXED_SHIFT],
// advance pos by inc, re-centre the unsigned byte around zero and scale it by
// volume. The product is left in `out` still un-shifted; the fill/mix macros
// apply the SND_VOL_SHIFT scaling themselves.
// NOTE: `out` is declared as an alias of `size`, so the first expansion
// destroys size; any use of size must happen before the sample loop.
.macro pcm_sample_fetch
// zero-extended byte load, index is the integer part of the fixed-point pos
ldrb out, [data, pos, lsr #SND_FIXED_SHIFT]
// step the fixed-point read position
add pos, inc
// 0..255 -> -128..127
sub out, #128
// out = out * volume
mul out, volume
.endm

// pcm_sample_fill: produce one sample and OVERWRITE the next output byte with
// it (the previous buffer contents are not read). No clamp is applied here;
// only the low byte of the shifted value is stored.
.macro pcm_sample_fill
pcm_sample_fetch
// drop the volume scaling bits (arithmetic shift keeps the sign)
asr out, #SND_VOL_SHIFT
// store low byte and post-increment the output pointer
strb out, [buffer], #1
.endm

// pcm_sample_mix: produce one sample and ADD it to the signed byte already in
// the output buffer, then run the `clamp` macro on the sum (documented there as
// a -128..127 clamp) and store the result back, advancing buffer by one.
// Uses `tmp` as scratch, so callers must have that register saved.
.macro pcm_sample_mix
pcm_sample_fetch
// sign-extended load of the sample already mixed into this slot
ldrsb tmp, [buffer]
// out = existing + (new >> SND_VOL_SHIFT)
add out, tmp, out, asr #SND_VOL_SHIFT
// tmp is dead after the add above, so clamp is free to overwrite it
clamp
// store low byte and post-increment the output pointer
strb out, [buffer], #1
.endm

.global sndPCM_asm
sndPCM_asm:
.global sndPCM_fill_asm
sndPCM_fill_asm:
mov tmpSP, sp
stmfd sp!, {r4-r9}
stmfd sp!, {r4-r5}

ldmia tmpSP, {data, buffer, count}
ldmia tmpSP, {data, buffer}

calc_last

mla last, inc, count, pos
cmp last, size
movgt last, size

.loop:
ldrb ampA, [data, pos, lsr #SND_FIXED_SHIFT]
add pos, pos, inc
ldrb ampB, [data, pos, lsr #SND_FIXED_SHIFT]
add pos, pos, inc
.loop_fill:
pcm_sample_fill
pcm_sample_fill

// can't use signed PCM because of LDRSB restrictions
sub ampA, ampA, #128
sub ampB, ampB, #128
cmp pos, last
blt .loop_fill

mul ampA, volume
mul ampB, volume
ldmfd sp!, {r4-r5}
bx lr

ldrsb outA, [buffer, #0]
ldrsb outB, [buffer, #1]

add outA, ampA, asr #SND_VOL_SHIFT
add outB, ampB, asr #SND_VOL_SHIFT
.global sndPCM_mix_asm
sndPCM_mix_asm:
mov tmpSP, sp
stmfd sp!, {r4-r6} // tmp reg required

clamp outA
clamp outB
ldmia tmpSP, {data, buffer}

strb outA, [buffer], #1
strb outB, [buffer], #1
calc_last

cmp last, size
movgt last, size

.loop_mix:
pcm_sample_mix
pcm_sample_mix

cmp pos, last
blt .loop
blt .loop_mix

.done:
ldmfd sp!, {r4-r9}
bx lr
ldmfd sp!, {r4-r6}
bx lr
4 changes: 2 additions & 2 deletions src/platform/gba/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ void soundFill()
{
WAVEHDR *waveHdr = waveBuf + curSoundBuffer;
waveOutUnprepareHeader(waveOut, waveHdr, sizeof(WAVEHDR));
sndFill((int8*)waveHdr->lpData, SND_SAMPLES);
sndFill((int8*)waveHdr->lpData);
waveOutPrepareHeader(waveOut, waveHdr, sizeof(WAVEHDR));
waveOutWrite(waveOut, waveHdr, sizeof(WAVEHDR));
curSoundBuffer ^= 1;
Expand Down Expand Up @@ -503,7 +503,7 @@ void soundFill()
REG_DMA1CNT = DMA_DST_FIXED | DMA_REPEAT | DMA_16 | DMA_AT_FIFO | DMA_ENABLE;
}

sndFill(soundBuffer + curSoundBuffer, SND_SAMPLES);
sndFill(soundBuffer + curSoundBuffer);
curSoundBuffer ^= SND_SAMPLES;
}

Expand Down
14 changes: 7 additions & 7 deletions src/platform/gba/rasterizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ void rasterizeS_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
}
}

pixel += VRAM_WIDTH;
pixel += (FRAME_WIDTH >> 1);

Lx += Ldx;
Rx += Rdx;
Expand Down Expand Up @@ -253,7 +253,7 @@ void rasterizeF_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
}
}

pixel += VRAM_WIDTH;
pixel += (FRAME_WIDTH >> 1);

Lx += Ldx;
Rx += Rdx;
Expand Down Expand Up @@ -377,7 +377,7 @@ void rasterizeFT_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
}
}

pixel += VRAM_WIDTH;
pixel += (FRAME_WIDTH >> 1);

Lx += Ldx;
Rx += Rdx;
Expand Down Expand Up @@ -533,7 +533,7 @@ void rasterizeGT_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
}
}

pixel += VRAM_WIDTH;
pixel += (FRAME_WIDTH >> 1);

Lx += Ldx;
Rx += Rdx;
Expand Down Expand Up @@ -672,7 +672,7 @@ void rasterizeFTA_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
}
}

pixel += VRAM_WIDTH;
pixel += (FRAME_WIDTH >> 1);

Lx += Ldx;
Rx += Rdx;
Expand Down Expand Up @@ -846,7 +846,7 @@ void rasterizeGTA_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
}
}

pixel += VRAM_WIDTH;
pixel += (FRAME_WIDTH >> 1);

Lx += Ldx;
Rx += Rdx;
Expand Down Expand Up @@ -970,7 +970,7 @@ extern "C" X_NOINLINE void rasterizeSprite_c(uint16* pixel, const VertexLink* L,

if (L->v.y < 0)
{
pixel -= L->v.y * VRAM_WIDTH;
pixel -= L->v.y * (FRAME_WIDTH >> 1);
v -= L->v.y * dv;
h += L->v.y;
}
Expand Down
10 changes: 5 additions & 5 deletions src/platform/gba/render.iwram.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ struct ViewportRel {
ViewportRel viewportRel;

#if defined(__GBA_WIN__)
uint16 fb[VRAM_WIDTH * FRAME_HEIGHT];
uint16 fb[FRAME_WIDTH * FRAME_HEIGHT];
#elif defined(__GBA__)
uint32 fb = MEM_VRAM;
#elif defined(__TNS__)
uint16 fb[VRAM_WIDTH * FRAME_HEIGHT];
uint16 fb[FRAME_WIDTH * FRAME_HEIGHT];
#elif defined(__DOS__)
uint16 fb[VRAM_WIDTH * FRAME_HEIGHT];
uint16 fb[FRAME_WIDTH * FRAME_HEIGHT];
#endif

enum FaceType {
Expand Down Expand Up @@ -150,7 +150,7 @@ extern "C" {
#define faceAddMeshTriangles faceAddMeshTriangles_c
#define rasterize rasterize_c

X_INLINE bool checkBackface(const Vertex *a, const Vertex *b, const Vertex *c)
X_INLINE bool checkBackface(const Vertex* a, const Vertex* b, const Vertex* c)
{
return (b->x - a->x) * (c->y - a->y) <= (c->x - a->x) * (b->y - a->y);
}
Expand Down Expand Up @@ -803,7 +803,7 @@ void faceAddMesh(const MeshQuad* quads, const MeshTriangle* triangles, int32 qCo

void clear()
{
dmaFill((void*)fb, 0, VRAM_WIDTH * FRAME_HEIGHT * 2);
dmaFill((void*)fb, 0, FRAME_WIDTH * FRAME_HEIGHT);
}

void renderRoom(const Room* room)
Expand Down
Loading

0 comments on commit b1a5590

Please sign in to comment.