From 8e625e5e85d7824a95c2969647fe7e5b1c8e4021 Mon Sep 17 00:00:00 2001
From: Cameron Cawley
Date: Sun, 14 Apr 2024 14:31:34 +0100
Subject: [PATCH] Speed up 32-bit aligned display transfers with the DS port

Add word-aligned variants of the 1X "same format" row functions. They are
selected when the video pointer, the row width in bits and the output
alignment are all multiples of 32 bits. On DS builds the copy is handed to
the new Host_TransferUpdate hook, which performs it with DMA channel 3;
other builds copy through Host_BeginUpdate/EndianWordCpy/Host_EndUpdate.

ROWFUNC_UNALIGNED is split into ROWFUNC_UNALIGNED_BYTE and
ROWFUNC_UNALIGNED_WORD so the byte-aligned path stays available when only
word alignment fails. Word alignment is tested with the mask 0x1f.

---
 arch/paldisplaydev.c | 109 +++++++++++++++++++++++++++++++++++++++++--
 nds/DispKbd.c        |   7 +++
 2 files changed, 112 insertions(+), 4 deletions(-)

diff --git a/arch/paldisplaydev.c b/arch/paldisplaydev.c
index 5fc561d..a0b4419 100644
--- a/arch/paldisplaydev.c
+++ b/arch/paldisplaydev.c
@@ -103,6 +103,12 @@
    void PDD_Name(Host_EndUpdate)(ARMul_State *state,PDD_Row *row)
    - End updating the region of the row
 
+   void PDD_Name(Host_TransferUpdate)(ARMul_State *state,PDD_Row *row,
+                                      unsigned int count,const ARMword *src)
+   - Write 'count' bits of the row to the screen.
+   - 'count' will always be a multiple of 32.
+   - This is only used in DS builds at the moment.
+
    void PDD_Name(Host_AdvanceRow)(ARMul_State *state,PDD_Row *row,
                                   unsigned int count)
    - Advance the row pointer by 'count' bits
@@ -206,7 +212,8 @@ struct PDD_Name(DisplayInfo) {
 
 #define ROWFUNC_FORCE 0x1 /* Force row to be fully redrawn */
 #define ROWFUNC_UPDATED 0x2 /* Flag used to indicate whether anything was done */
-#define ROWFUNC_UNALIGNED 0x4 /* Flag that gets set if we know we can't use the byte-aligned rowfuncs */
+#define ROWFUNC_UNALIGNED_BYTE 0x4 /* Flag that gets set if we know we can't use the byte-aligned rowfuncs */
+#define ROWFUNC_UNALIGNED_WORD 0x8 /* Flag that gets set if we know we can't use the word-aligned rowfuncs */
 
 /*
 
@@ -298,6 +305,51 @@ static inline int PDD_Name(RowFunc1XSameByteAligned)(ARMul_State *state,PDD_Row
   return (flags & ROWFUNC_UPDATED);
 }
 
+static inline int PDD_Name(RowFunc1XSameWordAligned)(ARMul_State *state,PDD_Row drow,int flags)
+{
+  uint32_t Vptr = DC.Vptr>>5;
+  uint32_t Vstart = MEMC.Vstart<<2;
+  uint32_t Vend = (MEMC.Vend+1)<<2; /* Point to pixel after end */
+  const ARMword *RAM = MEMC.PhysRam;
+  int Remaining = DC.BitWidth>>5;
+
+  /* Sanity checks to avoid looping forever */
+  if((Vptr >= Vend) || (Vstart >= Vend))
+    return 0;
+  if(Vptr >= Vend)
+    Vptr = Vstart;
+
+  /* Process the row */
+  while(Remaining > 0)
+  {
+    uint32_t FlagsOffset = Vptr/(UPDATEBLOCKSIZE/4);
+    int Available = MIN(Remaining,MIN(((FlagsOffset+1)*(UPDATEBLOCKSIZE/4))-Vptr,Vend-Vptr));
+
+    if((flags & ROWFUNC_FORCE) || (HD.UpdateFlags[FlagsOffset] != MEMC.UpdateFlags[FlagsOffset]))
+    {
+      /* Process the pixels in this region, stopping at end of row/update block/Vend */
+#ifndef SYSTEM_nds
+      int outoffset;
+      ARMword *out = PDD_Name(Host_BeginUpdate)(state,&drow,Available<<5,&outoffset);
+      EndianWordCpy(out+(outoffset>>5),RAM+Vptr,Available);
+      PDD_Name(Host_EndUpdate)(state,&drow);
+#else
+      PDD_Name(Host_TransferUpdate)(state,&drow,Available<<5,RAM+Vptr);
+#endif
+      flags |= ROWFUNC_UPDATED;
+    }
+    PDD_Name(Host_AdvanceRow)(state,&drow,Available<<5);
+    Vptr += Available;
+    Remaining -= Available;
+    if(Vptr >= Vend)
+      Vptr = Vstart;
+  }
+
+  DC.Vptr = Vptr<<5;
+
+  return (flags & ROWFUNC_UPDATED);
+}
+
 /*
 
   Row output via ExpandTable
@@ -425,6 +477,45 @@ static inline void PDD_Name(RowFunc1XSameByteAlignedNoFlags)(ARMul_State *state,
   DC.Vptr = Vptr<<3;
 }
 
+static inline void PDD_Name(RowFunc1XSameWordAlignedNoFlags)(ARMul_State *state,PDD_Row drow)
+{
+  uint32_t Vptr = DC.Vptr>>5;
+  uint32_t Vstart = MEMC.Vstart<<2;
+  uint32_t Vend = (MEMC.Vend+1)<<2; /* Point to pixel after end */
+  const ARMword *RAM = MEMC.PhysRam;
+  int Remaining = DC.BitWidth>>5;
+
+  /* Sanity checks to avoid looping forever */
+  if((Vptr >= Vend) || (Vstart >= Vend))
+    return;
+  if(Vptr >= Vend)
+    Vptr = Vstart;
+
+  /* Process the row */
+  while(Remaining > 0)
+  {
+    int Available = MIN(Remaining,Vend-Vptr);
+
+    /* Process the pixels in this region, stopping at end of row/update block/Vend */
+#ifndef SYSTEM_nds
+    int outoffset;
+    ARMword *out = PDD_Name(Host_BeginUpdate)(state,&drow,Available<<5,&outoffset);
+    EndianWordCpy(out+(outoffset>>5),RAM+Vptr,Available);
+    PDD_Name(Host_EndUpdate)(state,&drow);
+#else
+    PDD_Name(Host_TransferUpdate)(state,&drow,Available<<5,RAM+Vptr);
+#endif
+
+    PDD_Name(Host_AdvanceRow)(state,&drow,Available<<5);
+    Vptr += Available;
+    Remaining -= Available;
+    if(Vptr >= Vend)
+      Vptr = Vstart;
+  }
+
+  DC.Vptr = Vptr<<5;
+}
+
 /*
 
   Row output via ExpandTable
@@ -683,7 +774,9 @@ static void PDD_Name(EventFunc)(ARMul_State *state,CycleCount nowtime)
     /* We can test these values once here, so that it's only output alignment
        that we need to worry about during the loop */
     if((DC.Vptr & 0x7) || ((Width*BPP)&0x7))
-      flags |= ROWFUNC_UNALIGNED;
+      flags |= ROWFUNC_UNALIGNED_WORD | ROWFUNC_UNALIGNED_BYTE;
+    else if((DC.Vptr & 0x1f) || ((Width*BPP)&0x1f))
+      flags |= ROWFUNC_UNALIGNED_WORD;
 
     if(DisplayDev_UseUpdateFlags)
     {
@@ -706,7 +799,11 @@ static void PDD_Name(EventFunc)(ARMul_State *state,CycleCount nowtime)
           {
             updated = PDD_Name(RowFuncExpandTable)(state,hrow,flags);
           }
-          else if(!(flags & ROWFUNC_UNALIGNED) && !(alignment & 0x7))
+          else if(!(flags & ROWFUNC_UNALIGNED_WORD) && !(alignment & 0x1f))
+          {
+            updated = PDD_Name(RowFunc1XSameWordAligned)(state,hrow,flags);
+          }
+          else if(!(flags & ROWFUNC_UNALIGNED_BYTE) && !(alignment & 0x7))
           {
             updated = PDD_Name(RowFunc1XSameByteAligned)(state,hrow,flags);
           }
@@ -756,7 +853,11 @@ static void PDD_Name(EventFunc)(ARMul_State *state,CycleCount nowtime)
           {
             PDD_Name(RowFuncExpandTableNoFlags)(state,hrow);
           }
-          else if(!(flags & ROWFUNC_UNALIGNED) && !(alignment & 0x7))
+          else if(!(flags & ROWFUNC_UNALIGNED_WORD) && !(alignment & 0x1f))
+          {
+            PDD_Name(RowFunc1XSameWordAlignedNoFlags)(state,hrow);
+          }
+          else if(!(flags & ROWFUNC_UNALIGNED_BYTE) && !(alignment & 0x7))
           {
             PDD_Name(RowFunc1XSameByteAlignedNoFlags)(state,hrow);
           }
diff --git a/nds/DispKbd.c b/nds/DispKbd.c
index f7a70bf..8e28544 100644
--- a/nds/DispKbd.c
+++ b/nds/DispKbd.c
@@ -116,6 +116,13 @@ static inline void PDD_Name(Host_EndUpdate)(ARMul_State *state,PDD_Row *row)
 	row->src += count>>5;
 }
 
+static inline void PDD_Name(Host_TransferUpdate)(ARMul_State *state,PDD_Row *row,unsigned int count,const ARMword *src)
+{
+	DC_FlushRange(src, count>>3);
+	while (dmaBusy(3));
+	dmaCopyWordsAsynch(3, src, row->dst, count>>3);
+}
+
 static inline void PDD_Name(Host_AdvanceRow)(ARMul_State *state,PDD_Row *row,unsigned int count)
 {
 	row->dst += count>>3;