diff --git a/deps/lightrec/.gitrepo b/deps/lightrec/.gitrepo
index 9898d9461..0fef3f2b7 100644
--- a/deps/lightrec/.gitrepo
+++ b/deps/lightrec/.gitrepo
@@ -6,7 +6,7 @@ [subrepo]
 	remote = https://github.com/pcercuei/lightrec.git
 	branch = master
-	commit = 0df4ec86ba664dad3b4cc24fd3199131e8e3219f
-	parent = 364a705dc70b57a734b4e362226a386b34a008fb
+	commit = d640c6b484ac4936db16d865e4dc8850c1b5e122
+	parent = ffa840032d55d2fd54f8546f332f91e6b8bbe495
 	method = merge
 	cmdver = 0.4.3
diff --git a/deps/lightrec/CMakeLists.txt b/deps/lightrec/CMakeLists.txt
index 809d0b776..8407c5893 100644
--- a/deps/lightrec/CMakeLists.txt
+++ b/deps/lightrec/CMakeLists.txt
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.0)
-project(lightrec LANGUAGES C VERSION 0.4)
+project(lightrec LANGUAGES C VERSION 0.6)
 
 set(BUILD_SHARED_LIBS ON CACHE BOOL "Build shared libraries")
 if (NOT BUILD_SHARED_LIBS)
@@ -68,6 +68,7 @@ option(OPT_TRANSFORM_OPS "(optimization) Transform opcodes" ON)
 option(OPT_LOCAL_BRANCHES "(optimization) Detect local branches" ON)
 option(OPT_SWITCH_DELAY_SLOTS "(optimization) Switch delay slots" ON)
 option(OPT_FLAG_STORES "(optimization) Flag stores that don't require invalidation" ON)
+option(OPT_FLAG_IO "(optimization) Flag I/O opcodes whose target is known" ON)
 option(OPT_FLAG_MULT_DIV "(optimization) Flag MULT/DIV that only use one of HI/LO" ON)
 option(OPT_EARLY_UNLOAD "(optimization) Unload registers early" ON)
 
@@ -90,15 +91,6 @@ if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
 	target_compile_options(${PROJECT_NAME} PRIVATE -Wno-initializer-overrides)
 endif()
 
-option(ENABLE_TINYMM "Enable optional libtinymm dependency" OFF)
-if (ENABLE_TINYMM)
-	find_library(TINYMM_LIBRARIES tinymm REQUIRED)
-	find_path(TINYMM_INCLUDE_DIR tinymm.h REQUIRED)
-
-	include_directories(${TINYMM_INCLUDE_DIR})
-	target_link_libraries(${PROJECT_NAME} PRIVATE ${TINYMM_LIBRARIES})
-endif (ENABLE_TINYMM)
-
 if (ENABLE_THREADED_COMPILER)
 	find_library(PTHREAD_LIBRARIES pthread REQUIRED)
 	find_path(PTHREAD_INCLUDE_DIR pthread.h REQUIRED)
@@ -107,6 +99,12 @@ if (ENABLE_THREADED_COMPILER)
 	target_link_libraries(${PROJECT_NAME} PRIVATE ${PTHREAD_LIBRARIES})
 endif (ENABLE_THREADED_COMPILER)
 
+option(ENABLE_CODE_BUFFER "Enable external code buffer" OFF)
+if (ENABLE_CODE_BUFFER)
+	target_sources(${PROJECT_NAME} PRIVATE tlsf/tlsf.c)
+	target_include_directories(${PROJECT_NAME} PRIVATE tlsf)
+endif (ENABLE_CODE_BUFFER)
+
 find_library(LIBLIGHTNING lightning REQUIRED)
 find_path(LIBLIGHTNING_INCLUDE_DIR lightning.h REQUIRED)
 
@@ -118,7 +116,7 @@ if (LOG_LEVEL STREQUAL Debug)
 	target_sources(${PROJECT_NAME} PRIVATE disassembler.c)
 endif()
 
-configure_file(config.h.cmakein config.h @ONLY)
+configure_file(lightrec-config.h.cmakein lightrec-config.h @ONLY)
 
 include(GNUInstallDirs)
 install(TARGETS ${PROJECT_NAME}
diff --git a/deps/lightrec/README.md b/deps/lightrec/README.md
index 40ecc8f6a..ab2c13b5f 100644
--- a/deps/lightrec/README.md
+++ b/deps/lightrec/README.md
@@ -50,4 +50,6 @@ Lightrec has been ported to the following emulators:
 
 * [__pcsx4all__ (my own fork)](https://github.com/pcercuei/pcsx4all)
 
-* [__Beetle__ (libretro)](https://github.com/libretro/beetle-psx-libretro/)
\ No newline at end of file
+* [__Beetle__ (libretro)](https://github.com/libretro/beetle-psx-libretro/)
+
+[![Star History Chart](https://api.star-history.com/svg?repos=pcercuei/lightrec&type=Date)](https://star-history.com/#pcercuei/lightrec&Date)
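Note: the build-system changes above drop the optional libtinymm dependency and add an ENABLE_CODE_BUFFER switch, which compiles in the bundled TLSF (two-level segregated fit) allocator so emitted code can be carved out of one fixed, caller-provided buffer instead of per-block allocations. A minimal sketch of that idea, assuming the stock TLSF API (tlsf_create_with_pool / tlsf_malloc); the code_pool / code_alloc names are illustrative, not lightrec's:

    #include <stddef.h>
    #include "tlsf.h"

    static tlsf_t code_pool;  /* illustrative global, not lightrec's */

    void code_buffer_init(void *buf, size_t size)
    {
            code_pool = tlsf_create_with_pool(buf, size);
    }

    void *code_alloc(size_t size)
    {
            /* Returns NULL once the pool is exhausted; the block cache
             * below reacts by evicting outdated blocks to free space. */
            return tlsf_malloc(code_pool, size);
    }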
diff --git a/deps/lightrec/blockcache.c b/deps/lightrec/blockcache.c
index 4512392dc..bb58cdb16 100644
--- a/deps/lightrec/blockcache.c
+++ b/deps/lightrec/blockcache.c
@@ -7,6 +7,8 @@
 #include "debug.h"
 #include "lightrec-private.h"
 #include "memmanager.h"
+#include "reaper.h"
+#include "recompiler.h"
 
 #include <stdbool.h>
 #include <stdlib.h>
@@ -63,8 +65,8 @@ void remove_from_code_lut(struct blockcache *cache, struct block *block)
 	u32 offset = lut_offset(block->pc);
 
 	if (block->function) {
-		memset(&state->code_lut[offset], 0,
-		       block->nb_ops * sizeof(*state->code_lut));
+		memset(lut_address(state, offset), 0,
+		       block->nb_ops * lut_elm_size(state));
 	}
 }
 
@@ -102,18 +104,64 @@ void lightrec_unregister_block(struct blockcache *cache, struct block *block)
 	pr_err("Block at PC 0x%x is not in cache\n", block->pc);
 }
 
-void lightrec_free_block_cache(struct blockcache *cache)
+static bool lightrec_block_is_old(const struct lightrec_state *state,
+				  const struct block *block)
+{
+	u32 diff = state->current_cycle - block->precompile_date;
+
+	return diff > (1 << 27); /* About 4 seconds */
+}
+
+static void lightrec_free_blocks(struct blockcache *cache,
+				 const struct block *except, bool all)
 {
+	struct lightrec_state *state = cache->state;
 	struct block *block, *next;
+	bool outdated = all;
 	unsigned int i;
+	u8 old_flags;
 
 	for (i = 0; i < LUT_SIZE; i++) {
 		for (block = cache->lut[i]; block; block = next) {
 			next = block->next;
-			lightrec_free_block(cache->state, block);
+
+			if (except && block == except)
+				continue;
+
+			if (!all) {
+				outdated = lightrec_block_is_old(state, block) ||
+					lightrec_block_is_outdated(state, block);
+			}
+
+			if (!outdated)
+				continue;
+
+			old_flags = block_set_flags(block, BLOCK_IS_DEAD);
+
+			if (!(old_flags & BLOCK_IS_DEAD)) {
+				if (ENABLE_THREADED_COMPILER)
+					lightrec_recompiler_remove(state->rec, block);
+
+				pr_debug("Freeing outdated block at PC 0x%08x\n", block->pc);
+				remove_from_code_lut(cache, block);
+				lightrec_unregister_block(cache, block);
+				lightrec_free_block(state, block);
+			}
 		}
 	}
+}
+
+void lightrec_remove_outdated_blocks(struct blockcache *cache,
+				     const struct block *except)
+{
+	pr_info("Running out of code space. Cleaning block cache...\n");
+
+	lightrec_free_blocks(cache, except, false);
+}
+
+void lightrec_free_block_cache(struct blockcache *cache)
+{
+	lightrec_free_blocks(cache, NULL, true);
 
 	lightrec_free(cache->state, MEM_FOR_LIGHTREC, sizeof(*cache), cache);
 }
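Note: lightrec_block_is_old() leans on unsigned wraparound arithmetic. current_cycle - precompile_date is the elapsed cycle count modulo 2^32, so the age test stays valid even after the cycle counter wraps, and 2^27 cycles is indeed roughly four seconds at the PSX's ~33.87 MHz clock. The same pattern in isolation:

    #include <stdbool.h>
    #include <stdint.h>

    static bool is_old(uint32_t now, uint32_t stamp)
    {
            /* Correct across counter wraparound, as long as entries are
             * scanned before their true age exceeds 2^31 cycles. */
            return (uint32_t)(now - stamp) > (1u << 27);
    }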
@@ -150,22 +198,53 @@ u32 lightrec_calculate_block_hash(const struct block *block)
 	return hash;
 }
 
+static void lightrec_reset_lut_offset(struct lightrec_state *state, void *d)
+{
+	u32 pc = (u32)(uintptr_t) d;
+	struct block *block;
+	void *addr;
+
+	block = lightrec_find_block(state->block_cache, pc);
+	if (!block)
+		return;
+
+	if (block_has_flag(block, BLOCK_IS_DEAD))
+		return;
+
+	addr = block->function ?: state->get_next_block;
+	lut_write(state, lut_offset(pc), addr);
+}
+
 bool lightrec_block_is_outdated(struct lightrec_state *state, struct block *block)
 {
-	void **lut_entry = &state->code_lut[lut_offset(block->pc)];
+	u32 offset = lut_offset(block->pc);
 	bool outdated;
 
-	if (*lut_entry)
+	if (lut_read(state, offset))
 		return false;
 
 	outdated = block->hash != lightrec_calculate_block_hash(block);
 
 	if (likely(!outdated)) {
 		/* The block was marked as outdated, but the content is still
 		 * the same */
-		if (block->function)
-			*lut_entry = block->function;
-		else
-			*lut_entry = state->get_next_block;
+
+		if (ENABLE_THREADED_COMPILER) {
+			/*
+			 * When compiling a block that covers ours, the threaded
+			 * compiler will set the LUT entries of the various
+			 * entry points. Therefore we cannot write the LUT here,
+			 * as we would risk overwriting the new entry points.
+			 * Leave it to the reaper to re-install the LUT entries.
+			 */
+
+			lightrec_reaper_add(state->reaper,
+					    lightrec_reset_lut_offset,
+					    (void *)(uintptr_t) block->pc);
+		} else if (block->function) {
+			lut_write(state, offset, block->function);
+		} else {
+			lut_write(state, offset, state->get_next_block);
+		}
 	}
 
 	return outdated;
diff --git a/deps/lightrec/blockcache.h b/deps/lightrec/blockcache.h
index 3b782f479..2e55ff657 100644
--- a/deps/lightrec/blockcache.h
+++ b/deps/lightrec/blockcache.h
@@ -24,4 +24,7 @@ void lightrec_free_block_cache(struct blockcache *cache);
 u32 lightrec_calculate_block_hash(const struct block *block);
 _Bool lightrec_block_is_outdated(struct lightrec_state *state, struct block *block);
 
+void lightrec_remove_outdated_blocks(struct blockcache *cache,
+				     const struct block *except);
+
 #endif /* __BLOCKCACHE_H__ */
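Note: lightrec_reaper_add() above defers the LUT write to a point where no generated code can race with it. Stripped of lightrec's specifics, a reaper is just a queue of (callback, data) pairs drained at a safe point; a toy sketch with hypothetical names:

    struct reap_entry {
            void (*fn)(void *);
            void *data;
    };

    #define REAP_MAX 64
    static struct reap_entry reap_queue[REAP_MAX];
    static unsigned int reap_count;

    static void reaper_add(void (*fn)(void *), void *data)
    {
            if (reap_count < REAP_MAX)
                    reap_queue[reap_count++] = (struct reap_entry){ fn, data };
    }

    static void reaper_reap(void)
    {
            /* Runs while the core is idle, so callbacks may safely touch
             * shared structures such as the code LUT. */
            while (reap_count) {
                    reap_count--;
                    reap_queue[reap_count].fn(reap_queue[reap_count].data);
            }
    }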
diff --git a/deps/lightrec/disassembler.c b/deps/lightrec/disassembler.c
index 0c193da1e..bef95948f 100644
--- a/deps/lightrec/disassembler.c
+++ b/deps/lightrec/disassembler.c
@@ -11,7 +11,7 @@
 #include "lightrec-private.h"
 #include "regcache.h"
 
-static const char *std_opcodes[] = {
+static const char * const std_opcodes[] = {
 	[OP_J]			= "j ",
 	[OP_JAL]		= "jal ",
 	[OP_BEQ]		= "beq ",
@@ -40,11 +40,9 @@
 	[OP_SWR]		= "swr ",
 	[OP_LWC2]		= "lwc2 ",
 	[OP_SWC2]		= "swc2 ",
-	[OP_META_BEQZ]		= "beqz ",
-	[OP_META_BNEZ]		= "bnez ",
 };
 
-static const char *special_opcodes[] = {
+static const char * const special_opcodes[] = {
 	[OP_SPECIAL_SLL]	= "sll ",
 	[OP_SPECIAL_SRL]	= "srl ",
 	[OP_SPECIAL_SRA]	= "sra ",
@@ -75,14 +73,14 @@
 	[OP_SPECIAL_SLTU]	= "sltu ",
 };
 
-static const char *regimm_opcodes[] = {
+static const char * const regimm_opcodes[] = {
 	[OP_REGIMM_BLTZ]	= "bltz ",
 	[OP_REGIMM_BGEZ]	= "bgez ",
 	[OP_REGIMM_BLTZAL]	= "bltzal ",
 	[OP_REGIMM_BGEZAL]	= "bgezal ",
 };
 
-static const char *cp0_opcodes[] = {
+static const char * const cp0_opcodes[] = {
 	[OP_CP0_MFC0]		= "mfc0 ",
 	[OP_CP0_CFC0]		= "cfc0 ",
 	[OP_CP0_MTC0]		= "mtc0 ",
@@ -90,46 +88,102 @@
 	[OP_CP0_RFE]		= "rfe",
 };
 
-static const char *cp2_opcodes[] = {
+static const char * const cp2_basic_opcodes[] = {
 	[OP_CP2_BASIC_MFC2]	= "mfc2 ",
 	[OP_CP2_BASIC_CFC2]	= "cfc2 ",
 	[OP_CP2_BASIC_MTC2]	= "mtc2 ",
 	[OP_CP2_BASIC_CTC2]	= "ctc2 ",
 };
 
-static const char *opcode_flags[] = {
+static const char * const cp2_opcodes[] = {
+	[OP_CP2_RTPS]		= "rtps ",
+	[OP_CP2_NCLIP]		= "nclip ",
+	[OP_CP2_OP]		= "op ",
+	[OP_CP2_DPCS]		= "dpcs ",
+	[OP_CP2_INTPL]		= "intpl ",
+	[OP_CP2_MVMVA]		= "mvmva ",
+	[OP_CP2_NCDS]		= "ncds ",
+	[OP_CP2_CDP]		= "cdp ",
+	[OP_CP2_NCDT]		= "ncdt ",
+	[OP_CP2_NCCS]		= "nccs ",
+	[OP_CP2_CC]		= "cc ",
+	[OP_CP2_NCS]		= "ncs ",
+	[OP_CP2_NCT]		= "nct ",
+	[OP_CP2_SQR]		= "sqr ",
+	[OP_CP2_DCPL]		= "dcpl ",
+	[OP_CP2_DPCT]		= "dpct ",
+	[OP_CP2_AVSZ3]		= "avsz3 ",
+	[OP_CP2_AVSZ4]		= "avsz4 ",
+	[OP_CP2_RTPT]		= "rtpt ",
+	[OP_CP2_GPF]		= "gpf ",
+	[OP_CP2_GPL]		= "gpl ",
+	[OP_CP2_NCCT]		= "ncct ",
+};
+
+static const char * const mult2_opcodes[] = {
+	"mult2 ", "multu2 ",
+};
+
+static const char * const opcode_flags[] = {
 	"switched branch/DS",
-	"unload Rs",
-	"unload Rt",
-	"unload Rd",
 	"sync point",
 };
 
-static const char *opcode_io_flags[] = {
-	"memory I/O",
-	"hardware I/O",
+static const char * const opcode_io_flags[] = {
 	"self-modifying code",
 	"no invalidation",
+	"no mask",
 };
 
-static const char *opcode_branch_flags[] = {
+static const char * const opcode_io_modes[] = {
+	"Memory access",
+	"I/O access",
+	"RAM access",
+	"BIOS access",
+	"Scratchpad access",
+	"Mapped I/O access"
+};
+
+static const char * const opcode_branch_flags[] = {
 	"emulate branch",
 	"local branch",
 };
 
-static const char *opcode_multdiv_flags[] = {
+static const char * const opcode_multdiv_flags[] = {
 	"No LO",
 	"No HI",
 	"No div check",
 };
 
-static int print_flags(char *buf, size_t len, u16 flags,
-		       const char **array, size_t array_size)
+static size_t do_snprintf(char *buf, size_t len, bool *first,
+			  const char *arg1, const char *arg2)
 {
-	const char *flag_name;
-	unsigned int i;
+	size_t bytes;
+
+	if (*first)
+		bytes = snprintf(buf, len, "(%s%s", arg1, arg2);
+	else
+		bytes = snprintf(buf, len, ", %s%s", arg1, arg2);
+
+	*first = false;
+
+	return bytes;
+}
+
+static const char * const reg_op_token[3] = {
+	"-", "*", "~",
+};
+
+static int print_flags(char *buf, size_t len, const struct opcode *op,
+		       const char * const *array, size_t array_size,
+		       bool is_io)
+{
+	const char *flag_name, *io_mode_name;
+	unsigned int i, io_mode;
 	size_t count = 0, bytes;
 	bool first = true;
+	u32 flags = op->flags;
+	unsigned int reg_op;
 
 	for (i = 0; i < array_size + ARRAY_SIZE(opcode_flags); i++) {
 		if (!(flags & BIT(i)))
@@ -140,17 +194,56 @@
 		else
 			flag_name = array[i - ARRAY_SIZE(opcode_flags)];
 
-		if (first)
-			bytes = snprintf(buf, len, "(%s", flag_name);
-		else
-			bytes = snprintf(buf, len, ", %s", flag_name);
-
-		first = false;
+		bytes = do_snprintf(buf, len, &first, "", flag_name);
 
 		buf += bytes;
 		len -= bytes;
 		count += bytes;
 	}
 
+	if (is_io) {
+		io_mode = LIGHTREC_FLAGS_GET_IO_MODE(flags);
+		if (io_mode > 0) {
+			io_mode_name = opcode_io_modes[io_mode - 1];
+
+			bytes = do_snprintf(buf, len, &first, "", io_mode_name);
+			buf += bytes;
+			len -= bytes;
+			count += bytes;
+		}
+	}
+
+	if (OPT_EARLY_UNLOAD) {
+		reg_op = LIGHTREC_FLAGS_GET_RS(flags);
+		if (reg_op) {
+			bytes = do_snprintf(buf, len, &first,
+					    reg_op_token[reg_op - 1],
+					    lightrec_reg_name(op->i.rs));
+			buf += bytes;
+			len -= bytes;
+			count += bytes;
+		}
+
+		reg_op = LIGHTREC_FLAGS_GET_RT(flags);
+		if (reg_op) {
+			bytes = do_snprintf(buf, len, &first,
+					    reg_op_token[reg_op - 1],
+					    lightrec_reg_name(op->i.rt));
+			buf += bytes;
+			len -= bytes;
+			count += bytes;
+		}
+
+		reg_op = LIGHTREC_FLAGS_GET_RD(flags);
+		if (reg_op) {
+			bytes = do_snprintf(buf, len, &first,
+					    reg_op_token[reg_op - 1],
+					    lightrec_reg_name(op->r.rd));
+			buf += bytes;
+			len -= bytes;
+			count += bytes;
+		}
+	}
+
 	if (!first)
 		count += snprintf(buf, len, ")");
 	else
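Note: do_snprintf() factors out the bookkeeping for printing a comma-separated flag list: the first flag opens the parenthesis, every later one gets a ", " prefix. The same idea reduced to a self-contained helper (names hypothetical), returning the snprintf-style "would-be" length:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static size_t print_flag_list(char *buf, size_t len,
                                  const char *const *names,
                                  unsigned int nb, uint32_t mask)
    {
            size_t pos = 0;
            bool first = true;
            unsigned int i;

            for (i = 0; i < nb && pos < len; i++) {
                    if (!(mask & (1u << i)))
                            continue;

                    pos += snprintf(buf + pos, len - pos, "%s%s",
                                    first ? "(" : ", ", names[i]);
                    first = false;
            }

            if (!first && pos < len)
                    pos += snprintf(buf + pos, len - pos, ")");

            return pos;
    }

print_flags() above does the same, but additionally decodes the packed I/O-mode and register-op fields described in disassembler.h below.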
@@ -160,7 +253,7 @@
 static int print_op_special(union code c, char *buf, size_t len,
-			    const char ***flags_ptr, size_t *nb_flags)
+			    const char * const **flags_ptr, size_t *nb_flags)
 {
 	switch (c.r.op) {
 	case OP_SPECIAL_SLL:
@@ -190,6 +283,9 @@
 				lightrec_reg_name(c.r.rt),
 				lightrec_reg_name(c.r.rs));
 	case OP_SPECIAL_JR:
+		*flags_ptr = opcode_branch_flags;
+		*nb_flags = ARRAY_SIZE(opcode_branch_flags);
+		fallthrough;
 	case OP_SPECIAL_MTHI:
 	case OP_SPECIAL_MTLO:
 		return snprintf(buf, len, "%s%s",
@@ -199,7 +295,7 @@
 		return snprintf(buf, len, "%s%s,%s",
 				special_opcodes[c.r.op],
 				lightrec_reg_name(c.r.rd),
-				lightrec_reg_name(c.r.rt));
+				lightrec_reg_name(c.r.rs));
 	case OP_SPECIAL_SYSCALL:
 	case OP_SPECIAL_BREAK:
 		return snprintf(buf, len, "%s", special_opcodes[c.r.op]);
@@ -228,17 +324,14 @@
 static int print_op_cp(union code c, char *buf, size_t len, unsigned int cp)
 {
 	if (cp == 2) {
-		switch (c.i.rs) {
-		case OP_CP0_MFC0:
-		case OP_CP0_CFC0:
-		case OP_CP0_MTC0:
-		case OP_CP0_CTC0:
+		switch (c.r.op) {
+		case OP_CP2_BASIC:
 			return snprintf(buf, len, "%s%s,%u",
-					cp2_opcodes[c.i.rs],
+					cp2_basic_opcodes[c.i.rs],
 					lightrec_reg_name(c.i.rt),
 					c.r.rd);
 		default:
-			return snprintf(buf, len, "cp2 (0x%08x)", c.opcode);
+			return snprintf(buf, len, "%s", cp2_opcodes[c.r.op]);
 		}
 	} else {
 		switch (c.i.rs) {
@@ -259,7 +352,8 @@
 }
 
 static int print_op(union code c, u32 pc, char *buf, size_t len,
-		    const char ***flags_ptr, size_t *nb_flags)
+		    const char * const **flags_ptr, size_t *nb_flags,
+		    bool *is_io)
 {
 	if (c.opcode == 0)
 		return snprintf(buf, len, "nop ");
@@ -276,10 +370,19 @@
 				pc + 4 + ((s16)c.i.imm << 2));
 	case OP_J:
 	case OP_JAL:
+		*flags_ptr = opcode_branch_flags;
+		*nb_flags = ARRAY_SIZE(opcode_branch_flags);
 		return snprintf(buf, len, "%s0x%x",
 				std_opcodes[c.i.op],
 				(pc & 0xf0000000) | (c.j.imm << 2));
 	case OP_BEQ:
+		if (c.i.rs == c.i.rt) {
+			*flags_ptr = opcode_branch_flags;
+			*nb_flags = ARRAY_SIZE(opcode_branch_flags);
+			return snprintf(buf, len, "b 0x%x",
+					pc + 4 + ((s16)c.i.imm << 2));
+		}
+		fallthrough;
 	case OP_BNE:
 	case OP_BLEZ:
 	case OP_BGTZ:
@@ -326,6 +429,7 @@
 	case OP_SWR:
 		*flags_ptr = opcode_io_flags;
 		*nb_flags = ARRAY_SIZE(opcode_io_flags);
+		*is_io = true;
 		return snprintf(buf, len, "%s%s,%hd(%s)",
 				std_opcodes[c.i.op],
 				lightrec_reg_name(c.i.rt),
@@ -340,51 +444,63 @@
 				lightrec_reg_name(c.i.rt),
 				(s16)c.i.imm,
 				lightrec_reg_name(c.i.rs));
-	case OP_META_BEQZ:
-	case OP_META_BNEZ:
-		*flags_ptr = opcode_branch_flags;
-		*nb_flags = ARRAY_SIZE(opcode_branch_flags);
-		return snprintf(buf, len, "%s%s,0x%x",
-				std_opcodes[c.i.op],
-				lightrec_reg_name(c.i.rs),
-				pc + 4 + ((s16)c.i.imm << 2));
 	case OP_META_MOV:
 		return snprintf(buf, len, "move %s,%s",
 				lightrec_reg_name(c.r.rd),
 				lightrec_reg_name(c.r.rs));
+	case OP_META_EXTC:
+		return snprintf(buf, len, "extc %s,%s",
+				lightrec_reg_name(c.i.rt),
+				lightrec_reg_name(c.i.rs));
+	case OP_META_EXTS:
+		return snprintf(buf, len, "exts %s,%s",
+				lightrec_reg_name(c.i.rt),
+				lightrec_reg_name(c.i.rs));
+	case OP_META_MULT2:
+	case OP_META_MULTU2:
+		*flags_ptr = opcode_multdiv_flags;
+		*nb_flags = ARRAY_SIZE(opcode_multdiv_flags);
+		return snprintf(buf, len, "%s%s,%s,%s,%u",
+				mult2_opcodes[c.i.op == OP_META_MULTU2],
+				lightrec_reg_name(get_mult_div_hi(c)),
+				lightrec_reg_name(get_mult_div_lo(c)),
+				lightrec_reg_name(c.r.rs), c.r.op);
 	default:
 		return snprintf(buf, len, "unknown (0x%08x)", c.opcode);
 	}
 }
 
-void lightrec_print_disassembly(const struct block *block, const u32 *code)
+void lightrec_print_disassembly(const struct block *block, const u32 *code_ptr)
 {
 	const struct opcode *op;
-	const char **flags_ptr;
+	const char * const *flags_ptr;
 	size_t nb_flags, count, count2;
 	char buf[256], buf2[256], buf3[256];
 	unsigned int i;
-	u32 pc, branch_pc;
+	u32 pc, branch_pc, code;
+	bool is_io;
 
 	for (i = 0; i < block->nb_ops; i++) {
 		op = &block->opcode_list[i];
 		branch_pc = get_branch_pc(block, i, 0);
 		pc = block->pc + (i << 2);
+		code = LE32TOH(code_ptr[i]);
 
-		count = print_op((union code)code[i], pc, buf, sizeof(buf),
-				 &flags_ptr, &nb_flags);
+		count = print_op((union code)code, pc, buf, sizeof(buf),
+				 &flags_ptr, &nb_flags, &is_io);
 
 		flags_ptr = NULL;
 		nb_flags = 0;
+		is_io = false;
 		count2 = print_op(op->c, branch_pc, buf2, sizeof(buf2),
-				  &flags_ptr, &nb_flags);
+				  &flags_ptr, &nb_flags, &is_io);
 
-		if (code[i] == op->c.opcode) {
+		if (code == op->c.opcode) {
 			*buf2 = '\0';
 			count2 = 0;
 		}
 
-		print_flags(buf3, sizeof(buf3), op->flags, flags_ptr, nb_flags);
+		print_flags(buf3, sizeof(buf3), op, flags_ptr, nb_flags, is_io);
 
 		printf("0x%08x (0x%x)\t%s%*c%s%*c%s\n", pc, i << 2,
 		       buf, 30 - (int)count, ' ', buf2, 30 - (int)count2, ' ', buf3);
diff --git a/deps/lightrec/disassembler.h b/deps/lightrec/disassembler.h
index e78013aca..e4685a9db 100644
--- a/deps/lightrec/disassembler.h
+++ b/deps/lightrec/disassembler.h
@@ -8,6 +8,7 @@
 
 #include "debug.h"
 #include "lightrec.h"
+#include "lightrec-config.h"
 
 #ifndef __packed
 #define __packed __attribute__((packed))
@@ -17,25 +18,58 @@
 
 /* Flags for all opcodes */
 #define LIGHTREC_NO_DS		BIT(0)
-#define LIGHTREC_UNLOAD_RS	BIT(1)
-#define LIGHTREC_UNLOAD_RT	BIT(2)
-#define LIGHTREC_UNLOAD_RD	BIT(3)
-#define LIGHTREC_SYNC		BIT(4)
+#define LIGHTREC_SYNC		BIT(1)
 
 /* Flags for load/store opcodes */
-#define LIGHTREC_DIRECT_IO	BIT(5)
-#define LIGHTREC_HW_IO		BIT(6)
-#define LIGHTREC_SMC		BIT(7)
-#define LIGHTREC_NO_INVALIDATE	BIT(8)
+#define LIGHTREC_SMC		BIT(2)
+#define LIGHTREC_NO_INVALIDATE	BIT(3)
+#define LIGHTREC_NO_MASK	BIT(4)
+
+/* I/O mode for load/store opcodes */
+#define LIGHTREC_IO_MODE_LSB	5
+#define LIGHTREC_IO_MODE(x)	((x) << LIGHTREC_IO_MODE_LSB)
+#define LIGHTREC_IO_UNKNOWN	0x0
+#define LIGHTREC_IO_DIRECT	0x1
+#define LIGHTREC_IO_HW		0x2
+#define LIGHTREC_IO_RAM		0x3
+#define LIGHTREC_IO_BIOS	0x4
+#define LIGHTREC_IO_SCRATCH	0x5
+#define LIGHTREC_IO_DIRECT_HW	0x6
+#define LIGHTREC_IO_MASK	LIGHTREC_IO_MODE(0x7)
+#define LIGHTREC_FLAGS_GET_IO_MODE(x) \
+	(((x) & LIGHTREC_IO_MASK) >> LIGHTREC_IO_MODE_LSB)
 
 /* Flags for branches */
-#define LIGHTREC_EMULATE_BRANCH	BIT(5)
-#define LIGHTREC_LOCAL_BRANCH	BIT(6)
+#define LIGHTREC_EMULATE_BRANCH	BIT(2)
+#define LIGHTREC_LOCAL_BRANCH	BIT(3)
 
 /* Flags for div/mult opcodes */
-#define LIGHTREC_NO_LO		BIT(5)
-#define LIGHTREC_NO_HI		BIT(6)
-#define LIGHTREC_NO_DIV_CHECK	BIT(7)
+#define LIGHTREC_NO_LO		BIT(2)
+#define LIGHTREC_NO_HI		BIT(3)
+#define LIGHTREC_NO_DIV_CHECK	BIT(4)
+
+#define LIGHTREC_REG_RS_LSB	26
+#define LIGHTREC_REG_RS(x)	((x) << LIGHTREC_REG_RS_LSB)
+#define LIGHTREC_REG_RS_MASK	LIGHTREC_REG_RS(0x3)
+#define LIGHTREC_FLAGS_GET_RS(x) \
+	(((x) & LIGHTREC_REG_RS_MASK) >> LIGHTREC_REG_RS_LSB)
+
+#define LIGHTREC_REG_RT_LSB	28
+#define LIGHTREC_REG_RT(x)	((x) << LIGHTREC_REG_RT_LSB)
+#define LIGHTREC_REG_RT_MASK	LIGHTREC_REG_RT(0x3)
+#define LIGHTREC_FLAGS_GET_RT(x) \
+	(((x) & LIGHTREC_REG_RT_MASK) >> LIGHTREC_REG_RT_LSB)
+
+#define LIGHTREC_REG_RD_LSB	30
+#define LIGHTREC_REG_RD(x)	((x) << LIGHTREC_REG_RD_LSB)
+#define LIGHTREC_REG_RD_MASK	LIGHTREC_REG_RD(0x3)
+#define LIGHTREC_FLAGS_GET_RD(x) \
+	(((x) & LIGHTREC_REG_RD_MASK) >> LIGHTREC_REG_RD_LSB)
+
+#define LIGHTREC_REG_NOOP	0x0
+#define LIGHTREC_REG_UNLOAD	0x1
+#define LIGHTREC_REG_DISCARD	0x2
+#define LIGHTREC_REG_CLEAN	0x3
 
 struct block;
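Note: the rewritten header widens the per-opcode flags from u16 to u32 and packs several multi-bit fields into the word: a 3-bit I/O mode at bit 5, and three 2-bit register operations (no-op / unload / discard / clean) at bits 26, 28 and 30. The LIGHTREC_IO_MODE() / LIGHTREC_FLAGS_GET_*() macros above are plain shift-and-mask accessors; the same shape, generically:

    #include <stdint.h>

    /* Generic bitfield helpers equivalent to the macros above. */
    #define FIELD_PREP(mask, lsb, val)  (((uint32_t)(val) << (lsb)) & (mask))
    #define FIELD_GET(mask, lsb, word)  (((word) & (mask)) >> (lsb))

    /* e.g., tagging a load/store with the RAM I/O mode (0x3):
     *   flags |= FIELD_PREP(LIGHTREC_IO_MASK, LIGHTREC_IO_MODE_LSB, 0x3);
     * and reading it back:
     *   mode = FIELD_GET(LIGHTREC_IO_MASK, LIGHTREC_IO_MODE_LSB, flags);
     */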
@@ -73,10 +107,13 @@ enum standard_opcodes {
 	OP_LWC2		= 0x32,
 	OP_SWC2		= 0x3a,
 
-	OP_META_BEQZ	= 0x14,
-	OP_META_BNEZ	= 0x15,
-
 	OP_META_MOV	= 0x16,
+
+	OP_META_EXTC	= 0x17,
+	OP_META_EXTS	= 0x18,
+
+	OP_META_MULT2	= 0x19,
+	OP_META_MULTU2	= 0x1a,
 };
 
 enum special_opcodes {
@@ -127,6 +164,28 @@ enum cp0_opcodes {
 
 enum cp2_opcodes {
 	OP_CP2_BASIC	= 0x00,
+	OP_CP2_RTPS	= 0x01,
+	OP_CP2_NCLIP	= 0x06,
+	OP_CP2_OP	= 0x0c,
+	OP_CP2_DPCS	= 0x10,
+	OP_CP2_INTPL	= 0x11,
+	OP_CP2_MVMVA	= 0x12,
+	OP_CP2_NCDS	= 0x13,
+	OP_CP2_CDP	= 0x14,
+	OP_CP2_NCDT	= 0x16,
+	OP_CP2_NCCS	= 0x1b,
+	OP_CP2_CC	= 0x1c,
+	OP_CP2_NCS	= 0x1e,
+	OP_CP2_NCT	= 0x20,
+	OP_CP2_SQR	= 0x28,
+	OP_CP2_DCPL	= 0x29,
+	OP_CP2_DPCT	= 0x2a,
+	OP_CP2_AVSZ3	= 0x2d,
+	OP_CP2_AVSZ4	= 0x2e,
+	OP_CP2_RTPT	= 0x30,
+	OP_CP2_GPF	= 0x3d,
+	OP_CP2_GPL	= 0x3e,
+	OP_CP2_NCCT	= 0x3f,
 };
 
 enum cp2_basic_opcodes {
@@ -197,9 +256,66 @@ struct opcode {
 		struct opcode_i i;
 		struct opcode_j j;
 	};
-	u16 flags;
+	u32 flags;
+};
+
+struct opcode_list {
+	u16 nb_ops;
+	struct opcode ops[];
 };
 
 void lightrec_print_disassembly(const struct block *block, const u32 *code);
 
+static inline _Bool op_flag_no_ds(u32 flags)
+{
+	return OPT_SWITCH_DELAY_SLOTS && (flags & LIGHTREC_NO_DS);
+}
+
+static inline _Bool op_flag_sync(u32 flags)
+{
+	return OPT_LOCAL_BRANCHES && (flags & LIGHTREC_SYNC);
+}
+
+static inline _Bool op_flag_smc(u32 flags)
+{
+	return OPT_FLAG_STORES && (flags & LIGHTREC_SMC);
+}
+
+static inline _Bool op_flag_no_invalidate(u32 flags)
+{
+	return (OPT_FLAG_IO || OPT_FLAG_STORES) &&
+		(flags & LIGHTREC_NO_INVALIDATE);
+}
+
+static inline _Bool op_flag_no_mask(u32 flags)
+{
+	return OPT_FLAG_IO && (flags & LIGHTREC_NO_MASK);
+}
+
+static inline _Bool op_flag_emulate_branch(u32 flags)
+{
+	return OPT_DETECT_IMPOSSIBLE_BRANCHES &&
+		(flags & LIGHTREC_EMULATE_BRANCH);
+}
+
+static inline _Bool op_flag_local_branch(u32 flags)
+{
+	return OPT_LOCAL_BRANCHES && (flags & LIGHTREC_LOCAL_BRANCH);
+}
+
+static inline _Bool op_flag_no_lo(u32 flags)
+{
+	return OPT_FLAG_MULT_DIV && (flags & LIGHTREC_NO_LO);
+}
+
+static inline _Bool op_flag_no_hi(u32 flags)
+{
+	return OPT_FLAG_MULT_DIV && (flags & LIGHTREC_NO_HI);
+}
+
+static inline _Bool op_flag_no_div_check(u32 flags)
+{
+	return OPT_FLAG_MULT_DIV && (flags & LIGHTREC_NO_DIV_CHECK);
+}
+
 #endif /* __DISASSEMBLER_H__ */
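Note: the op_flag_*() helpers pair each flag test with its OPT_* build switch, so when an optimization is compiled out the helper folds to a constant and the compiler drops every guarded code path. The pattern in isolation (hypothetical names):

    #include <stdint.h>

    #define OPT_FEATURE 0              /* hypothetical build switch */
    #define FLAG_FEATURE (1u << 2)

    static inline _Bool op_flag_feature(uint32_t flags)
    {
            /* With OPT_FEATURE == 0 this is a constant false, so any
             * `if (op_flag_feature(...))` body is eliminated entirely. */
            return OPT_FEATURE && (flags & FLAG_FEATURE);
    }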
diff --git a/deps/lightrec/emitter.c b/deps/lightrec/emitter.c
index 0e44a77da..be50d6d8a 100644
--- a/deps/lightrec/emitter.c
+++ b/deps/lightrec/emitter.c
@@ -7,43 +7,47 @@
 #include "debug.h"
 #include "disassembler.h"
 #include "emitter.h"
+#include "lightning-wrapper.h"
 #include "optimizer.h"
 #include "regcache.h"
 
-#include <lightning.h>
 #include <stdbool.h>
 #include <stddef.h>
 
-typedef void (*lightrec_rec_func_t)(struct lightrec_state *,
-				    const struct block *, u16);
+typedef void (*lightrec_rec_func_t)(struct lightrec_cstate *, const struct block *, u16);
 
 /* Forward declarations */
-static void rec_SPECIAL(struct lightrec_state *state, const struct block *block,
-			u16 offset);
-static void rec_REGIMM(struct lightrec_state *state, const struct block *block,
-		       u16 offset);
-static void rec_CP0(struct lightrec_state *state, const struct block *block,
-		    u16 offset);
-static void rec_CP2(struct lightrec_state *state, const struct block *block,
-		    u16 offset);
+static void rec_SPECIAL(struct lightrec_cstate *state, const struct block *block, u16 offset);
+static void rec_REGIMM(struct lightrec_cstate *state, const struct block *block, u16 offset);
+static void rec_CP0(struct lightrec_cstate *state, const struct block *block, u16 offset);
+static void rec_CP2(struct lightrec_cstate *state, const struct block *block, u16 offset);
 
-static void unknown_opcode(struct lightrec_state *state, const struct block *block, u16 offset)
+static void unknown_opcode(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
 	pr_warn("Unknown opcode: 0x%08x at PC 0x%08x\n",
 		block->opcode_list[offset].c.opcode,
 		block->pc + (offset << 2));
 }
 
-static void lightrec_emit_end_of_block(struct lightrec_state *state,
+static void
+lightrec_jump_to_eob(struct lightrec_cstate *state, jit_state_t *_jit)
+{
+	/* Prevent jit_jmpi() from using our cycles register as a temporary */
+	jit_live(LIGHTREC_REG_CYCLE);
+
+	jit_patch_abs(jit_jmpi(), state->state->eob_wrapper_func);
+}
+
+static void lightrec_emit_end_of_block(struct lightrec_cstate *state,
 				       const struct block *block, u16 offset,
 				       s8 reg_new_pc, u32 imm, u8 ra_reg,
 				       u32 link, bool update_cycles)
 {
 	struct regcache *reg_cache = state->reg_cache;
-	u32 cycles = state->cycles;
 	jit_state_t *_jit = block->_jit;
 	const struct opcode *op = &block->opcode_list[offset],
 			    *next = &block->opcode_list[offset + 1];
+	u32 cycles = state->cycles + lightrec_cycles_of_opcode(op->c);
 
 	jit_note(__FILE__, __LINE__);
 
@@ -62,7 +66,7 @@
 	}
 
 	if (has_delay_slot(op->c) &&
-	    !(op->flags & (LIGHTREC_NO_DS | LIGHTREC_LOCAL_BRANCH))) {
+	    !op_flag_no_ds(op->flags) && !op_flag_local_branch(op->flags)) {
 		cycles += lightrec_cycles_of_opcode(next->c);
 
 		/* Recompile the delay slot */
@@ -70,8 +74,8 @@
 			lightrec_rec_opcode(state, block, offset + 1);
 	}
 
-	/* Store back remaining registers */
-	lightrec_storeback_regs(reg_cache, _jit);
+	/* Clean the remaining registers */
+	lightrec_clean_regs(reg_cache, _jit);
 
 	jit_movr(JIT_V0, reg_new_pc);
 
@@ -80,53 +84,37 @@
 		pr_debug("EOB: %u cycles\n", cycles);
 	}
 
-	if (offset + !!(op->flags & LIGHTREC_NO_DS) < block->nb_ops - 1)
-		state->branches[state->nb_branches++] = jit_jmpi();
+	lightrec_jump_to_eob(state, _jit);
 }
 
-void lightrec_emit_eob(struct lightrec_state *state, const struct block *block,
-		       u16 offset)
+void lightrec_emit_eob(struct lightrec_cstate *state,
+		       const struct block *block, u16 offset)
 {
 	struct regcache *reg_cache = state->reg_cache;
 	jit_state_t *_jit = block->_jit;
-	union code c = block->opcode_list[offset].c;
 
-	lightrec_storeback_regs(reg_cache, _jit);
+	lightrec_clean_regs(reg_cache, _jit);
 
 	jit_movi(JIT_V0, block->pc + (offset << 2));
-	jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE,
-		 state->cycles - lightrec_cycles_of_opcode(c));
+	jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles);
 
-	state->branches[state->nb_branches++] = jit_jmpi();
+	lightrec_jump_to_eob(state, _jit);
 }
 
-static u8 get_jr_jalr_reg(struct lightrec_state *state, const struct block *block, u16 offset)
+static u8 get_jr_jalr_reg(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
 	struct regcache *reg_cache = state->reg_cache;
 	jit_state_t *_jit = block->_jit;
-	const struct opcode *op = &block->opcode_list[offset],
-			    *next = &block->opcode_list[offset + 1];
-	u8 rs = lightrec_request_reg_in(reg_cache, _jit, op->r.rs, JIT_V0);
-
-	/* If the source register is already mapped to JIT_R0 or JIT_R1, and the
-	 * delay slot is a I/O operation, unload the register, since JIT_R0 and
-	 * JIT_R1 are explicitely used by the I/O opcode generators. */
-	if ((rs == JIT_R0 || rs == JIT_R1) &&
-	    !(op->flags & LIGHTREC_NO_DS) &&
-	    opcode_is_io(next->c) &&
-	    !(next->flags & (LIGHTREC_NO_INVALIDATE | LIGHTREC_DIRECT_IO))) {
-		lightrec_unload_reg(reg_cache, _jit, rs);
-		lightrec_free_reg(reg_cache, rs);
-
-		rs = lightrec_request_reg_in(reg_cache, _jit, op->r.rs, JIT_V0);
-	}
+	const struct opcode *op = &block->opcode_list[offset];
+	u8 rs;
 
+	rs = lightrec_request_reg_in(reg_cache, _jit, op->r.rs, JIT_V0);
 	lightrec_lock_reg(reg_cache, _jit, rs);
 
 	return rs;
 }
 
-static void rec_special_JR(struct lightrec_state *state, const struct block *block, u16 offset)
+static void rec_special_JR(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
 	u8 rs = get_jr_jalr_reg(state, block, offset);
@@ -134,7 +122,7 @@
 	lightrec_emit_end_of_block(state, block, offset, rs, 0, 31, 0, true);
 }
 
-static void rec_special_JALR(struct lightrec_state *state, const struct block *block, u16 offset)
+static void rec_special_JALR(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
 	u8 rs = get_jr_jalr_reg(state, block, offset);
 	union code c = block->opcode_list[offset].c;
@@ -144,7 +132,7 @@
 			get_branch_pc(block, offset, 2), true);
 }
 
-static void rec_J(struct lightrec_state *state, const struct block *block, u16 offset)
+static void rec_J(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
 	union code c = block->opcode_list[offset].c;
@@ -154,7 +142,7 @@
 			31, 0, true);
 }
 
-static void rec_JAL(struct lightrec_state *state, const struct block *block, u16 offset)
+static void rec_JAL(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
 	union code c = block->opcode_list[offset].c;
@@ -164,8 +152,45 @@
 			31, get_branch_pc(block, offset, 2), true);
 }
 
-static void rec_b(struct lightrec_state *state, const struct block *block, u16 offset,
-		  jit_code_t code, u32 link, bool unconditional, bool bz)
+static void lightrec_do_early_unload(struct lightrec_cstate *state,
+				     const struct block *block, u16 offset)
+{
+	struct regcache *reg_cache = state->reg_cache;
+	const struct opcode *op = &block->opcode_list[offset];
+	jit_state_t *_jit = block->_jit;
+	unsigned int i;
+	u8 reg;
+	struct {
+		u8 reg, op;
+	} reg_ops[3] = {
+		{ op->r.rd, LIGHTREC_FLAGS_GET_RD(op->flags), },
+		{ op->i.rt, LIGHTREC_FLAGS_GET_RT(op->flags), },
+		{ op->i.rs, LIGHTREC_FLAGS_GET_RS(op->flags), },
+	};
+
+	for (i = 0; i < ARRAY_SIZE(reg_ops); i++) {
+		reg = reg_ops[i].reg;
+
+		switch (reg_ops[i].op) {
+		case LIGHTREC_REG_UNLOAD:
+			lightrec_clean_reg_if_loaded(reg_cache, _jit, reg, true);
+			break;
+
+		case LIGHTREC_REG_DISCARD:
+			lightrec_discard_reg_if_loaded(reg_cache, reg);
+			break;
+
+		case LIGHTREC_REG_CLEAN:
+			lightrec_clean_reg_if_loaded(reg_cache, _jit, reg, false);
+			break;
+		default:
+			break;
+		};
+	}
+}
+
+static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 offset,
+		  jit_code_t code, jit_code_t code2, u32 link, bool unconditional, bool bz)
 {
 	struct regcache *reg_cache = state->reg_cache;
 	struct native_register *regs_backup;
@@ -174,39 +199,54 @@
 	const struct opcode *op = &block->opcode_list[offset],
 			    *next = &block->opcode_list[offset + 1];
 	jit_node_t *addr;
-	u8 link_reg;
-	u32 target_offset, cycles = state->cycles;
+	u8 link_reg, rs, rt;
 	bool is_forward = (s16)op->i.imm >= -1;
+	int op_cycles = lightrec_cycles_of_opcode(op->c);
+	u32 target_offset, cycles = state->cycles + op_cycles;
+	bool no_indirection = false;
 	u32 next_pc;
 
 	jit_note(__FILE__, __LINE__);
 
-	if (!(op->flags & LIGHTREC_NO_DS))
+	if (!op_flag_no_ds(op->flags))
 		cycles += lightrec_cycles_of_opcode(next->c);
 
-	state->cycles = 0;
+	state->cycles = -op_cycles;
+
+	if (!unconditional) {
+		rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs, REG_EXT);
+		rt = bz ? 0 : lightrec_alloc_reg_in(reg_cache,
+						    _jit, op->i.rt, REG_EXT);
+
+		/* Unload dead registers before evaluating the branch */
+		if (OPT_EARLY_UNLOAD)
+			lightrec_do_early_unload(state, block, offset);
+
+		if (op_flag_local_branch(op->flags) &&
+		    (op_flag_no_ds(op->flags) || !next->opcode) &&
+		    is_forward && !lightrec_has_dirty_regs(reg_cache))
+			no_indirection = true;
+
+		if (no_indirection)
+			pr_debug("Using no indirection for branch at offset 0x%hx\n", offset << 2);
+	}
 
 	if (cycles)
 		jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles);
 
 	if (!unconditional) {
-		u8 rs = lightrec_alloc_reg_in(reg_cache, _jit, op->i.rs, REG_EXT),
-		   rt = bz ? 0 : lightrec_alloc_reg_in(reg_cache,
-						       _jit, op->i.rt, REG_EXT);
-
 		/* Generate the branch opcode */
-		addr = jit_new_node_pww(code, NULL, rs, rt);
+		if (!no_indirection)
+			addr = jit_new_node_pww(code, NULL, rs, rt);
 
 		lightrec_free_regs(reg_cache);
 		regs_backup = lightrec_regcache_enter_branch(reg_cache);
 	}
 
-	if (op->flags & LIGHTREC_LOCAL_BRANCH) {
-		if (next && !(op->flags & LIGHTREC_NO_DS)) {
-			/* Recompile the delay slot */
-			if (next->opcode)
-				lightrec_rec_opcode(state, block, offset + 1);
-		}
+	if (op_flag_local_branch(op->flags)) {
+		/* Recompile the delay slot */
+		if (!op_flag_no_ds(op->flags) && next->opcode)
+			lightrec_rec_opcode(state, block, offset + 1);
 
 		if (link) {
 			/* Update the $ra register */
@@ -215,31 +255,36 @@
 			lightrec_free_reg(reg_cache, link_reg);
 		}
 
-		/* Store back remaining registers */
-		lightrec_storeback_regs(reg_cache, _jit);
+		/* Clean remaining registers */
+		lightrec_clean_regs(reg_cache, _jit);
 
 		target_offset = offset + 1 + (s16)op->i.imm
-			- !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS));
+			- !!op_flag_no_ds(op->flags);
 		pr_debug("Adding local branch to offset 0x%x\n",
 			 target_offset << 2);
 		branch = &state->local_branches[
 			state->nb_local_branches++];
 
 		branch->target = target_offset;
-		if (is_forward)
-			branch->branch = jit_jmpi();
+
+		if (no_indirection)
+			branch->branch = jit_new_node_pww(code2, NULL, rs, rt);
+		else if (is_forward)
+			branch->branch = jit_b();
 		else
 			branch->branch = jit_bgti(LIGHTREC_REG_CYCLE, 0);
 	}
 
-	if (!(op->flags & LIGHTREC_LOCAL_BRANCH) || !is_forward) {
+	if (!op_flag_local_branch(op->flags) || !is_forward) {
 		next_pc = get_branch_pc(block, offset, 1 + (s16)op->i.imm);
 		lightrec_emit_end_of_block(state, block, offset, -1, next_pc,
 					   31, link, false);
 	}
 
 	if (!unconditional) {
-		jit_patch(addr);
+		if (!no_indirection)
+			jit_patch(addr);
+
 		lightrec_regcache_leave_branch(reg_cache, regs_backup);
 
 		if (bz && link) {
@@ -250,79 +295,88 @@
 			lightrec_free_reg(reg_cache, link_reg);
 		}
 
-		if (!(op->flags & LIGHTREC_NO_DS) && next->opcode)
+		if (!op_flag_no_ds(op->flags) && next->opcode)
 			lightrec_rec_opcode(state, block, offset + 1);
 	}
 }
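Note: rec_b() has to honour MIPS branch-delay-slot semantics: the instruction after the branch executes no matter what, and the target is computed relative to the delay slot. The code/code2 pair it now takes carries both the branch condition and its inverse, so a clean local forward branch can be emitted directly with the inverted condition (no_indirection) instead of branching around an unconditional jump. The addressing rule as plain C (a toy model, not lightrec code):

    #include <stdbool.h>
    #include <stdint.h>

    static uint32_t branch_target(uint32_t pc, int16_t imm)
    {
            /* Target is relative to the delay slot at pc + 4, hence the
             * `pc + 4 + ((s16)imm << 2)` seen throughout the emitter. */
            return pc + 4 + ((int32_t)imm << 2);
    }

    static uint32_t next_pc(uint32_t pc, int16_t imm, bool taken)
    {
            /* The delay-slot instruction at pc + 4 has already run. */
            return taken ? branch_target(pc, imm) : pc + 8;
    }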
-static void rec_BNE(struct lightrec_state *state, const struct block *block,
-		    u16 offset)
+static void rec_BNE(struct lightrec_cstate *state,
+		    const struct block *block, u16 offset)
 {
+	union code c = block->opcode_list[offset].c;
+
 	_jit_name(block->_jit, __func__);
-	rec_b(state, block, offset, jit_code_beqr, 0, false, false);
+
+	if (c.i.rt == 0)
+		rec_b(state, block, offset, jit_code_beqi, jit_code_bnei, 0, false, true);
+	else
+		rec_b(state, block, offset, jit_code_beqr, jit_code_bner, 0, false, false);
 }
 
-static void rec_BEQ(struct lightrec_state *state, const struct block *block,
-		    u16 offset)
+static void rec_BEQ(struct lightrec_cstate *state,
+		    const struct block *block, u16 offset)
 {
 	union code c = block->opcode_list[offset].c;
 
 	_jit_name(block->_jit, __func__);
-	rec_b(state, block, offset, jit_code_bner, 0,
-	      c.i.rs == c.i.rt, false);
+
+	if (c.i.rt == 0)
+		rec_b(state, block, offset, jit_code_bnei, jit_code_beqi, 0, c.i.rs == 0, true);
+	else
+		rec_b(state, block, offset, jit_code_bner, jit_code_beqr, 0, c.i.rs == c.i.rt, false);
 }
 
-static void rec_BLEZ(struct lightrec_state *state, const struct block *block,
-		     u16 offset)
+static void rec_BLEZ(struct lightrec_cstate *state,
		     const struct block *block, u16 offset)
 {
 	union code c = block->opcode_list[offset].c;
 
 	_jit_name(block->_jit, __func__);
-	rec_b(state, block, offset, jit_code_bgti, 0, c.i.rs == 0, true);
+	rec_b(state, block, offset, jit_code_bgti, jit_code_blei, 0, c.i.rs == 0, true);
 }
 
-static void rec_BGTZ(struct lightrec_state *state, const struct block *block,
-		     u16 offset)
+static void rec_BGTZ(struct lightrec_cstate *state,
+		     const struct block *block, u16 offset)
 {
 	_jit_name(block->_jit, __func__);
-	rec_b(state, block, offset, jit_code_blei, 0, false, true);
+	rec_b(state, block, offset, jit_code_blei, jit_code_bgti, 0, false, true);
 }
 
-static void rec_regimm_BLTZ(struct lightrec_state *state, const struct block *block,
-			    u16 offset)
+static void rec_regimm_BLTZ(struct lightrec_cstate *state,
+			    const struct block *block, u16 offset)
 {
 	_jit_name(block->_jit, __func__);
-	rec_b(state, block, offset, jit_code_bgei, 0, false, true);
+	rec_b(state, block, offset, jit_code_bgei, jit_code_blti, 0, false, true);
 }
 
-static void rec_regimm_BLTZAL(struct lightrec_state *state, const struct block *block,
-			      u16 offset)
+static void rec_regimm_BLTZAL(struct lightrec_cstate *state,
			      const struct block *block, u16 offset)
 {
 	_jit_name(block->_jit, __func__);
-	rec_b(state, block, offset, jit_code_bgei,
+	rec_b(state, block, offset, jit_code_bgei, jit_code_blti,
 	      get_branch_pc(block, offset, 2), false, true);
 }
 
-static void rec_regimm_BGEZ(struct lightrec_state *state, const struct block *block,
-			    u16 offset)
+static void rec_regimm_BGEZ(struct lightrec_cstate *state,
			    const struct block *block, u16 offset)
 {
 	union code c = block->opcode_list[offset].c;
 
 	_jit_name(block->_jit, __func__);
-	rec_b(state, block, offset, jit_code_blti, 0, !c.i.rs, true);
+	rec_b(state, block, offset, jit_code_blti, jit_code_bgei, 0, !c.i.rs, true);
 }
 
-static void rec_regimm_BGEZAL(struct lightrec_state *state, const struct block *block,
-			      u16 offset)
+static void rec_regimm_BGEZAL(struct lightrec_cstate *state,
			      const struct block *block, u16 offset)
 {
 	const struct opcode *op = &block->opcode_list[offset];
 
 	_jit_name(block->_jit, __func__);
-	rec_b(state, block, offset, jit_code_blti,
+	rec_b(state, block, offset, jit_code_blti, jit_code_bgei,
 	      get_branch_pc(block, offset, 2), !op->i.rs, true);
 }
 
-static void rec_alu_imm(struct lightrec_state *state, const struct block *block,
+static void rec_alu_imm(struct lightrec_cstate *state, const struct block *block,
			u16 offset, jit_code_t code, bool slti)
 {
 	struct regcache *reg_cache = state->reg_cache;
@@ -343,7 +397,7 @@
 	lightrec_free_reg(reg_cache, rt);
 }
 
-static void rec_alu_special(struct lightrec_state *state, const struct block *block,
+static void rec_alu_special(struct lightrec_cstate *state, const struct block *block,
			    u16 offset, jit_code_t code, bool out_ext)
 {
 	struct regcache *reg_cache = state->reg_cache;
@@ -364,7 +418,7 @@
 	lightrec_free_reg(reg_cache, rd);
 }
 
-static void rec_alu_shiftv(struct lightrec_state *state, const struct block *block,
+static void rec_alu_shiftv(struct lightrec_cstate *state, const struct block *block,
			   u16 offset, jit_code_t code)
 {
 	struct regcache *reg_cache = state->reg_cache;
@@ -398,39 +452,60 @@
 	lightrec_free_reg(reg_cache, rd);
 }
 
-static void rec_ADDIU(struct lightrec_state *state, const struct block *block,
-		      u16 offset)
+static void rec_movi(struct lightrec_cstate *state,
+		     const struct block *block, u16 offset)
 {
-	_jit_name(block->_jit, __func__);
-	rec_alu_imm(state, block, offset, jit_code_addi, false);
+	struct regcache *reg_cache = state->reg_cache;
+	union code c = block->opcode_list[offset].c;
+	jit_state_t *_jit = block->_jit;
+	u16 flags = REG_EXT;
+	u8 rt;
+
+	if (!(c.i.imm & 0x8000))
+		flags |= REG_ZEXT;
+
+	rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags);
+
+	jit_movi(rt, (s32)(s16) c.i.imm);
+
+	lightrec_free_reg(reg_cache, rt);
 }
 
-static void rec_ADDI(struct lightrec_state *state, const struct block *block,
-		     u16 offset)
+static void rec_ADDIU(struct lightrec_cstate *state,
+		      const struct block *block, u16 offset)
+{
+	_jit_name(block->_jit, __func__);
+
+	if (block->opcode_list[offset].c.i.rs)
+		rec_alu_imm(state, block, offset, jit_code_addi, false);
+	else
+		rec_movi(state, block, offset);
+}
+
+static void rec_ADDI(struct lightrec_cstate *state,
		     const struct block *block, u16 offset)
 {
 	/* TODO: Handle the exception? */
 	_jit_name(block->_jit, __func__);
-	rec_alu_imm(state, block, offset, jit_code_addi, false);
+	rec_ADDIU(state, block, offset);
 }
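Note: rec_ADDIU() now special-cases rs == $zero. Since $zero always reads 0, `addiu rt, $0, imm` is just a sign-extended constant load, which rec_movi() emits without allocating an input register (and can mark the output zero-extended when the immediate is non-negative). In plain C:

    #include <stdint.h>

    static int32_t addiu(uint32_t rs_value, int16_t imm, unsigned int rs_index)
    {
            if (rs_index == 0)              /* $zero: plain constant load */
                    return (int32_t)imm;

            /* MIPS ADDIU: 32-bit wrapping add of the sign-extended imm. */
            return (int32_t)(rs_value + (uint32_t)(int32_t)imm);
    }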
-static void rec_SLTIU(struct lightrec_state *state, const struct block *block,
-		      u16 offset)
-
+static void rec_SLTIU(struct lightrec_cstate *state,
		      const struct block *block, u16 offset)
 {
 	_jit_name(block->_jit, __func__);
 	rec_alu_imm(state, block, offset, jit_code_lti_u, true);
 }
 
-static void rec_SLTI(struct lightrec_state *state, const struct block *block,
-		     u16 offset)
+static void rec_SLTI(struct lightrec_cstate *state,
		     const struct block *block, u16 offset)
 {
 	_jit_name(block->_jit, __func__);
 	rec_alu_imm(state, block, offset, jit_code_lti, true);
 }
 
-static void rec_ANDI(struct lightrec_state *state, const struct block *block,
-		     u16 offset)
+static void rec_ANDI(struct lightrec_cstate *state,
		     const struct block *block, u16 offset)
 {
 	struct regcache *reg_cache = state->reg_cache;
 	union code c = block->opcode_list[offset].c;
@@ -456,7 +531,7 @@
 	lightrec_free_reg(reg_cache, rt);
 }
 
-static void rec_alu_or_xor(struct lightrec_state *state, const struct block *block,
+static void rec_alu_or_xor(struct lightrec_cstate *state, const struct block *block,
			   u16 offset, jit_code_t code)
 {
 	struct regcache *reg_cache = state->reg_cache;
@@ -478,22 +553,22 @@
 }
 
-static void rec_ORI(struct lightrec_state *state, const struct block *block,
-		    u16 offset)
+static void rec_ORI(struct lightrec_cstate *state,
		    const struct block *block, u16 offset)
 {
 	_jit_name(block->_jit, __func__);
 	rec_alu_or_xor(state, block, offset, jit_code_ori);
 }
 
-static void rec_XORI(struct lightrec_state *state, const struct block *block,
-		     u16 offset)
+static void rec_XORI(struct lightrec_cstate *state,
		     const struct block *block, u16 offset)
 {
 	_jit_name(block->_jit, __func__);
 	rec_alu_or_xor(state, block, offset, jit_code_xori);
 }
 
-static void rec_LUI(struct lightrec_state *state, const struct block *block,
-		    u16 offset)
+static void rec_LUI(struct lightrec_cstate *state,
		    const struct block *block, u16 offset)
 {
 	struct regcache *reg_cache = state->reg_cache;
 	union code c = block->opcode_list[offset].c;
@@ -513,38 +588,38 @@
 	lightrec_free_reg(reg_cache, rt);
 }
 
-static void rec_special_ADDU(struct lightrec_state *state, const struct block *block,
-			     u16 offset)
+static void rec_special_ADDU(struct lightrec_cstate *state,
			     const struct block *block, u16 offset)
 {
 	_jit_name(block->_jit, __func__);
 	rec_alu_special(state, block, offset, jit_code_addr, false);
 }
 
-static void rec_special_ADD(struct lightrec_state *state, const struct block *block,
-			    u16 offset)
+static void rec_special_ADD(struct lightrec_cstate *state,
			    const struct block *block, u16 offset)
 {
 	/* TODO: Handle the exception? */
 	_jit_name(block->_jit, __func__);
 	rec_alu_special(state, block, offset, jit_code_addr, false);
 }
 
-static void rec_special_SUBU(struct lightrec_state *state, const struct block *block,
-			     u16 offset)
+static void rec_special_SUBU(struct lightrec_cstate *state,
			     const struct block *block, u16 offset)
 {
 	_jit_name(block->_jit, __func__);
 	rec_alu_special(state, block, offset, jit_code_subr, false);
 }
 
-static void rec_special_SUB(struct lightrec_state *state, const struct block *block,
-			    u16 offset)
+static void rec_special_SUB(struct lightrec_cstate *state,
			    const struct block *block, u16 offset)
 {
 	/* TODO: Handle the exception? */
 	_jit_name(block->_jit, __func__);
 	rec_alu_special(state, block, offset, jit_code_subr, false);
 }
 
-static void rec_special_AND(struct lightrec_state *state, const struct block *block,
-			    u16 offset)
+static void rec_special_AND(struct lightrec_cstate *state,
			    const struct block *block, u16 offset)
 {
 	struct regcache *reg_cache = state->reg_cache;
 	union code c = block->opcode_list[offset].c;
@@ -578,8 +653,8 @@
 	lightrec_free_reg(reg_cache, rd);
 }
 
-static void rec_special_or_nor(struct lightrec_state *state, const struct block *block,
-			       u16 offset, bool nor)
+static void rec_special_or_nor(struct lightrec_cstate *state,
			       const struct block *block, u16 offset, bool nor)
 {
 	struct regcache *reg_cache = state->reg_cache;
 	union code c = block->opcode_list[offset].c;
@@ -599,10 +674,8 @@
 	if (!nor)
 		flags_rd = REG_ZEXT & flags_rs & flags_rt;
 
-	/* E(rd) = (E(rs) & E(rt)) | (E(rt) & !Z(rt)) | (E(rs) & !Z(rs)) */
-	if ((REG_EXT & flags_rs & flags_rt) ||
-	    (flags_rt & (REG_EXT | REG_ZEXT) == REG_EXT) ||
-	    (flags_rs & (REG_EXT | REG_ZEXT) == REG_EXT))
+	/* E(rd) = E(rs) & E(rt) */
+	if (REG_EXT & flags_rs & flags_rt)
 		flags_rd |= REG_EXT;
 
 	lightrec_set_reg_out_flags(reg_cache, rd, flags_rd);
@@ -617,21 +690,21 @@
 	lightrec_free_reg(reg_cache, rd);
 }
 
-static void rec_special_OR(struct lightrec_state *state,
+static void rec_special_OR(struct lightrec_cstate *state,
			   const struct block *block, u16 offset)
 {
 	_jit_name(block->_jit, __func__);
 	rec_special_or_nor(state, block, offset, false);
 }
 
-static void rec_special_NOR(struct lightrec_state *state,
+static void rec_special_NOR(struct lightrec_cstate *state,
			    const struct block *block, u16 offset)
 {
 	_jit_name(block->_jit, __func__);
 	rec_special_or_nor(state, block, offset, true);
 }
 
-static void rec_special_XOR(struct lightrec_state *state,
+static void rec_special_XOR(struct lightrec_cstate *state,
			    const struct block *block, u16 offset)
 {
 	struct regcache *reg_cache = state->reg_cache;
@@ -664,42 +737,42 @@
 	lightrec_free_reg(reg_cache, rd);
 }
 
-static void rec_special_SLTU(struct lightrec_state *state,
+static void rec_special_SLTU(struct lightrec_cstate *state,
			     const struct block *block, u16 offset)
 {
 	_jit_name(block->_jit, __func__);
 	rec_alu_special(state, block, offset, jit_code_ltr_u, true);
 }
 
-static void rec_special_SLT(struct lightrec_state *state,
+static void rec_special_SLT(struct lightrec_cstate *state,
			    const struct block *block, u16 offset)
 {
 	_jit_name(block->_jit, __func__);
 	rec_alu_special(state, block, offset, jit_code_ltr, true);
 }
 
-static void rec_special_SLLV(struct lightrec_state *state,
+static void rec_special_SLLV(struct lightrec_cstate *state,
			     const struct block *block, u16 offset)
 {
 	_jit_name(block->_jit, __func__);
 	rec_alu_shiftv(state, block, offset, jit_code_lshr);
 }
 
-static void rec_special_SRLV(struct lightrec_state *state,
+static void rec_special_SRLV(struct lightrec_cstate *state,
			     const struct block *block, u16 offset)
 {
 	_jit_name(block->_jit, __func__);
 	rec_alu_shiftv(state, block, offset, jit_code_rshr_u);
 }
 
-static void rec_special_SRAV(struct lightrec_state *state,
+static void rec_special_SRAV(struct lightrec_cstate *state,
			     const struct block *block, u16 offset)
 {
 	_jit_name(block->_jit, __func__);
 	rec_alu_shiftv(state, block, offset, jit_code_rshr);
 }
 
-static void rec_alu_shift(struct lightrec_state *state, const struct block *block,
+static void rec_alu_shift(struct lightrec_cstate *state, const struct block *block,
			  u16 offset, jit_code_t code)
 {
 	struct regcache *reg_cache = state->reg_cache;
@@ -728,33 +801,33 @@
 	lightrec_free_reg(reg_cache, rd);
 }
 
-static void rec_special_SLL(struct lightrec_state *state, const struct block *block,
-			    u16 offset)
+static void rec_special_SLL(struct lightrec_cstate *state,
			    const struct block *block, u16 offset)
 {
 	_jit_name(block->_jit, __func__);
 	rec_alu_shift(state, block, offset, jit_code_lshi);
 }
 
-static void rec_special_SRL(struct lightrec_state *state, const struct block *block,
-			    u16 offset)
+static void rec_special_SRL(struct lightrec_cstate *state,
			    const struct block *block, u16 offset)
 {
 	_jit_name(block->_jit, __func__);
 	rec_alu_shift(state, block, offset, jit_code_rshi_u);
 }
 
-static void rec_special_SRA(struct lightrec_state *state, const struct block *block,
-			    u16 offset)
+static void rec_special_SRA(struct lightrec_cstate *state,
			    const struct block *block, u16 offset)
 {
 	_jit_name(block->_jit, __func__);
 	rec_alu_shift(state, block, offset, jit_code_rshi);
 }
 
-static void rec_alu_mult(struct lightrec_state *state, const struct block *block,
-			 u16 offset, bool is_signed)
+static void rec_alu_mult(struct lightrec_cstate *state,
			 const struct block *block, u16 offset, bool is_signed)
 {
 	struct regcache *reg_cache = state->reg_cache;
 	union code c = block->opcode_list[offset].c;
-	u16 flags = block->opcode_list[offset].flags;
+	u32 flags = block->opcode_list[offset].flags;
 	u8 reg_lo = get_mult_div_lo(c);
 	u8 reg_hi = get_mult_div_hi(c);
 	jit_state_t *_jit = block->_jit;
@@ -770,59 +843,59 @@
 	rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, rflags);
 	rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, rflags);
 
-	if (!(flags & LIGHTREC_NO_LO))
+	if (!op_flag_no_lo(flags))
 		lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0);
 	else if (__WORDSIZE == 32)
 		lo = lightrec_alloc_reg_temp(reg_cache, _jit);
 
-	if (!(flags & LIGHTREC_NO_HI))
+	if (!op_flag_no_hi(flags))
 		hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, REG_EXT);
 
-#if __WORDSIZE == 32
-	/* On 32-bit systems, do a 32*32->64 bit operation, or a 32*32->32 bit
-	 * operation if the MULT was detected a 32-bit only. */
-	if (!(flags & LIGHTREC_NO_HI)) {
-		if (is_signed)
-			jit_qmulr(lo, hi, rs, rt);
-		else
-			jit_qmulr_u(lo, hi, rs, rt);
-	} else {
-		jit_mulr(lo, rs, rt);
-	}
-#else
-	/* On 64-bit systems, do a 64*64->64 bit operation. */
-	if (flags & LIGHTREC_NO_LO) {
-		jit_mulr(hi, rs, rt);
-		jit_rshi(hi, hi, 32);
+	if (__WORDSIZE == 32) {
+		/* On 32-bit systems, do a 32*32->64 bit operation, or a 32*32->32 bit
+		 * operation if the MULT was detected a 32-bit only. */
+		if (!op_flag_no_hi(flags)) {
+			if (is_signed)
+				jit_qmulr(lo, hi, rs, rt);
+			else
+				jit_qmulr_u(lo, hi, rs, rt);
+		} else {
+			jit_mulr(lo, rs, rt);
+		}
 	} else {
-		jit_mulr(lo, rs, rt);
+		/* On 64-bit systems, do a 64*64->64 bit operation. */
+		if (op_flag_no_lo(flags)) {
+			jit_mulr(hi, rs, rt);
+			jit_rshi(hi, hi, 32);
+		} else {
+			jit_mulr(lo, rs, rt);
 
-		/* The 64-bit output value is in $lo, store the upper 32 bits in $hi */
-		if (!(flags & LIGHTREC_NO_HI))
-			jit_rshi(hi, lo, 32);
+			/* The 64-bit output value is in $lo, store the upper 32 bits in $hi */
+			if (!op_flag_no_hi(flags))
+				jit_rshi(hi, lo, 32);
+		}
 	}
-#endif
 
 	lightrec_free_reg(reg_cache, rs);
 	lightrec_free_reg(reg_cache, rt);
 
-	if (!(flags & LIGHTREC_NO_LO) || __WORDSIZE == 32)
+	if (!op_flag_no_lo(flags) || __WORDSIZE == 32)
		lightrec_free_reg(reg_cache, lo);
 
-	if (!(flags & LIGHTREC_NO_HI))
+	if (!op_flag_no_hi(flags))
		lightrec_free_reg(reg_cache, hi);
 }
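Note: the reworked rec_alu_mult() folds the old #if __WORDSIZE preprocessor blocks into ordinary `if (__WORDSIZE == 32)` tests, and on 64-bit hosts computes MULT with one full-width multiply plus a 32-bit shift instead of a widening multiply. What MULT produces, as plain C:

    #include <stdint.h>

    /* MULT rs, rt: 64-bit signed product split across LO and HI. */
    static void mult(int32_t rs, int32_t rt, uint32_t *lo, uint32_t *hi)
    {
            int64_t prod = (int64_t)rs * (int64_t)rt;

            *lo = (uint32_t)prod;
            *hi = (uint32_t)((uint64_t)prod >> 32);
    }

The LIGHTREC_NO_LO / LIGHTREC_NO_HI flags simply skip computing whichever half the block never reads.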
-static void rec_alu_div(struct lightrec_state *state, const struct block *block,
-			u16 offset, bool is_signed)
+static void rec_alu_div(struct lightrec_cstate *state,
			const struct block *block, u16 offset, bool is_signed)
 {
 	struct regcache *reg_cache = state->reg_cache;
 	union code c = block->opcode_list[offset].c;
-	u16 flags = block->opcode_list[offset].flags;
-	bool no_check = flags & LIGHTREC_NO_DIV_CHECK;
+	u32 flags = block->opcode_list[offset].flags;
+	bool no_check = op_flag_no_div_check(flags);
 	u8 reg_lo = get_mult_div_lo(c);
 	u8 reg_hi = get_mult_div_hi(c);
 	jit_state_t *_jit = block->_jit;
 	jit_node_t *branch, *to_end;
-	u8 lo, hi, rs, rt, rflags = 0;
+	u8 lo = 0, hi = 0, rs, rt, rflags = 0;
 
 	jit_note(__FILE__, __LINE__);
 
@@ -834,23 +907,22 @@
 	rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, rflags);
 	rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, rflags);
 
-	if (!(flags & LIGHTREC_NO_LO))
+	if (!op_flag_no_lo(flags))
 		lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0);
 
-	if (!(flags & LIGHTREC_NO_HI))
+	if (!op_flag_no_hi(flags))
 		hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, 0);
 
 	/* Jump to special handler if dividing by zero  */
 	if (!no_check)
 		branch = jit_beqi(rt, 0);
 
-#if __WORDSIZE == 32
-	if (flags & LIGHTREC_NO_LO) {
+	if (op_flag_no_lo(flags)) {
 		if (is_signed)
 			jit_remr(hi, rs, rt);
 		else
 			jit_remr_u(hi, rs, rt);
-	} else if (flags & LIGHTREC_NO_HI) {
+	} else if (op_flag_no_hi(flags)) {
 		if (is_signed)
 			jit_divr(lo, rs, rt);
 		else
@@ -861,36 +933,14 @@
 		else
 			jit_qdivr_u(lo, hi, rs, rt);
 	}
-#else
-	/* On 64-bit systems, the input registers must be 32 bits, so we first sign-extend
-	 * (if div) or clear (if divu) the input registers. */
-	if (flags & LIGHTREC_NO_LO) {
-		if (is_signed)
-			jit_remr(hi, rs, rt);
-		else
-			jit_remr_u(hi, rs, rt);
-	} else if (flags & LIGHTREC_NO_HI) {
-		if (is_signed)
-			jit_divr(lo, rs, rt);
-		else
-			jit_divr_u(lo, rs, rt);
-	} else {
-		if (is_signed)
-			jit_qdivr(lo, hi, rs, rt);
-		else
-			jit_qdivr_u(lo, hi, rs, rt);
-	}
-#endif
 
 	if (!no_check) {
-		lightrec_regcache_mark_live(reg_cache, _jit);
-
 		/* Jump above the div-by-zero handler */
-		to_end = jit_jmpi();
+		to_end = jit_b();
 
 		jit_patch(branch);
 
-		if (!(flags & LIGHTREC_NO_LO)) {
+		if (!op_flag_no_lo(flags)) {
 			if (is_signed) {
 				jit_lti(lo, rs, 0);
 				jit_lshi(lo, lo, 1);
@@ -900,7 +950,7 @@
 			}
 		}
 
-		if (!(flags & LIGHTREC_NO_HI))
+		if (!op_flag_no_hi(flags))
 			jit_movr(hi, rs);
 
 		jit_patch(to_end);
 	}
 
@@ -909,42 +959,42 @@
 	lightrec_free_reg(reg_cache, rs);
 	lightrec_free_reg(reg_cache, rt);
 
-	if (!(flags & LIGHTREC_NO_LO))
+	if (!op_flag_no_lo(flags))
 		lightrec_free_reg(reg_cache, lo);
 
-	if (!(flags & LIGHTREC_NO_HI))
+	if (!op_flag_no_hi(flags))
 		lightrec_free_reg(reg_cache, hi);
 }
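Note: the divide-by-zero handler emitted above reproduces the R3000's well-defined behaviour: HI receives the dividend, and for signed DIV, LO becomes -1 for a non-negative dividend and +1 for a negative one (the lti/lshi sequence computes 2*(rs < 0) - 1). As plain C:

    #include <stdint.h>

    static void div_by_zero(int32_t rs, uint32_t *lo, uint32_t *hi)
    {
            *lo = (rs < 0) ? 1u : 0xffffffffu;  /* +1 or -1 */
            *hi = (uint32_t)rs;
    }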
+1028,8 @@ static void rec_special_MTHI(struct lightrec_state *state, const struct block *b rec_alu_mv_lo_hi(state, block, REG_HI, c.r.rs); } -static void rec_special_MFLO(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_special_MFLO(struct lightrec_cstate *state, + const struct block *block, u16 offset) { union code c = block->opcode_list[offset].c; @@ -991,8 +1037,8 @@ static void rec_special_MFLO(struct lightrec_state *state, const struct block *b rec_alu_mv_lo_hi(state, block, c.r.rd, REG_LO); } -static void rec_special_MTLO(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_special_MTLO(struct lightrec_cstate *state, + const struct block *block, u16 offset) { union code c = block->opcode_list[offset].c; @@ -1000,42 +1046,57 @@ static void rec_special_MTLO(struct lightrec_state *state, const struct block *b rec_alu_mv_lo_hi(state, block, REG_LO, c.r.rs); } -static void call_to_c_wrapper(struct lightrec_state *state, const struct block *block, - u32 arg, bool with_arg, enum c_wrappers wrapper) +static void call_to_c_wrapper(struct lightrec_cstate *state, + const struct block *block, u32 arg, + enum c_wrappers wrapper) { struct regcache *reg_cache = state->reg_cache; jit_state_t *_jit = block->_jit; - u8 tmp, tmp2, tmp3; + s8 tmp, tmp2; - if (with_arg) - tmp3 = lightrec_alloc_reg(reg_cache, _jit, JIT_R1); - tmp2 = lightrec_alloc_reg(reg_cache, _jit, JIT_R0); - tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + /* Make sure JIT_R1 is not mapped; it will be used in the C wrapper. */ + tmp2 = lightrec_alloc_reg(reg_cache, _jit, JIT_R1); + + tmp = lightrec_get_reg_with_value(reg_cache, + (intptr_t) state->state->wrappers_eps[wrapper]); + if (tmp < 0) { + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + jit_ldxi(tmp, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, wrappers_eps[wrapper])); + + lightrec_temp_set_value(reg_cache, tmp, + (intptr_t) state->state->wrappers_eps[wrapper]); + } + + lightrec_free_reg(reg_cache, tmp2); - jit_ldxi(tmp, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, c_wrapper)); - jit_ldxi(tmp2, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, c_wrappers[wrapper])); - if (with_arg) - jit_movi(tmp3, arg); +#ifdef __mips__ + /* On MIPS, register t9 is always used as the target register for JALR. + * Therefore if it does not contain the target address we must + * invalidate it. 
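The lookup above relies on the register cache remembering which host register already holds a given constant, so repeated wrapper calls can skip reloading the entry point. A standalone sketch of that idea, with hypothetical types and names rather than lightrec's actual API:

    #include <stdint.h>

    #define N_HOST_REGS 8

    struct const_cache {
        intptr_t value[N_HOST_REGS];
        int      valid[N_HOST_REGS];
    };

    /* Return a register already known to hold 'v', or -1 if none does;
     * on a miss the caller loads 'v' into a temp and records it here so
     * the next call site can reuse it. */
    static int find_reg_with_value(const struct const_cache *c, intptr_t v)
    {
        int i;

        for (i = 0; i < N_HOST_REGS; i++) {
            if (c->valid[i] && c->value[i] == v)
                return i;
        }
        return -1;
    }

    static void record_value(struct const_cache *c, int reg, intptr_t v)
    {
        c->value[reg] = v;
        c->valid[reg] = 1;
    }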
*/ + if (tmp != _T9) + lightrec_unload_reg(reg_cache, _jit, _T9); +#endif + + jit_prepare(); + jit_pushargi(arg); + lightrec_regcache_mark_live(reg_cache, _jit); jit_callr(tmp); lightrec_free_reg(reg_cache, tmp); - lightrec_free_reg(reg_cache, tmp2); - if (with_arg) - lightrec_free_reg(reg_cache, tmp3); lightrec_regcache_mark_live(reg_cache, _jit); } -static void rec_io(struct lightrec_state *state, const struct block *block, u16 offset, +static void rec_io(struct lightrec_cstate *state, + const struct block *block, u16 offset, bool load_rt, bool read_rt) { struct regcache *reg_cache = state->reg_cache; jit_state_t *_jit = block->_jit; union code c = block->opcode_list[offset].c; - u16 flags = block->opcode_list[offset].flags; - bool is_tagged = flags & (LIGHTREC_HW_IO | LIGHTREC_DIRECT_IO); + u32 flags = block->opcode_list[offset].flags; + bool is_tagged = LIGHTREC_FLAGS_GET_IO_MODE(flags); u32 lut_entry; jit_note(__FILE__, __LINE__); @@ -1048,19 +1109,167 @@ static void rec_io(struct lightrec_state *state, const struct block *block, u16 lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, false); if (is_tagged) { - call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_RW); + call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_RW); } else { lut_entry = lightrec_get_lut_entry(block); call_to_c_wrapper(state, block, (lut_entry << 16) | offset, - true, C_WRAPPER_RW_GENERIC); + C_WRAPPER_RW_GENERIC); + } +} + +static u32 rec_ram_mask(struct lightrec_state *state) +{ + return (RAM_SIZE << (state->mirrors_mapped * 2)) - 1; +} + +static u32 rec_io_mask(const struct lightrec_state *state) +{ + u32 length = state->maps[PSX_MAP_HW_REGISTERS].length; + + return GENMASK(31 - clz32(length - 1), 0); +} + +static void rec_store_memory(struct lightrec_cstate *cstate, + const struct block *block, + u16 offset, jit_code_t code, + jit_code_t swap_code, + uintptr_t addr_offset, u32 addr_mask, + bool invalidate) +{ + const struct lightrec_state *state = cstate->state; + struct regcache *reg_cache = cstate->reg_cache; + struct opcode *op = &block->opcode_list[offset]; + jit_state_t *_jit = block->_jit; + union code c = op->c; + u8 rs, rt, tmp, tmp2, tmp3, addr_reg, addr_reg2; + s16 imm = (s16)c.i.imm; + s32 simm = (s32)imm << (1 - lut_is_32bit(state)); + s32 lut_offt = offsetof(struct lightrec_state, code_lut); + bool no_mask = op_flag_no_mask(op->flags); + bool add_imm = c.i.imm && + ((!state->mirrors_mapped && !no_mask) || (invalidate && + ((imm & 0x3) || simm + lut_offt != (s16)(simm + lut_offt)))); + bool need_tmp = !no_mask || addr_offset || add_imm || invalidate; + + rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0); + rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0); + if (need_tmp) + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + + addr_reg = rs; + + if (add_imm) { + jit_addi(tmp, addr_reg, (s16)c.i.imm); + addr_reg = tmp; + imm = 0; + } else if (simm) { + lut_offt += simm; + } + + if (!no_mask) { + jit_andi(tmp, addr_reg, addr_mask); + addr_reg = tmp; + } + + if (addr_offset) { + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + jit_addi(tmp2, addr_reg, addr_offset); + addr_reg2 = tmp2; + } else { + addr_reg2 = addr_reg; + } + + if (is_big_endian() && swap_code && c.i.rt) { + tmp3 = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_new_node_ww(swap_code, tmp3, rt); + jit_new_node_www(code, imm, addr_reg2, tmp3); + + lightrec_free_reg(reg_cache, tmp3); + } else { + jit_new_node_www(code, imm, addr_reg2, rt); + } + + lightrec_free_reg(reg_cache, rt); + + if (invalidate) { + tmp3 = 
lightrec_alloc_reg_in(reg_cache, _jit, 0, 0); + + if (c.i.op != OP_SW) { + jit_andi(tmp, addr_reg, ~3); + addr_reg = tmp; + } + + if (!lut_is_32bit(state)) { + jit_lshi(tmp, addr_reg, 1); + addr_reg = tmp; + } + + if (addr_reg == rs && c.i.rs == 0) { + addr_reg = LIGHTREC_REG_STATE; + } else { + jit_addr(tmp, addr_reg, LIGHTREC_REG_STATE); + addr_reg = tmp; + } + + if (lut_is_32bit(state)) + jit_stxi_i(lut_offt, addr_reg, tmp3); + else + jit_stxi(lut_offt, addr_reg, tmp3); + + lightrec_free_reg(reg_cache, tmp3); } + + if (addr_offset) + lightrec_free_reg(reg_cache, tmp2); + if (need_tmp) + lightrec_free_reg(reg_cache, tmp); + lightrec_free_reg(reg_cache, rs); +} + +static void rec_store_ram(struct lightrec_cstate *cstate, + const struct block *block, + u16 offset, jit_code_t code, + jit_code_t swap_code, bool invalidate) +{ + struct lightrec_state *state = cstate->state; + + _jit_note(block->_jit, __FILE__, __LINE__); + + return rec_store_memory(cstate, block, offset, code, swap_code, + state->offset_ram, rec_ram_mask(state), + invalidate); +} + +static void rec_store_scratch(struct lightrec_cstate *cstate, + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code) +{ + _jit_note(block->_jit, __FILE__, __LINE__); + + return rec_store_memory(cstate, block, offset, code, swap_code, + cstate->state->offset_scratch, + 0x1fffffff, false); } -static void rec_store_direct_no_invalidate(struct lightrec_state *state, +static void rec_store_io(struct lightrec_cstate *cstate, + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code) +{ + _jit_note(block->_jit, __FILE__, __LINE__); + + return rec_store_memory(cstate, block, offset, code, swap_code, + cstate->state->offset_io, + rec_io_mask(cstate->state), false); +} + +static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate, const struct block *block, - u16 offset, jit_code_t code) + u16 offset, jit_code_t code, + jit_code_t swap_code) { - struct regcache *reg_cache = state->reg_cache; + struct lightrec_state *state = cstate->state; + struct regcache *reg_cache = cstate->reg_cache; union code c = block->opcode_list[offset].c; jit_state_t *_jit = block->_jit; jit_node_t *to_not_ram, *to_end; @@ -1092,11 +1301,9 @@ static void rec_store_direct_no_invalidate(struct lightrec_state *state, if (state->offset_ram != state->offset_scratch) { to_not_ram = jit_bmsi(tmp, BIT(28)); - lightrec_regcache_mark_live(reg_cache, _jit); - jit_movi(tmp2, state->offset_ram); - to_end = jit_jmpi(); + to_end = jit_b(); jit_patch(to_not_ram); jit_movi(tmp2, state->offset_scratch); @@ -1111,16 +1318,28 @@ static void rec_store_direct_no_invalidate(struct lightrec_state *state, } rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0); - jit_new_node_www(code, imm, tmp, rt); + + if (is_big_endian() && swap_code && c.i.rt) { + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_new_node_ww(swap_code, tmp2, rt); + jit_new_node_www(code, imm, tmp, tmp2); + + lightrec_free_reg(reg_cache, tmp2); + } else { + jit_new_node_www(code, imm, tmp, rt); + } lightrec_free_reg(reg_cache, rt); lightrec_free_reg(reg_cache, tmp); } -static void rec_store_direct(struct lightrec_state *state, const struct block *block, - u16 offset, jit_code_t code) +static void rec_store_direct(struct lightrec_cstate *cstate, const struct block *block, + u16 offset, jit_code_t code, jit_code_t swap_code) { - struct regcache *reg_cache = state->reg_cache; + struct lightrec_state *state = cstate->state; + u32 ram_size = state->mirrors_mapped ? 
RAM_SIZE * 4 : RAM_SIZE; + struct regcache *reg_cache = cstate->reg_cache; union code c = block->opcode_list[offset].c; jit_state_t *_jit = block->_jit; jit_node_t *to_not_ram, *to_end; @@ -1135,32 +1354,32 @@ static void rec_store_direct(struct lightrec_state *state, const struct block *b /* Convert to KUNSEG and avoid RAM mirrors */ if (c.i.imm) { jit_addi(tmp2, rs, (s16)c.i.imm); - jit_andi(tmp2, tmp2, 0x1f800000 | (RAM_SIZE - 1)); + jit_andi(tmp2, tmp2, 0x1f800000 | (ram_size - 1)); } else { - jit_andi(tmp2, rs, 0x1f800000 | (RAM_SIZE - 1)); + jit_andi(tmp2, rs, 0x1f800000 | (ram_size - 1)); } lightrec_free_reg(reg_cache, rs); tmp = lightrec_alloc_reg_temp(reg_cache, _jit); - to_not_ram = jit_bgti(tmp2, RAM_SIZE); - - lightrec_regcache_mark_live(reg_cache, _jit); + to_not_ram = jit_bgti(tmp2, ram_size); /* Compute the offset to the code LUT */ jit_andi(tmp, tmp2, (RAM_SIZE - 1) & ~3); -#if __WORDSIZE == 64 - jit_lshi(tmp, tmp, 1); -#endif + if (!lut_is_32bit(state)) + jit_lshi(tmp, tmp, 1); jit_addr(tmp, LIGHTREC_REG_STATE, tmp); /* Write NULL to the code LUT to invalidate any block that's there */ - jit_stxi(offsetof(struct lightrec_state, code_lut), tmp, tmp3); + if (lut_is_32bit(state)) + jit_stxi_i(offsetof(struct lightrec_state, code_lut), tmp, tmp3); + else + jit_stxi(offsetof(struct lightrec_state, code_lut), tmp, tmp3); if (state->offset_ram != state->offset_scratch) { jit_movi(tmp, state->offset_ram); - to_end = jit_jmpi(); + to_end = jit_b(); } jit_patch(to_not_ram); @@ -1178,76 +1397,203 @@ static void rec_store_direct(struct lightrec_state *state, const struct block *b lightrec_free_reg(reg_cache, tmp3); rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0); - jit_new_node_www(code, 0, tmp2, rt); + + if (is_big_endian() && swap_code && c.i.rt) { + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_new_node_ww(swap_code, tmp, rt); + jit_new_node_www(code, 0, tmp2, tmp); + + lightrec_free_reg(reg_cache, tmp); + } else { + jit_new_node_www(code, 0, tmp2, rt); + } lightrec_free_reg(reg_cache, rt); lightrec_free_reg(reg_cache, tmp2); } -static void rec_store(struct lightrec_state *state, const struct block *block, - u16 offset, jit_code_t code) -{ - u16 flags = block->opcode_list[offset].flags; - - if (flags & LIGHTREC_NO_INVALIDATE) { - rec_store_direct_no_invalidate(state, block, offset, code); - } else if (flags & LIGHTREC_DIRECT_IO) { - if (state->invalidate_from_dma_only) - rec_store_direct_no_invalidate(state, block, offset, code); - else - rec_store_direct(state, block, offset, code); - } else { +static void rec_store(struct lightrec_cstate *state, + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code) +{ + u32 flags = block->opcode_list[offset].flags; + bool no_invalidate = op_flag_no_invalidate(flags) || + state->state->invalidate_from_dma_only; + + switch (LIGHTREC_FLAGS_GET_IO_MODE(flags)) { + case LIGHTREC_IO_RAM: + rec_store_ram(state, block, offset, code, + swap_code, !no_invalidate); + break; + case LIGHTREC_IO_SCRATCH: + rec_store_scratch(state, block, offset, code, swap_code); + break; + case LIGHTREC_IO_DIRECT: + if (no_invalidate) { + rec_store_direct_no_invalidate(state, block, offset, + code, swap_code); + } else { + rec_store_direct(state, block, offset, code, swap_code); + } + break; + case LIGHTREC_IO_DIRECT_HW: + rec_store_io(state, block, offset, code, swap_code); + break; + default: rec_io(state, block, offset, true, false); + break; } } -static void rec_SB(struct lightrec_state *state, const struct block *block, - u16 
offset) +static void rec_SB(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_store(state, block, offset, jit_code_stxi_c); + rec_store(state, block, offset, jit_code_stxi_c, 0); } -static void rec_SH(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_SH(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_store(state, block, offset, jit_code_stxi_s); + rec_store(state, block, offset, + jit_code_stxi_s, jit_code_bswapr_us); } -static void rec_SW(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_SW(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_store(state, block, offset, jit_code_stxi_i); + rec_store(state, block, offset, + jit_code_stxi_i, jit_code_bswapr_ui); } -static void rec_SWL(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_SWL(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_io(state, block, offset, true, false); } -static void rec_SWR(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_SWR(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_io(state, block, offset, true, false); } -static void rec_SWC2(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_SWC2(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_io(state, block, offset, false, false); } -static void rec_load_direct(struct lightrec_state *state, const struct block *block, - u16 offset, jit_code_t code, bool is_unsigned) +static void rec_load_memory(struct lightrec_cstate *cstate, + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code, bool is_unsigned, + uintptr_t addr_offset, u32 addr_mask) { - struct regcache *reg_cache = state->reg_cache; + struct regcache *reg_cache = cstate->reg_cache; + struct opcode *op = &block->opcode_list[offset]; + jit_state_t *_jit = block->_jit; + u8 rs, rt, addr_reg, flags = REG_EXT; + bool no_mask = op_flag_no_mask(op->flags); + union code c = op->c; + s16 imm; + + if (!c.i.rt) + return; + + if (is_unsigned) + flags |= REG_ZEXT; + + rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0); + rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, flags); + + if (!cstate->state->mirrors_mapped && c.i.imm && !no_mask) { + jit_addi(rt, rs, (s16)c.i.imm); + addr_reg = rt; + imm = 0; + } else { + addr_reg = rs; + imm = (s16)c.i.imm; + } + + if (!no_mask) { + jit_andi(rt, addr_reg, addr_mask); + addr_reg = rt; + } + + if (addr_offset) { + jit_addi(rt, addr_reg, addr_offset); + addr_reg = rt; + } + + jit_new_node_www(code, rt, addr_reg, imm); + + if (is_big_endian() && swap_code) { + jit_new_node_ww(swap_code, rt, rt); + + if (c.i.op == OP_LH) + jit_extr_s(rt, rt); + else if (c.i.op == OP_LW && __WORDSIZE == 64) + jit_extr_i(rt, rt); + } + + lightrec_free_reg(reg_cache, rs); + lightrec_free_reg(reg_cache, rt); +} + +static void rec_load_ram(struct lightrec_cstate *cstate, + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code, bool is_unsigned) +{ + _jit_note(block->_jit, __FILE__, __LINE__); + + rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned, + cstate->state->offset_ram, 
rec_ram_mask(cstate->state)); +} + +static void rec_load_bios(struct lightrec_cstate *cstate, + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code, bool is_unsigned) +{ + _jit_note(block->_jit, __FILE__, __LINE__); + + rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned, + cstate->state->offset_bios, 0x1fffffff); +} + +static void rec_load_scratch(struct lightrec_cstate *cstate, + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code, bool is_unsigned) +{ + _jit_note(block->_jit, __FILE__, __LINE__); + + rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned, + cstate->state->offset_scratch, 0x1fffffff); +} + +static void rec_load_io(struct lightrec_cstate *cstate, + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code, bool is_unsigned) +{ + _jit_note(block->_jit, __FILE__, __LINE__); + + rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned, + cstate->state->offset_io, rec_io_mask(cstate->state)); +} + +static void rec_load_direct(struct lightrec_cstate *cstate, + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code, + bool is_unsigned) +{ + struct lightrec_state *state = cstate->state; + struct regcache *reg_cache = cstate->reg_cache; union code c = block->opcode_list[offset].c; jit_state_t *_jit = block->_jit; jit_node_t *to_not_ram, *to_not_bios, *to_end, *to_end2; @@ -1296,15 +1642,13 @@ static void rec_load_direct(struct lightrec_state *state, const struct block *bl } else { to_not_ram = jit_bmsi(addr_reg, BIT(28)); - lightrec_regcache_mark_live(reg_cache, _jit); - /* Convert to KUNSEG and avoid RAM mirrors */ jit_andi(rt, addr_reg, RAM_SIZE - 1); if (state->offset_ram) jit_movi(tmp, state->offset_ram); - to_end = jit_jmpi(); + to_end = jit_b(); jit_patch(to_not_ram); @@ -1317,7 +1661,7 @@ static void rec_load_direct(struct lightrec_state *state, const struct block *bl jit_movi(tmp, state->offset_bios); if (state->offset_bios != state->offset_scratch) { - to_end2 = jit_jmpi(); + to_end2 = jit_b(); jit_patch(to_not_bios); @@ -1338,79 +1682,122 @@ static void rec_load_direct(struct lightrec_state *state, const struct block *bl jit_new_node_www(code, rt, rt, imm); + if (is_big_endian() && swap_code) { + jit_new_node_ww(swap_code, rt, rt); + + if (c.i.op == OP_LH) + jit_extr_s(rt, rt); + else if (c.i.op == OP_LW && __WORDSIZE == 64) + jit_extr_i(rt, rt); + } + lightrec_free_reg(reg_cache, addr_reg); lightrec_free_reg(reg_cache, rt); lightrec_free_reg(reg_cache, tmp); } -static void rec_load(struct lightrec_state *state, const struct block *block, - u16 offset, jit_code_t code, bool is_unsigned) -{ - u16 flags = block->opcode_list[offset].flags; - - if (flags & LIGHTREC_DIRECT_IO) - rec_load_direct(state, block, offset, code, is_unsigned); - else +static void rec_load(struct lightrec_cstate *state, const struct block *block, + u16 offset, jit_code_t code, jit_code_t swap_code, + bool is_unsigned) +{ + u32 flags = block->opcode_list[offset].flags; + + switch (LIGHTREC_FLAGS_GET_IO_MODE(flags)) { + case LIGHTREC_IO_RAM: + rec_load_ram(state, block, offset, code, swap_code, is_unsigned); + break; + case LIGHTREC_IO_BIOS: + rec_load_bios(state, block, offset, code, swap_code, is_unsigned); + break; + case LIGHTREC_IO_SCRATCH: + rec_load_scratch(state, block, offset, code, swap_code, is_unsigned); + break; + case LIGHTREC_IO_DIRECT_HW: + rec_load_io(state, block, offset, code, swap_code, is_unsigned); + break; + case LIGHTREC_IO_DIRECT: + 
rec_load_direct(state, block, offset, code, swap_code, is_unsigned); + break; + default: rec_io(state, block, offset, false, true); + break; + } } -static void rec_LB(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_LB(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_load(state, block, offset, jit_code_ldxi_c, false); + rec_load(state, block, offset, jit_code_ldxi_c, 0, false); } -static void rec_LBU(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_LBU(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_load(state, block, offset, jit_code_ldxi_uc, true); + rec_load(state, block, offset, jit_code_ldxi_uc, 0, true); } -static void rec_LH(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_LH(struct lightrec_cstate *state, const struct block *block, u16 offset) { + jit_code_t code = is_big_endian() ? jit_code_ldxi_us : jit_code_ldxi_s; + _jit_name(block->_jit, __func__); - rec_load(state, block, offset, jit_code_ldxi_s, false); + rec_load(state, block, offset, code, jit_code_bswapr_us, false); } -static void rec_LHU(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_LHU(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_load(state, block, offset, jit_code_ldxi_us, true); + rec_load(state, block, offset, jit_code_ldxi_us, jit_code_bswapr_us, true); } -static void rec_LWL(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_LWL(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_io(state, block, offset, true, true); } -static void rec_LWR(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_LWR(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_io(state, block, offset, true, true); } -static void rec_LW(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_LW(struct lightrec_cstate *state, const struct block *block, u16 offset) { + jit_code_t code; + + if (is_big_endian() && __WORDSIZE == 64) + code = jit_code_ldxi_ui; + else + code = jit_code_ldxi_i; + _jit_name(block->_jit, __func__); - rec_load(state, block, offset, jit_code_ldxi_i, false); + rec_load(state, block, offset, code, jit_code_bswapr_ui, false); } -static void rec_LWC2(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_LWC2(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); rec_io(state, block, offset, false, false); } -static void rec_break_syscall(struct lightrec_state *state, const struct block *block, - u16 offset, bool is_break) +static void rec_break_syscall(struct lightrec_cstate *state, + const struct block *block, u16 offset, + u32 exit_code) { + struct regcache *reg_cache = state->reg_cache; + jit_state_t *_jit = block->_jit; + u8 tmp; + _jit_note(block->_jit, __FILE__, __LINE__); - if (is_break) - call_to_c_wrapper(state, block, 0, false, C_WRAPPER_BREAK); - else - call_to_c_wrapper(state, block, 0, false, C_WRAPPER_SYSCALL); + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_movi(tmp, exit_code); + jit_stxi_i(offsetof(struct lightrec_state, exit_flags), + LIGHTREC_REG_STATE, tmp); + + 
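In plain C, the SYSCALL/BREAK path emitted here reduces to storing an exit code into the state and letting the end-of-block epilogue return to the dispatch loop. A minimal sketch of that contract; the field name follows the diff, while the enum values and function are illustrative:

    #include <stdint.h>

    enum exit_sketch { EXIT_NONE = 0, EXIT_SYSCALL, EXIT_BREAK };

    struct state_sketch {
        uint32_t exit_flags;
    };

    /* What the jit_movi() + jit_stxi_i() pair amounts to at run time. */
    static void signal_exception(struct state_sketch *s, enum exit_sketch code)
    {
        s->exit_flags = (uint32_t)code;
        /* ...the block then ends; the caller inspects exit_flags and can
         * raise the corresponding PSX exception before continuing. */
    }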
lightrec_free_reg(reg_cache, tmp); /* TODO: the return address should be "pc - 4" if we're a delay slot */ lightrec_emit_end_of_block(state, block, offset, -1, @@ -1418,20 +1805,21 @@ static void rec_break_syscall(struct lightrec_state *state, const struct block * 31, 0, true); } -static void rec_special_SYSCALL(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_special_SYSCALL(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_break_syscall(state, block, offset, false); + rec_break_syscall(state, block, offset, LIGHTREC_EXIT_SYSCALL); } -static void rec_special_BREAK(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_special_BREAK(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_break_syscall(state, block, offset, true); + rec_break_syscall(state, block, offset, LIGHTREC_EXIT_BREAK); } -static void rec_mfc(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_mfc(struct lightrec_cstate *state, const struct block *block, u16 offset) { struct regcache *reg_cache = state->reg_cache; union code c = block->opcode_list[offset].c; @@ -1440,10 +1828,10 @@ static void rec_mfc(struct lightrec_state *state, const struct block *block, jit_note(__FILE__, __LINE__); lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, true); - call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_MFC); + call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_MFC); } -static void rec_mtc(struct lightrec_state *state, const struct block *block, u16 offset) +static void rec_mtc(struct lightrec_cstate *state, const struct block *block, u16 offset) { struct regcache *reg_cache = state->reg_cache; union code c = block->opcode_list[offset].c; @@ -1453,85 +1841,494 @@ static void rec_mtc(struct lightrec_state *state, const struct block *block, u16 lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rs, false); lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, false); - call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_MTC); + call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_MTC); if (c.i.op == OP_CP0 && - !(block->opcode_list[offset].flags & LIGHTREC_NO_DS) && + !op_flag_no_ds(block->opcode_list[offset].flags) && (c.r.rd == 12 || c.r.rd == 13)) lightrec_emit_end_of_block(state, block, offset, -1, get_ds_pc(block, offset, 1), 0, 0, true); } -static void rec_cp0_MFC0(struct lightrec_state *state, const struct block *block, - u16 offset) +static void +rec_mfc0(struct lightrec_cstate *state, const struct block *block, u16 offset) +{ + struct regcache *reg_cache = state->reg_cache; + union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + u8 rt; + + jit_note(__FILE__, __LINE__); + + rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, REG_EXT); + + jit_ldxi_i(rt, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp0[c.r.rd])); + + lightrec_free_reg(reg_cache, rt); +} + +static bool block_in_bios(const struct lightrec_cstate *state, + const struct block *block) +{ + const struct lightrec_mem_map *bios = &state->state->maps[PSX_MAP_BIOS]; + u32 pc = kunseg(block->pc); + + return pc >= bios->pc && pc < bios->pc + bios->length; +} + +static void +rec_mtc0(struct lightrec_cstate *state, const struct block *block, u16 offset) +{ + struct regcache *reg_cache = state->reg_cache; + const union code c = block->opcode_list[offset].c; + jit_state_t *_jit = 
block->_jit; + u8 rt, tmp = 0, tmp2, status; + jit_node_t *to_end; + + jit_note(__FILE__, __LINE__); + + switch(c.r.rd) { + case 1: + case 4: + case 8: + case 14: + case 15: + /* Those registers are read-only */ + return; + default: + break; + } + + if (block_in_bios(state, block) && c.r.rd == 12) { + /* If we are running code from the BIOS, handle writes to the + * Status register in C. BIOS code may toggle bit 16 which will + * map/unmap the RAM, while game code cannot do that. */ + rec_mtc(state, block, offset); + return; + } + + rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0); + + if (c.r.rd != 13) { + jit_stxi_i(offsetof(struct lightrec_state, regs.cp0[c.r.rd]), + LIGHTREC_REG_STATE, rt); + } + + if (c.r.rd == 12 || c.r.rd == 13) { + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + jit_ldxi_i(tmp, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp0[13])); + + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + } + + if (c.r.rd == 12) { + status = rt; + } else if (c.r.rd == 13) { + /* Cause = (Cause & ~0x0300) | (value & 0x0300) */ + jit_andi(tmp2, rt, 0x0300); + jit_ori(tmp, tmp, 0x0300); + jit_xori(tmp, tmp, 0x0300); + jit_orr(tmp, tmp, tmp2); + jit_ldxi_i(tmp2, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp0[12])); + jit_stxi_i(offsetof(struct lightrec_state, regs.cp0[13]), + LIGHTREC_REG_STATE, tmp); + status = tmp2; + } + + if (c.r.rd == 12 || c.r.rd == 13) { + /* Exit dynarec in case there's a software interrupt. + * exit_flags = !!(status & tmp & 0x0300) & status; */ + jit_andr(tmp, tmp, status); + jit_andi(tmp, tmp, 0x0300); + jit_nei(tmp, tmp, 0); + jit_andr(tmp, tmp, status); + } + + if (c.r.rd == 12) { + /* Exit dynarec in case we unmask a hardware interrupt. + * exit_flags = !(~status & 0x401) */ + + jit_comr(tmp2, status); + jit_andi(tmp2, tmp2, 0x401); + jit_eqi(tmp2, tmp2, 0); + jit_orr(tmp, tmp, tmp2); + } + + lightrec_free_reg(reg_cache, rt); + + if (c.r.rd == 12 || c.r.rd == 13) { + to_end = jit_beqi(tmp, 0); + + jit_ldxi_i(tmp2, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, target_cycle)); + jit_subr(tmp2, tmp2, LIGHTREC_REG_CYCLE); + jit_movi(LIGHTREC_REG_CYCLE, 0); + jit_stxi_i(offsetof(struct lightrec_state, target_cycle), + LIGHTREC_REG_STATE, tmp2); + jit_stxi_i(offsetof(struct lightrec_state, current_cycle), + LIGHTREC_REG_STATE, tmp2); + + + jit_patch(to_end); + } + + if (!op_flag_no_ds(block->opcode_list[offset].flags) && + (c.r.rd == 12 || c.r.rd == 13)) { + state->cycles += lightrec_cycles_of_opcode(c); + lightrec_emit_eob(state, block, offset + 1); + } +} + +static void rec_cp0_MFC0(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_mfc(state, block, offset); + rec_mfc0(state, block, offset); } -static void rec_cp0_CFC0(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_cp0_CFC0(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_mfc(state, block, offset); + rec_mfc0(state, block, offset); } -static void rec_cp0_MTC0(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_cp0_MTC0(struct lightrec_cstate *state, + const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_mtc(state, block, offset); + rec_mtc0(state, block, offset); } -static void rec_cp0_CTC0(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_cp0_CTC0(struct lightrec_cstate *state, + const struct block 
*block, u16 offset) { _jit_name(block->_jit, __func__); - rec_mtc(state, block, offset); + rec_mtc0(state, block, offset); } -static void rec_cp2_basic_MFC2(struct lightrec_state *state, const struct block *block, - u16 offset) +static unsigned int cp2d_i_offset(u8 reg) { + return offsetof(struct lightrec_state, regs.cp2d[reg]); +} + +static unsigned int cp2d_s_offset(u8 reg) +{ + return cp2d_i_offset(reg) + is_big_endian() * 2; +} + +static unsigned int cp2c_i_offset(u8 reg) +{ + return offsetof(struct lightrec_state, regs.cp2c[reg]); +} + +static unsigned int cp2c_s_offset(u8 reg) +{ + return cp2c_i_offset(reg) + is_big_endian() * 2; +} + +static void rec_cp2_basic_MFC2(struct lightrec_cstate *state, + const struct block *block, u16 offset) +{ + struct regcache *reg_cache = state->reg_cache; + const union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + const u32 zext_regs = 0x300f0080; + u8 rt, tmp, tmp2, tmp3, out, flags; + u8 reg = c.r.rd == 15 ? 14 : c.r.rd; + unsigned int i; + _jit_name(block->_jit, __func__); - rec_mfc(state, block, offset); + + if (state->state->ops.cop2_notify) { + /* We must call cop2_notify, handle that in C. */ + rec_mfc(state, block, offset); + return; + } + + flags = (zext_regs & BIT(reg)) ? REG_ZEXT : REG_EXT; + rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, flags); + + switch (reg) { + case 1: + case 3: + case 5: + case 8: + case 9: + case 10: + case 11: + jit_ldxi_s(rt, LIGHTREC_REG_STATE, cp2d_s_offset(reg)); + break; + case 7: + case 16: + case 17: + case 18: + case 19: + jit_ldxi_us(rt, LIGHTREC_REG_STATE, cp2d_s_offset(reg)); + break; + case 28: + case 29: + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + tmp3 = lightrec_alloc_reg_temp(reg_cache, _jit); + + for (i = 0; i < 3; i++) { + out = i == 0 ? rt : tmp; + + jit_ldxi_s(tmp, LIGHTREC_REG_STATE, cp2d_s_offset(9 + i)); + jit_movi(tmp2, 0x1f); + jit_rshi(out, tmp, 7); + + jit_ltr(tmp3, tmp2, out); + jit_movnr(out, tmp2, tmp3); + + jit_gei(tmp2, out, 0); + jit_movzr(out, tmp2, tmp2); + + if (i > 0) { + jit_lshi(tmp, tmp, 5 * i); + jit_orr(rt, rt, tmp); + } + } + + + lightrec_free_reg(reg_cache, tmp); + lightrec_free_reg(reg_cache, tmp2); + lightrec_free_reg(reg_cache, tmp3); + break; + default: + jit_ldxi_i(rt, LIGHTREC_REG_STATE, cp2d_i_offset(reg)); + break; + } + + lightrec_free_reg(reg_cache, rt); } -static void rec_cp2_basic_CFC2(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_cp2_basic_CFC2(struct lightrec_cstate *state, + const struct block *block, u16 offset) { + struct regcache *reg_cache = state->reg_cache; + const union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + u8 rt; + _jit_name(block->_jit, __func__); - rec_mfc(state, block, offset); + + if (state->state->ops.cop2_notify) { + /* We must call cop2_notify, handle that in C. 
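The *_s_offset() helpers above encode a small endianness detail: a 16-bit access to the low half of a 32-bit register slot needs a byte offset of +2 on big-endian hosts and +0 on little-endian ones. A self-contained illustration:

    #include <stdint.h>
    #include <string.h>

    /* Byte offset of the low halfword within a 32-bit slot. */
    static unsigned int low_half_offset(int big_endian)
    {
        return big_endian ? 2 : 0;
    }

    /* A halfword read at that offset returns the low 16 bits either way. */
    static uint16_t read_low_half(const uint32_t *slot, int big_endian)
    {
        uint16_t out;

        memcpy(&out, (const uint8_t *)slot + low_half_offset(big_endian),
               sizeof(out));
        return out;
    }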
*/ + rec_mfc(state, block, offset); + return; + } + + switch (c.r.rd) { + case 4: + case 12: + case 20: + case 26: + case 27: + case 29: + case 30: + rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_EXT); + jit_ldxi_s(rt, LIGHTREC_REG_STATE, cp2c_s_offset(c.r.rd)); + break; + default: + rt = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rt, REG_ZEXT); + jit_ldxi_ui(rt, LIGHTREC_REG_STATE, cp2c_i_offset(c.r.rd)); + break; + } + + lightrec_free_reg(reg_cache, rt); } -static void rec_cp2_basic_MTC2(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_cp2_basic_MTC2(struct lightrec_cstate *state, + const struct block *block, u16 offset) { + struct regcache *reg_cache = state->reg_cache; + const union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + jit_node_t *loop, *to_loop; + u8 rt, tmp, tmp2, flags = 0; + _jit_name(block->_jit, __func__); - rec_mtc(state, block, offset); + + if (state->state->ops.cop2_notify) { + /* We must call cop2_notify, handle that in C. */ + rec_mtc(state, block, offset); + return; + } + + if (c.r.rd == 31) + return; + + if (c.r.rd == 30) + flags |= REG_EXT; + + rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, flags); + + switch (c.r.rd) { + case 15: + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + jit_ldxi_i(tmp, LIGHTREC_REG_STATE, cp2d_i_offset(13)); + + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + jit_ldxi_i(tmp2, LIGHTREC_REG_STATE, cp2d_i_offset(14)); + + jit_stxi_i(cp2d_i_offset(12), LIGHTREC_REG_STATE, tmp); + jit_stxi_i(cp2d_i_offset(13), LIGHTREC_REG_STATE, tmp2); + jit_stxi_i(cp2d_i_offset(14), LIGHTREC_REG_STATE, rt); + + lightrec_free_reg(reg_cache, tmp); + lightrec_free_reg(reg_cache, tmp2); + break; + case 28: + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_lshi(tmp, rt, 7); + jit_andi(tmp, tmp, 0xf80); + jit_stxi_s(cp2d_s_offset(9), LIGHTREC_REG_STATE, tmp); + + jit_lshi(tmp, rt, 2); + jit_andi(tmp, tmp, 0xf80); + jit_stxi_s(cp2d_s_offset(10), LIGHTREC_REG_STATE, tmp); + + jit_rshi(tmp, rt, 3); + jit_andi(tmp, tmp, 0xf80); + jit_stxi_s(cp2d_s_offset(11), LIGHTREC_REG_STATE, tmp); + + lightrec_free_reg(reg_cache, tmp); + break; + case 30: + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + + /* if (rt < 0) rt = ~rt; */ + jit_rshi(tmp, rt, 31); + jit_xorr(tmp, rt, tmp); + + /* We know the sign bit is 0. 
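Equivalently in C: writing GTE data register 30 (LZCS) makes register 31 (LZCR) hold the number of leading bits equal to the sign bit, computed by the same shift loop emitted below. A hedged, standalone sketch:

    #include <stdint.h>

    /* Leading-zero count for positive values, leading-one count for
     * negative ones; the result is in [1, 32], with 0 and -1 giving 32. */
    static uint32_t gte_lzcr(int32_t lzcs)
    {
        uint32_t v = (uint32_t)(lzcs < 0 ? ~lzcs : lzcs);
        uint32_t n = 33;

        v <<= 1;            /* the sign bit is known to be zero */
        do {
            n--;
            v >>= 1;        /* shift until the value is exhausted */
        } while (v != 0);

        return n;
    }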
Left-shift by 1 to start the algorithm */ + jit_lshi(tmp, tmp, 1); + jit_movi(tmp2, 33); + + /* Decrement tmp2 and right-shift the value by 1 until it equals zero */ + loop = jit_label(); + jit_subi(tmp2, tmp2, 1); + jit_rshi_u(tmp, tmp, 1); + to_loop = jit_bnei(tmp, 0); + + jit_patch_at(to_loop, loop); + + jit_stxi_i(cp2d_i_offset(31), LIGHTREC_REG_STATE, tmp2); + jit_stxi_i(cp2d_i_offset(30), LIGHTREC_REG_STATE, rt); + + lightrec_free_reg(reg_cache, tmp); + lightrec_free_reg(reg_cache, tmp2); + break; + default: + jit_stxi_i(cp2d_i_offset(c.r.rd), LIGHTREC_REG_STATE, rt); + break; + } + + lightrec_free_reg(reg_cache, rt); } -static void rec_cp2_basic_CTC2(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_cp2_basic_CTC2(struct lightrec_cstate *state, + const struct block *block, u16 offset) { + struct regcache *reg_cache = state->reg_cache; + const union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + u8 rt, tmp, tmp2; + _jit_name(block->_jit, __func__); - rec_mtc(state, block, offset); + + if (state->state->ops.cop2_notify) { + /* We must call cop2_notify, handle that in C. */ + rec_mtc(state, block, offset); + return; + } + + rt = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rt, 0); + + switch (c.r.rd) { + case 4: + case 12: + case 20: + case 26: + case 27: + case 29: + case 30: + jit_stxi_s(cp2c_s_offset(c.r.rd), LIGHTREC_REG_STATE, rt); + break; + case 31: + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_andi(tmp, rt, 0x7f87e000); + jit_nei(tmp, tmp, 0); + jit_lshi(tmp, tmp, 31); + + jit_andi(tmp2, rt, 0x7ffff000); + jit_orr(tmp, tmp2, tmp); + + jit_stxi_i(cp2c_i_offset(31), LIGHTREC_REG_STATE, tmp); + + lightrec_free_reg(reg_cache, tmp); + lightrec_free_reg(reg_cache, tmp2); + break; + + default: + jit_stxi_i(cp2c_i_offset(c.r.rd), LIGHTREC_REG_STATE, rt); + } + + lightrec_free_reg(reg_cache, rt); } -static void rec_cp0_RFE(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_cp0_RFE(struct lightrec_cstate *state, + const struct block *block, u16 offset) { + struct regcache *reg_cache = state->reg_cache; jit_state_t *_jit = block->_jit; + u8 status, tmp; jit_name(__func__); jit_note(__FILE__, __LINE__); - call_to_c_wrapper(state, block, 0, false, C_WRAPPER_RFE); + status = lightrec_alloc_reg_temp(reg_cache, _jit); + jit_ldxi_i(status, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp0[12])); + + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + + /* status = ((status >> 2) & 0xf) | status & ~0xf; */ + jit_rshi(tmp, status, 2); + jit_andi(tmp, tmp, 0xf); + jit_andi(status, status, ~0xful); + jit_orr(status, status, tmp); + + jit_ldxi_i(tmp, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.cp0[13])); + jit_stxi_i(offsetof(struct lightrec_state, regs.cp0[12]), + LIGHTREC_REG_STATE, status); + + /* Exit dynarec in case there's a software interrupt. 
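As a plain-C sketch of the Status/Cause interplay used here: 0x0300 masks the two software-interrupt bits shared by Status and Cause, and bit 0 of Status is the current interrupt-enable bit, so the result is nonzero only when a pending software interrupt can actually be taken. Field names follow the diff; the function itself is illustrative:

    #include <stdint.h>

    static uint32_t rfe_sketch(uint32_t *status, uint32_t cause)
    {
        /* Pop the interrupt-enable/mode bit stack:
         * status = ((status >> 2) & 0xf) | (status & ~0xf); */
        *status = ((*status >> 2) & 0xf) | (*status & ~(uint32_t)0xf);

        /* exit_flags = !!(status & cause & 0x0300) & status; */
        return (uint32_t)!!(*status & cause & 0x0300) & *status;
    }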
+ * exit_flags = !!(status & cause & 0x0300) & status; */ + jit_andr(tmp, tmp, status); + jit_andi(tmp, tmp, 0x0300); + jit_nei(tmp, tmp, 0); + jit_andr(tmp, tmp, status); + jit_stxi_i(offsetof(struct lightrec_state, exit_flags), + LIGHTREC_REG_STATE, tmp); + + lightrec_free_reg(reg_cache, status); + lightrec_free_reg(reg_cache, tmp); } -static void rec_CP(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_CP(struct lightrec_cstate *state, + const struct block *block, u16 offset) { union code c = block->opcode_list[offset].c; jit_state_t *_jit = block->_jit; @@ -1539,48 +2336,119 @@ static void rec_CP(struct lightrec_state *state, const struct block *block, jit_name(__func__); jit_note(__FILE__, __LINE__); - call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_CP); + call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_CP); } -static void rec_meta_BEQZ(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_meta_MOV(struct lightrec_cstate *state, + const struct block *block, u16 offset) { + struct regcache *reg_cache = state->reg_cache; + union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + u8 rs, rd; + _jit_name(block->_jit, __func__); - rec_b(state, block, offset, jit_code_bnei, 0, false, true); + jit_note(__FILE__, __LINE__); + if (c.r.rs) + rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0); + rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, REG_EXT); + + if (c.r.rs == 0) + jit_movi(rd, 0); + else + jit_extr_i(rd, rs); + + if (c.r.rs) + lightrec_free_reg(reg_cache, rs); + lightrec_free_reg(reg_cache, rd); } -static void rec_meta_BNEZ(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_meta_EXTC_EXTS(struct lightrec_cstate *state, + const struct block *block, + u16 offset) { + struct regcache *reg_cache = state->reg_cache; + union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + u8 rs, rt; + _jit_name(block->_jit, __func__); - rec_b(state, block, offset, jit_code_beqi, 0, false, true); + jit_note(__FILE__, __LINE__); + + rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0); + rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, REG_EXT); + + if (c.i.op == OP_META_EXTC) + jit_extr_c(rt, rs); + else + jit_extr_s(rt, rs); + + lightrec_free_reg(reg_cache, rs); + lightrec_free_reg(reg_cache, rt); } -static void rec_meta_MOV(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_meta_MULT2(struct lightrec_cstate *state, + const struct block *block, + u16 offset) { struct regcache *reg_cache = state->reg_cache; union code c = block->opcode_list[offset].c; jit_state_t *_jit = block->_jit; - u8 rs, rd; + u8 reg_lo = get_mult_div_lo(c); + u8 reg_hi = get_mult_div_hi(c); + u32 flags = block->opcode_list[offset].flags; + bool is_signed = c.i.op == OP_META_MULT2; + u8 rs, lo, hi, rflags = 0, hiflags = 0; + unsigned int i; + + if (!op_flag_no_hi(flags) && c.r.op < 32) { + rflags = is_signed ? REG_EXT : REG_ZEXT; + hiflags = is_signed ? REG_EXT : (REG_EXT | REG_ZEXT); + } _jit_name(block->_jit, __func__); jit_note(__FILE__, __LINE__); - rs = c.r.rs ? 
lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0) : 0; - rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, REG_EXT); - if (c.r.rs == 0) { - jit_movi(rd, 0); - } else { -#if __WORDSIZE == 32 - jit_movr(rd, rs); -#else - jit_extr_i(rd, rs); -#endif + rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, rflags); + + /* + * We must handle the case where one of the output registers is our rs + * input register. Thankfully, computing LO/HI can be done in any + * order. Here, we make sure that the computation that overwrites the + * input register is always performed last. + */ + for (i = 0; i < 2; i++) { + if ((!i ^ (reg_lo == c.i.rs)) && !op_flag_no_lo(flags)) { + lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0); + + if (c.r.op < 32) + jit_lshi(lo, rs, c.r.op); + else + jit_movi(lo, 0); + + lightrec_free_reg(reg_cache, lo); + continue; + } + + if ((!!i ^ (reg_lo == c.i.rs)) && !op_flag_no_hi(flags)) { + hi = lightrec_alloc_reg_out(reg_cache, _jit, + reg_hi, hiflags); + + if (c.r.op >= 32) + jit_lshi(hi, rs, c.r.op - 32); + else if (is_signed) + jit_rshi(hi, rs, 32 - c.r.op); + else + jit_rshi_u(hi, rs, 32 - c.r.op); + + lightrec_free_reg(reg_cache, hi); + } } - lightrec_free_reg(state->reg_cache, rs); - lightrec_free_reg(state->reg_cache, rd); + lightrec_free_reg(reg_cache, rs); + + _jit_name(block->_jit, __func__); + jit_note(__FILE__, __LINE__); } static const lightrec_rec_func_t rec_standard[64] = { @@ -1618,9 +2486,11 @@ static const lightrec_rec_func_t rec_standard[64] = { [OP_LWC2] = rec_LWC2, [OP_SWC2] = rec_SWC2, - [OP_META_BEQZ] = rec_meta_BEQZ, - [OP_META_BNEZ] = rec_meta_BNEZ, [OP_META_MOV] = rec_meta_MOV, + [OP_META_EXTC] = rec_meta_EXTC_EXTS, + [OP_META_EXTS] = rec_meta_EXTC_EXTS, + [OP_META_MULT2] = rec_meta_MULT2, + [OP_META_MULTU2] = rec_meta_MULT2, }; static const lightrec_rec_func_t rec_special[64] = { @@ -1680,8 +2550,8 @@ static const lightrec_rec_func_t rec_cp2_basic[64] = { [OP_CP2_BASIC_CTC2] = rec_cp2_basic_CTC2, }; -static void rec_SPECIAL(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_SPECIAL(struct lightrec_cstate *state, + const struct block *block, u16 offset) { union code c = block->opcode_list[offset].c; lightrec_rec_func_t f = rec_special[c.r.op]; @@ -1692,8 +2562,8 @@ static void rec_SPECIAL(struct lightrec_state *state, const struct block *block, (*f)(state, block, offset); } -static void rec_REGIMM(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_REGIMM(struct lightrec_cstate *state, + const struct block *block, u16 offset) { union code c = block->opcode_list[offset].c; lightrec_rec_func_t f = rec_regimm[c.r.rt]; @@ -1704,8 +2574,8 @@ static void rec_REGIMM(struct lightrec_state *state, const struct block *block, (*f)(state, block, offset); } -static void rec_CP0(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_CP0(struct lightrec_cstate *state, + const struct block *block, u16 offset) { union code c = block->opcode_list[offset].c; lightrec_rec_func_t f = rec_cp0[c.r.rs]; @@ -1716,8 +2586,8 @@ static void rec_CP0(struct lightrec_state *state, const struct block *block, (*f)(state, block, offset); } -static void rec_CP2(struct lightrec_state *state, const struct block *block, - u16 offset) +static void rec_CP2(struct lightrec_cstate *state, + const struct block *block, u16 offset) { union code c = block->opcode_list[offset].c; @@ -1733,17 +2603,19 @@ static void rec_CP2(struct lightrec_state *state, const struct block *block,
rec_CP(state, block, offset); } -void lightrec_rec_opcode(struct lightrec_state *state, const struct block *block, - u16 offset) +void lightrec_rec_opcode(struct lightrec_cstate *state, + const struct block *block, u16 offset) { struct regcache *reg_cache = state->reg_cache; struct lightrec_branch_target *target; const struct opcode *op = &block->opcode_list[offset]; jit_state_t *_jit = block->_jit; lightrec_rec_func_t f; + u16 unload_offset; - if (op->flags & LIGHTREC_SYNC) { - jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles); + if (op_flag_sync(op->flags)) { + if (state->cycles) + jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles); state->cycles = 0; lightrec_storeback_regs(reg_cache, _jit); @@ -1764,16 +2636,10 @@ void lightrec_rec_opcode(struct lightrec_state *state, const struct block *block (*f)(state, block, offset); } - if (unlikely(op->flags & LIGHTREC_UNLOAD_RD)) { - lightrec_clean_reg_if_loaded(reg_cache, _jit, op->r.rd, true); - pr_debug("Cleaning RD reg %s\n", lightrec_reg_name(op->r.rd)); - } - if (unlikely(op->flags & LIGHTREC_UNLOAD_RS)) { - lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rs, true); - pr_debug("Cleaning RS reg %s\n", lightrec_reg_name(op->i.rt)); - } - if (unlikely(op->flags & LIGHTREC_UNLOAD_RT)) { - lightrec_clean_reg_if_loaded(reg_cache, _jit, op->i.rt, true); - pr_debug("Cleaning RT reg %s\n", lightrec_reg_name(op->i.rt)); + if (OPT_EARLY_UNLOAD) { + unload_offset = offset + + (has_delay_slot(op->c) && !op_flag_no_ds(op->flags)); + + lightrec_do_early_unload(state, block, unload_offset); } } diff --git a/deps/lightrec/emitter.h b/deps/lightrec/emitter.h index d0fb883c9..4cbe8da64 100644 --- a/deps/lightrec/emitter.h +++ b/deps/lightrec/emitter.h @@ -9,11 +9,11 @@ #include "lightrec.h" struct block; +struct lightrec_cstate; struct opcode; -void lightrec_rec_opcode(struct lightrec_state *state, const struct block *block, - u16 offset); -void lightrec_emit_eob(struct lightrec_state *state, const struct block *block, - u16 offset); +void lightrec_rec_opcode(struct lightrec_cstate *state, const struct block *block, u16 offset); +void lightrec_emit_eob(struct lightrec_cstate *state, + const struct block *block, u16 offset); #endif /* __EMITTER_H__ */ diff --git a/deps/lightrec/interpreter.c b/deps/lightrec/interpreter.c index 199233815..43bea83f0 100644 --- a/deps/lightrec/interpreter.c +++ b/deps/lightrec/interpreter.c @@ -63,7 +63,7 @@ static inline u32 jump_skip(struct interpreter *inter) inter->op = next_op(inter); inter->offset++; - if (inter->op->flags & LIGHTREC_SYNC) { + if (op_flag_sync(inter->op->flags)) { inter->state->current_cycle += inter->cycles; inter->cycles = 0; } @@ -101,8 +101,8 @@ static void update_cycles_before_branch(struct interpreter *inter) if (!inter->delay_slot) { cycles = lightrec_cycles_of_opcode(inter->op->c); - if (has_delay_slot(inter->op->c) && - !(inter->op->flags & LIGHTREC_NO_DS)) + if (!op_flag_no_ds(inter->op->flags) && + has_delay_slot(inter->op->c)) cycles += lightrec_cycles_of_opcode(next_op(inter)->c); inter->cycles += cycles; @@ -120,10 +120,8 @@ static bool is_branch_taken(const u32 *reg_cache, union code op) case OP_JAL: return true; case OP_BEQ: - case OP_META_BEQZ: return reg_cache[op.r.rs] == reg_cache[op.r.rt]; case OP_BNE: - case OP_META_BNEZ: return reg_cache[op.r.rs] != reg_cache[op.r.rt]; case OP_REGIMM: switch (op.r.rt) { @@ -144,7 +142,7 @@ static bool is_branch_taken(const u32 *reg_cache, union code op) static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool 
branch) { struct lightrec_state *state = inter->state; - u32 *reg_cache = state->native_reg_cache; + u32 *reg_cache = state->regs.gpr; struct opcode new_op, *op = next_op(inter); union code op_next; struct interpreter inter2 = { @@ -169,8 +167,8 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch) * but on branch boundaries, we need to adjust the return * address so that the GTE opcode is effectively executed. */ - cause = (*state->ops.cop0_ops.cfc)(state, op->c.opcode, 13); - epc = (*state->ops.cop0_ops.cfc)(state, op->c.opcode, 14); + cause = state->regs.cp0[13]; + epc = state->regs.cp0[14]; if (!(cause & 0x7c) && epc == pc - 4) pc -= 4; @@ -329,9 +327,9 @@ static u32 int_jump(struct interpreter *inter, bool link) u32 pc = (old_pc & 0xf0000000) | (inter->op->j.imm << 2); if (link) - state->native_reg_cache[31] = old_pc + 8; + state->regs.gpr[31] = old_pc + 8; - if (inter->op->flags & LIGHTREC_NO_DS) + if (op_flag_no_ds(inter->op->flags)) return pc; return int_delay_slot(inter, pc, true); @@ -350,14 +348,18 @@ static u32 int_JAL(struct interpreter *inter) static u32 int_jumpr(struct interpreter *inter, u8 link_reg) { struct lightrec_state *state = inter->state; - u32 old_pc, next_pc = state->native_reg_cache[inter->op->r.rs]; + u32 old_pc = int_get_branch_pc(inter); + u32 next_pc = state->regs.gpr[inter->op->r.rs]; - if (link_reg) { - old_pc = int_get_branch_pc(inter); - state->native_reg_cache[link_reg] = old_pc + 8; + if (op_flag_emulate_branch(inter->op->flags) && inter->offset) { + inter->cycles -= lightrec_cycles_of_opcode(inter->op->c); + return old_pc; } - if (inter->op->flags & LIGHTREC_NO_DS) + if (link_reg) + state->regs.gpr[link_reg] = old_pc + 8; + + if (op_flag_no_ds(inter->op->flags)) return next_pc; return int_delay_slot(inter, next_pc, true); @@ -375,8 +377,7 @@ static u32 int_special_JALR(struct interpreter *inter) static u32 int_do_branch(struct interpreter *inter, u32 old_pc, u32 next_pc) { - if (!inter->delay_slot && - (inter->op->flags & LIGHTREC_LOCAL_BRANCH) && + if (!inter->delay_slot && op_flag_local_branch(inter->op->flags) && (s16)inter->op->c.i.imm >= 0) { next_pc = old_pc + ((1 + (s16)inter->op->c.i.imm) << 2); next_pc = lightrec_emulate_block(inter->state, inter->block, next_pc); @@ -390,9 +391,14 @@ static u32 int_branch(struct interpreter *inter, u32 pc, { u32 next_pc = pc + 4 + ((s16)code.i.imm << 2); + if (op_flag_emulate_branch(inter->op->flags) && inter->offset) { + inter->cycles -= lightrec_cycles_of_opcode(inter->op->c); + return pc; + } + update_cycles_before_branch(inter); - if (inter->op->flags & LIGHTREC_NO_DS) { + if (op_flag_no_ds(inter->op->flags)) { if (branch) return int_do_branch(inter, pc, next_pc); else @@ -405,7 +411,7 @@ static u32 int_branch(struct interpreter *inter, u32 pc, if (branch) return int_do_branch(inter, pc, next_pc); - if (inter->op->flags & LIGHTREC_EMULATE_BRANCH) + if (op_flag_emulate_branch(inter->op->flags)) return pc + 8; else return jump_after_branch(inter); @@ -415,8 +421,8 @@ static u32 int_beq(struct interpreter *inter, bool bne) { u32 rs, rt, old_pc = int_get_branch_pc(inter); - rs = inter->state->native_reg_cache[inter->op->i.rs]; - rt = inter->state->native_reg_cache[inter->op->i.rt]; + rs = inter->state->regs.gpr[inter->op->i.rs]; + rt = inter->state->regs.gpr[inter->op->i.rt]; return int_branch(inter, old_pc, inter->op->c, (rs == rt) ^ bne); } @@ -437,9 +443,9 @@ static u32 int_bgez(struct interpreter *inter, bool link, bool lt, bool regimm) s32 rs; if (link) - 
inter->state->native_reg_cache[31] = old_pc + 8; + inter->state->regs.gpr[31] = old_pc + 8; - rs = (s32)inter->state->native_reg_cache[inter->op->i.rs]; + rs = (s32)inter->state->regs.gpr[inter->op->i.rs]; return int_branch(inter, old_pc, inter->op->c, ((regimm && !rs) || rs > 0) ^ lt); @@ -484,7 +490,7 @@ static u32 int_cfc(struct interpreter *inter) val = lightrec_mfc(state, op->c); if (likely(op->r.rt)) - state->native_reg_cache[op->r.rt] = val; + state->regs.gpr[op->r.rt] = val; return jump_next(inter); } @@ -494,12 +500,12 @@ static u32 int_ctc(struct interpreter *inter) struct lightrec_state *state = inter->state; const struct opcode *op = inter->op; - lightrec_mtc(state, op->c, state->native_reg_cache[op->r.rt]); + lightrec_mtc(state, op->c, state->regs.gpr[op->r.rt]); /* If we have a MTC0 or CTC0 to CP0 register 12 (Status) or 13 (Cause), * return early so that the emulator will be able to check software * interrupt status. */ - if (!(inter->op->flags & LIGHTREC_NO_DS) && + if (!op_flag_no_ds(inter->op->flags) && op->i.op == OP_CP0 && (op->r.rd == 12 || op->r.rd == 13)) return int_get_ds_pc(inter, 1); else @@ -508,40 +514,21 @@ static u32 int_ctc(struct interpreter *inter) static u32 int_cp0_RFE(struct interpreter *inter) { - struct lightrec_state *state = inter->state; - u32 status; - - /* Read CP0 Status register (r12) */ - status = state->ops.cop0_ops.mfc(state, inter->op->c.opcode, 12); - - /* Switch the bits */ - status = ((status & 0x3c) >> 2) | (status & ~0xf); - - /* Write it back */ - state->ops.cop0_ops.ctc(state, inter->op->c.opcode, 12, status); + lightrec_rfe(inter->state); return jump_next(inter); } static u32 int_CP(struct interpreter *inter) { - struct lightrec_state *state = inter->state; - const struct lightrec_cop_ops *ops; - const struct opcode *op = inter->op; - - if (op->i.op == OP_CP2) - ops = &state->ops.cop2_ops; - else - ops = &state->ops.cop0_ops; - - (*ops->op)(state, (op->j.imm) & ~(1 << 25)); + lightrec_cp(inter->state, inter->op->c); return jump_next(inter); } static u32 int_ADDI(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_i *op = &inter->op->i; if (likely(op->rt)) @@ -552,7 +539,7 @@ static u32 int_ADDI(struct interpreter *inter) static u32 int_SLTI(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_i *op = &inter->op->i; if (likely(op->rt)) @@ -563,7 +550,7 @@ static u32 int_SLTI(struct interpreter *inter) static u32 int_SLTIU(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_i *op = &inter->op->i; if (likely(op->rt)) @@ -574,7 +561,7 @@ static u32 int_SLTIU(struct interpreter *inter) static u32 int_ANDI(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_i *op = &inter->op->i; if (likely(op->rt)) @@ -585,7 +572,7 @@ static u32 int_ANDI(struct interpreter *inter) static u32 int_ORI(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_i *op = &inter->op->i; if (likely(op->rt)) @@ -596,7 +583,7 @@ static u32 int_ORI(struct interpreter *inter) static u32 int_XORI(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_i *op = &inter->op->i; if 
(likely(op->rt)) @@ -609,7 +596,7 @@ static u32 int_LUI(struct interpreter *inter) { struct opcode_i *op = &inter->op->i; - inter->state->native_reg_cache[op->rt] = op->imm << 16; + inter->state->regs.gpr[op->rt] = op->imm << 16; return jump_next(inter); } @@ -617,7 +604,7 @@ static u32 int_LUI(struct interpreter *inter) static u32 int_io(struct interpreter *inter, bool is_load) { struct opcode_i *op = &inter->op->i; - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; u32 val; val = lightrec_rw(inter->state, inter->op->c, @@ -639,12 +626,12 @@ static u32 int_store(struct interpreter *inter) { u32 next_pc; - if (likely(!(inter->op->flags & LIGHTREC_SMC))) + if (likely(!op_flag_smc(inter->op->flags))) return int_io(inter, false); lightrec_rw(inter->state, inter->op->c, - inter->state->native_reg_cache[inter->op->i.rs], - inter->state->native_reg_cache[inter->op->i.rt], + inter->state->regs.gpr[inter->op->i.rs], + inter->state->regs.gpr[inter->op->i.rt], &inter->op->flags, inter->block); next_pc = int_get_ds_pc(inter, 1); @@ -666,8 +653,8 @@ static u32 int_special_SLL(struct interpreter *inter) u32 rt; if (op->opcode) { /* Handle NOPs */ - rt = inter->state->native_reg_cache[op->r.rt]; - inter->state->native_reg_cache[op->r.rd] = rt << op->r.imm; + rt = inter->state->regs.gpr[op->r.rt]; + inter->state->regs.gpr[op->r.rd] = rt << op->r.imm; } return jump_next(inter); @@ -676,9 +663,9 @@ static u32 int_special_SLL(struct interpreter *inter) static u32 int_special_SRL(struct interpreter *inter) { struct opcode *op = inter->op; - u32 rt = inter->state->native_reg_cache[op->r.rt]; + u32 rt = inter->state->regs.gpr[op->r.rt]; - inter->state->native_reg_cache[op->r.rd] = rt >> op->r.imm; + inter->state->regs.gpr[op->r.rd] = rt >> op->r.imm; return jump_next(inter); } @@ -686,9 +673,9 @@ static u32 int_special_SRL(struct interpreter *inter) static u32 int_special_SRA(struct interpreter *inter) { struct opcode *op = inter->op; - s32 rt = inter->state->native_reg_cache[op->r.rt]; + s32 rt = inter->state->regs.gpr[op->r.rt]; - inter->state->native_reg_cache[op->r.rd] = rt >> op->r.imm; + inter->state->regs.gpr[op->r.rd] = rt >> op->r.imm; return jump_next(inter); } @@ -696,10 +683,10 @@ static u32 int_special_SRA(struct interpreter *inter) static u32 int_special_SLLV(struct interpreter *inter) { struct opcode *op = inter->op; - u32 rs = inter->state->native_reg_cache[op->r.rs]; - u32 rt = inter->state->native_reg_cache[op->r.rt]; + u32 rs = inter->state->regs.gpr[op->r.rs]; + u32 rt = inter->state->regs.gpr[op->r.rt]; - inter->state->native_reg_cache[op->r.rd] = rt << (rs & 0x1f); + inter->state->regs.gpr[op->r.rd] = rt << (rs & 0x1f); return jump_next(inter); } @@ -707,10 +694,10 @@ static u32 int_special_SLLV(struct interpreter *inter) static u32 int_special_SRLV(struct interpreter *inter) { struct opcode *op = inter->op; - u32 rs = inter->state->native_reg_cache[op->r.rs]; - u32 rt = inter->state->native_reg_cache[op->r.rt]; + u32 rs = inter->state->regs.gpr[op->r.rs]; + u32 rt = inter->state->regs.gpr[op->r.rt]; - inter->state->native_reg_cache[op->r.rd] = rt >> (rs & 0x1f); + inter->state->regs.gpr[op->r.rd] = rt >> (rs & 0x1f); return jump_next(inter); } @@ -718,10 +705,10 @@ static u32 int_special_SRLV(struct interpreter *inter) static u32 int_special_SRAV(struct interpreter *inter) { struct opcode *op = inter->op; - u32 rs = inter->state->native_reg_cache[op->r.rs]; - s32 rt = inter->state->native_reg_cache[op->r.rt]; + u32 rs = 
inter->state->regs.gpr[op->r.rs]; + s32 rt = inter->state->regs.gpr[op->r.rt]; - inter->state->native_reg_cache[op->r.rd] = rt >> (rs & 0x1f); + inter->state->regs.gpr[op->r.rd] = rt >> (rs & 0x1f); return jump_next(inter); } @@ -739,7 +726,7 @@ static u32 int_syscall_break(struct interpreter *inter) static u32 int_special_MFHI(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_r *op = &inter->op->r; if (likely(op->rd)) @@ -750,7 +737,7 @@ static u32 int_special_MFHI(struct interpreter *inter) static u32 int_special_MTHI(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; reg_cache[REG_HI] = reg_cache[inter->op->r.rs]; @@ -759,7 +746,7 @@ static u32 int_special_MTHI(struct interpreter *inter) static u32 int_special_MFLO(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_r *op = &inter->op->r; if (likely(op->rd)) @@ -770,7 +757,7 @@ static u32 int_special_MFLO(struct interpreter *inter) static u32 int_special_MTLO(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; reg_cache[REG_LO] = reg_cache[inter->op->r.rs]; @@ -779,16 +766,16 @@ static u32 int_special_MTLO(struct interpreter *inter) static u32 int_special_MULT(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; s32 rs = reg_cache[inter->op->r.rs]; s32 rt = reg_cache[inter->op->r.rt]; u8 reg_lo = get_mult_div_lo(inter->op->c); u8 reg_hi = get_mult_div_hi(inter->op->c); u64 res = (s64)rs * (s64)rt; - if (!(inter->op->flags & LIGHTREC_NO_HI)) + if (!op_flag_no_hi(inter->op->flags)) reg_cache[reg_hi] = res >> 32; - if (!(inter->op->flags & LIGHTREC_NO_LO)) + if (!op_flag_no_lo(inter->op->flags)) reg_cache[reg_lo] = res; return jump_next(inter); @@ -796,16 +783,16 @@ static u32 int_special_MULT(struct interpreter *inter) static u32 int_special_MULTU(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; u32 rs = reg_cache[inter->op->r.rs]; u32 rt = reg_cache[inter->op->r.rt]; u8 reg_lo = get_mult_div_lo(inter->op->c); u8 reg_hi = get_mult_div_hi(inter->op->c); u64 res = (u64)rs * (u64)rt; - if (!(inter->op->flags & LIGHTREC_NO_HI)) + if (!op_flag_no_hi(inter->op->flags)) reg_cache[reg_hi] = res >> 32; - if (!(inter->op->flags & LIGHTREC_NO_LO)) + if (!op_flag_no_lo(inter->op->flags)) reg_cache[reg_lo] = res; return jump_next(inter); @@ -813,7 +800,7 @@ static u32 int_special_MULTU(struct interpreter *inter) static u32 int_special_DIV(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; s32 rs = reg_cache[inter->op->r.rs]; s32 rt = reg_cache[inter->op->r.rt]; u8 reg_lo = get_mult_div_lo(inter->op->c); @@ -828,9 +815,9 @@ static u32 int_special_DIV(struct interpreter *inter) hi = rs % rt; } - if (!(inter->op->flags & LIGHTREC_NO_HI)) + if (!op_flag_no_hi(inter->op->flags)) reg_cache[reg_hi] = hi; - if (!(inter->op->flags & LIGHTREC_NO_LO)) + if (!op_flag_no_lo(inter->op->flags)) reg_cache[reg_lo] = lo; return jump_next(inter); @@ -838,7 +825,7 @@ static u32 int_special_DIV(struct interpreter *inter) static u32 int_special_DIVU(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = 
inter->state->regs.gpr; u32 rs = reg_cache[inter->op->r.rs]; u32 rt = reg_cache[inter->op->r.rt]; u8 reg_lo = get_mult_div_lo(inter->op->c); @@ -853,9 +840,9 @@ static u32 int_special_DIVU(struct interpreter *inter) hi = rs % rt; } - if (!(inter->op->flags & LIGHTREC_NO_HI)) + if (!op_flag_no_hi(inter->op->flags)) reg_cache[reg_hi] = hi; - if (!(inter->op->flags & LIGHTREC_NO_LO)) + if (!op_flag_no_lo(inter->op->flags)) reg_cache[reg_lo] = lo; return jump_next(inter); @@ -863,7 +850,7 @@ static u32 int_special_DIVU(struct interpreter *inter) static u32 int_special_ADD(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_r *op = &inter->op->r; s32 rs = reg_cache[op->rs]; s32 rt = reg_cache[op->rt]; @@ -876,7 +863,7 @@ static u32 int_special_ADD(struct interpreter *inter) static u32 int_special_SUB(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_r *op = &inter->op->r; u32 rs = reg_cache[op->rs]; u32 rt = reg_cache[op->rt]; @@ -889,7 +876,7 @@ static u32 int_special_SUB(struct interpreter *inter) static u32 int_special_AND(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_r *op = &inter->op->r; u32 rs = reg_cache[op->rs]; u32 rt = reg_cache[op->rt]; @@ -902,7 +889,7 @@ static u32 int_special_AND(struct interpreter *inter) static u32 int_special_OR(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_r *op = &inter->op->r; u32 rs = reg_cache[op->rs]; u32 rt = reg_cache[op->rt]; @@ -915,7 +902,7 @@ static u32 int_special_OR(struct interpreter *inter) static u32 int_special_XOR(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_r *op = &inter->op->r; u32 rs = reg_cache[op->rs]; u32 rt = reg_cache[op->rt]; @@ -928,7 +915,7 @@ static u32 int_special_XOR(struct interpreter *inter) static u32 int_special_NOR(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_r *op = &inter->op->r; u32 rs = reg_cache[op->rs]; u32 rt = reg_cache[op->rt]; @@ -941,7 +928,7 @@ static u32 int_special_NOR(struct interpreter *inter) static u32 int_special_SLT(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_r *op = &inter->op->r; s32 rs = reg_cache[op->rs]; s32 rt = reg_cache[op->rt]; @@ -954,7 +941,7 @@ static u32 int_special_SLT(struct interpreter *inter) static u32 int_special_SLTU(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_r *op = &inter->op->r; u32 rs = reg_cache[op->rs]; u32 rt = reg_cache[op->rt]; @@ -967,7 +954,7 @@ static u32 int_special_SLTU(struct interpreter *inter) static u32 int_META_MOV(struct interpreter *inter) { - u32 *reg_cache = inter->state->native_reg_cache; + u32 *reg_cache = inter->state->regs.gpr; struct opcode_r *op = &inter->op->r; if (likely(op->rd)) @@ -976,6 +963,55 @@ static u32 int_META_MOV(struct interpreter *inter) return jump_next(inter); } +static u32 int_META_EXTC(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->regs.gpr; + struct opcode_i *op = &inter->op->i; + + if (likely(op->rt)) + 
reg_cache[op->rt] = (u32)(s32)(s8)reg_cache[op->rs]; + + return jump_next(inter); +} + +static u32 int_META_EXTS(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->regs.gpr; + struct opcode_i *op = &inter->op->i; + + if (likely(op->rt)) + reg_cache[op->rt] = (u32)(s32)(s16)reg_cache[op->rs]; + + return jump_next(inter); +} + +static u32 int_META_MULT2(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->regs.gpr; + union code c = inter->op->c; + u32 rs = reg_cache[c.r.rs]; + u8 reg_lo = get_mult_div_lo(c); + u8 reg_hi = get_mult_div_hi(c); + + if (!op_flag_no_lo(inter->op->flags)) { + if (c.r.op < 32) + reg_cache[reg_lo] = rs << c.r.op; + else + reg_cache[reg_lo] = 0; + } + + if (!op_flag_no_hi(inter->op->flags)) { + if (c.r.op >= 32) + reg_cache[reg_hi] = rs << (c.r.op - 32); + else if (c.i.op == OP_META_MULT2) + reg_cache[reg_hi] = (s32) rs >> (32 - c.r.op); + else + reg_cache[reg_hi] = rs >> (32 - c.r.op); + } + + return jump_next(inter); +} + static const lightrec_int_func_t int_standard[64] = { SET_DEFAULT_ELM(int_standard, int_unimplemented), [OP_SPECIAL] = int_SPECIAL, @@ -1011,9 +1047,11 @@ static const lightrec_int_func_t int_standard[64] = { [OP_LWC2] = int_LWC2, [OP_SWC2] = int_store, - [OP_META_BEQZ] = int_BEQ, - [OP_META_BNEZ] = int_BNE, [OP_META_MOV] = int_META_MOV, + [OP_META_EXTC] = int_META_EXTC, + [OP_META_EXTS] = int_META_EXTS, + [OP_META_MULT2] = int_META_MULT2, + [OP_META_MULTU2] = int_META_MULT2, }; static const lightrec_int_func_t int_special[64] = { @@ -1146,5 +1184,7 @@ u32 lightrec_emulate_block(struct lightrec_state *state, struct block *block, u3 pr_err("PC 0x%x is outside block at PC 0x%x\n", pc, block->pc); + lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT); + return 0; } diff --git a/deps/lightrec/lightning-wrapper.h b/deps/lightrec/lightning-wrapper.h new file mode 100644 index 000000000..b0e8bf3bb --- /dev/null +++ b/deps/lightrec/lightning-wrapper.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * Copyright (C) 2022 Paul Cercueil + */ + +#ifndef __LIGHTNING_WRAPPER_H__ +#define __LIGHTNING_WRAPPER_H__ + +#include + +#if __WORDSIZE == 32 + +#define jit_ldxi_ui(u,v,w) jit_ldxi_i(u,v,w) +#define jit_stxi_ui(u,v,w) jit_stxi_i(u,v,w) +#define jit_extr_i(u,v) jit_movr(u,v) +#define jit_extr_ui(u,v) jit_movr(u,v) +#define jit_retval_ui(u) jit_retval(u) +#define jit_getarg_ui(u,v) jit_getarg_i(u,v) + +#endif + +#define jit_b() jit_beqr(0, 0) + +#endif /* __LIGHTNING_WRAPPER_H__ */ diff --git a/deps/lightrec/config.h.cmakein b/deps/lightrec/lightrec-config.h.cmakein similarity index 91% rename from deps/lightrec/config.h.cmakein rename to deps/lightrec/lightrec-config.h.cmakein index 47eac522c..11886653a 100644 --- a/deps/lightrec/config.h.cmakein +++ b/deps/lightrec/lightrec-config.h.cmakein @@ -9,7 +9,7 @@ #cmakedefine01 ENABLE_THREADED_COMPILER #cmakedefine01 ENABLE_FIRST_PASS #cmakedefine01 ENABLE_DISASSEMBLER -#cmakedefine01 ENABLE_TINYMM +#cmakedefine01 ENABLE_CODE_BUFFER #cmakedefine01 HAS_DEFAULT_ELM @@ -20,6 +20,7 @@ #cmakedefine01 OPT_LOCAL_BRANCHES #cmakedefine01 OPT_SWITCH_DELAY_SLOTS #cmakedefine01 OPT_FLAG_STORES +#cmakedefine01 OPT_FLAG_IO #cmakedefine01 OPT_FLAG_MULT_DIV #cmakedefine01 OPT_EARLY_UNLOAD diff --git a/deps/lightrec/lightrec-private.h b/deps/lightrec/lightrec-private.h index 86ca1b826..56032f500 100644 --- a/deps/lightrec/lightrec-private.h +++ b/deps/lightrec/lightrec-private.h @@ -6,16 +6,25 @@ #ifndef __LIGHTREC_PRIVATE_H__ #define __LIGHTREC_PRIVATE_H__ -#include "config.h" 
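/*
 * Illustrative sketch, not part of the patch: semantics of the new
 * meta opcodes handled above. EXTC/EXTS sign-extend the low 8/16 bits
 * through a cast chain; MULT2/MULTU2 replace a multiplication by a
 * power of two with shifts into the LO/HI pair. The demo assumes
 * 1 <= shift <= 63, since int_META_MULT2 reads the exponent from a
 * 6-bit opcode field.
 */
#include <assert.h>
#include <stdint.h>

static uint32_t demo_extc(uint32_t v)	/* OP_META_EXTC */
{
	return (uint32_t)(int32_t)(int8_t)v;
}

static uint32_t demo_exts(uint32_t v)	/* OP_META_EXTS */
{
	return (uint32_t)(int32_t)(int16_t)v;
}

/* Signed multiply by 2^shift into LO/HI; MULTU2 would use a logical
 * shift for the HI half instead */
static void demo_mult2(uint32_t rs, unsigned int shift,
		       uint32_t *lo, uint32_t *hi)
{
	*lo = shift < 32 ? rs << shift : 0;

	if (shift >= 32)
		*hi = rs << (shift - 32);
	else
		*hi = (uint32_t)((int32_t)rs >> (32 - shift));
}

int main(void)
{
	uint32_t lo, hi;

	assert(demo_extc(0x000000f0) == 0xfffffff0);
	assert(demo_exts(0x00008000) == 0xffff8000);

	demo_mult2(0x80000000, 1, &lo, &hi);	/* -2^31 * 2 */
	assert(lo == 0 && hi == 0xffffffff);

	demo_mult2(3, 33, &lo, &hi);		/* 3 * 2^33 */
	assert(lo == 0 && hi == 6);
	return 0;
}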
+#include "lightning-wrapper.h" +#include "lightrec-config.h" #include "disassembler.h" #include "lightrec.h" +#include "regcache.h" #if ENABLE_THREADED_COMPILER #include #endif +#ifdef _MSC_BUILD +#include +#endif + #define ARRAY_SIZE(x) (sizeof(x) ? sizeof(x) / sizeof((x)[0]) : 0) +#define GENMASK(h, l) \ + (((uintptr_t)-1 << (l)) & ((uintptr_t)-1 >> (__WORDSIZE - 1 - (h)))) + #ifdef __GNUC__ # define likely(x) __builtin_expect(!!(x),1) # define unlikely(x) __builtin_expect(!!(x),0) @@ -42,12 +51,28 @@ #define SET_DEFAULT_ELM(table, value) [0] = NULL #endif +#define fallthrough do {} while (0) /* fall-through */ + +#define container_of(ptr, type, member) \ + ((type *)((void *)(ptr) - offsetof(type, member))) + +#ifdef _MSC_BUILD +# define popcount32(x) __popcnt(x) +# define clz32(x) _lzcnt_u32(x) +# define ctz32(x) _tzcnt_u32(x) +#else +# define popcount32(x) __builtin_popcount(x) +# define clz32(x) __builtin_clz(x) +# define ctz32(x) __builtin_ctz(x) +#endif + /* Flags for (struct block *)->flags */ #define BLOCK_NEVER_COMPILE BIT(0) #define BLOCK_SHOULD_RECOMPILE BIT(1) #define BLOCK_FULLY_TAGGED BIT(2) #define BLOCK_IS_DEAD BIT(3) #define BLOCK_IS_MEMSET BIT(4) +#define BLOCK_NO_OPCODE_LIST BIT(5) #define RAM_SIZE 0x200000 #define BIOS_SIZE 0x80000 @@ -66,9 +91,16 @@ struct blockcache; struct recompiler; struct regcache; struct opcode; -struct tinymm; struct reaper; +struct u16x2 { +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + u16 h, l; +#else + u16 l, h; +#endif +}; + struct block { jit_state_t *_jit; struct opcode *opcode_list; @@ -77,11 +109,13 @@ struct block { struct block *next; u32 pc; u32 hash; + u32 precompile_date; unsigned int code_size; u16 nb_ops; - u8 flags; #if ENABLE_THREADED_COMPILER - atomic_flag op_list_freed; + _Atomic u8 flags; +#else + u8 flags; #endif }; @@ -100,55 +134,66 @@ enum c_wrappers { C_WRAPPER_RW_GENERIC, C_WRAPPER_MFC, C_WRAPPER_MTC, - C_WRAPPER_RFE, C_WRAPPER_CP, - C_WRAPPER_SYSCALL, - C_WRAPPER_BREAK, C_WRAPPERS_COUNT, }; +struct lightrec_cstate { + struct lightrec_state *state; + + struct lightrec_branch local_branches[512]; + struct lightrec_branch_target targets[512]; + unsigned int nb_local_branches; + unsigned int nb_targets; + unsigned int cycles; + + struct regcache *reg_cache; +}; + struct lightrec_state { - u32 native_reg_cache[34]; + struct lightrec_registers regs; + uintptr_t wrapper_regs[NUM_TEMPS]; u32 next_pc; u32 current_cycle; u32 target_cycle; u32 exit_flags; u32 old_cycle_counter; struct block *dispatcher, *c_wrapper_block; - void *c_wrapper, *c_wrappers[C_WRAPPERS_COUNT]; - struct jit_node *branches[512]; - struct lightrec_branch local_branches[512]; - struct lightrec_branch_target targets[512]; - unsigned int nb_branches; - unsigned int nb_local_branches; - unsigned int nb_targets; - struct tinymm *tinymm; + void *c_wrappers[C_WRAPPERS_COUNT]; + void *wrappers_eps[C_WRAPPERS_COUNT]; struct blockcache *block_cache; - struct regcache *reg_cache; struct recompiler *rec; + struct lightrec_cstate *cstate; struct reaper *reaper; + void *tlsf; void (*eob_wrapper_func)(void); void (*memset_func)(void); void (*get_next_block)(void); struct lightrec_ops ops; unsigned int nb_precompile; - unsigned int cycles; unsigned int nb_maps; const struct lightrec_mem_map *maps; - uintptr_t offset_ram, offset_bios, offset_scratch; + uintptr_t offset_ram, offset_bios, offset_scratch, offset_io; + _Bool with_32bit_lut; _Bool mirrors_mapped; _Bool invalidate_from_dma_only; void *code_lut[]; }; u32 lightrec_rw(struct lightrec_state *state, union code op, - 
u32 addr, u32 data, u16 *flags, + u32 addr, u32 data, u32 *flags, struct block *block); void lightrec_free_block(struct lightrec_state *state, struct block *block); void remove_from_code_lut(struct blockcache *cache, struct block *block); +enum psx_map +lightrec_get_map_idx(struct lightrec_state *state, u32 kaddr); + +const struct lightrec_mem_map * +lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr); + static inline u32 kunseg(u32 addr) { if (unlikely(addr >= 0xa0000000)) @@ -165,11 +210,55 @@ static inline u32 lut_offset(u32 pc) return (pc & (RAM_SIZE - 1)) >> 2; // RAM } +static inline _Bool is_big_endian(void) +{ + return __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__; +} + +static inline _Bool lut_is_32bit(const struct lightrec_state *state) +{ + return __WORDSIZE == 32 || + (ENABLE_CODE_BUFFER && state->with_32bit_lut); +} + +static inline size_t lut_elm_size(const struct lightrec_state *state) +{ + return lut_is_32bit(state) ? 4 : sizeof(void *); +} + +static inline void ** lut_address(struct lightrec_state *state, u32 offset) +{ + if (lut_is_32bit(state)) + return (void **) ((uintptr_t) state->code_lut + offset * 4); + else + return &state->code_lut[offset]; +} + +static inline void * lut_read(struct lightrec_state *state, u32 offset) +{ + void **lut_entry = lut_address(state, offset); + + if (lut_is_32bit(state)) + return (void *)(uintptr_t) *(u32 *) lut_entry; + else + return *lut_entry; +} + +static inline void lut_write(struct lightrec_state *state, u32 offset, void *ptr) +{ + void **lut_entry = lut_address(state, offset); + + if (lut_is_32bit(state)) + *(u32 *) lut_entry = (u32)(uintptr_t) ptr; + else + *lut_entry = ptr; +} + static inline u32 get_ds_pc(const struct block *block, u16 offset, s16 imm) { u16 flags = block->opcode_list[offset].flags; - offset += !!(OPT_SWITCH_DELAY_SLOTS && (flags & LIGHTREC_NO_DS)); + offset += op_flag_no_ds(flags); return block->pc + (offset + imm << 2); } @@ -178,19 +267,24 @@ static inline u32 get_branch_pc(const struct block *block, u16 offset, s16 imm) { u16 flags = block->opcode_list[offset].flags; - offset -= !!(OPT_SWITCH_DELAY_SLOTS && (flags & LIGHTREC_NO_DS)); + offset -= op_flag_no_ds(flags); return block->pc + (offset + imm << 2); } void lightrec_mtc(struct lightrec_state *state, union code op, u32 data); u32 lightrec_mfc(struct lightrec_state *state, union code op); +void lightrec_rfe(struct lightrec_state *state); +void lightrec_cp(struct lightrec_state *state, union code op); + +struct lightrec_cstate * lightrec_create_cstate(struct lightrec_state *state); +void lightrec_free_cstate(struct lightrec_cstate *cstate); union code lightrec_read_opcode(struct lightrec_state *state, u32 pc); -struct block * lightrec_get_block(struct lightrec_state *state, u32 pc); -int lightrec_compile_block(struct lightrec_state *state, struct block *block); -void lightrec_free_opcode_list(struct lightrec_state *state, struct block *block); +int lightrec_compile_block(struct lightrec_cstate *cstate, struct block *block); +void lightrec_free_opcode_list(struct lightrec_state *state, + struct opcode *list); unsigned int lightrec_cycles_of_opcode(union code code); @@ -204,4 +298,46 @@ static inline u8 get_mult_div_hi(union code c) return (OPT_FLAG_MULT_DIV && c.r.imm) ? c.r.imm : REG_HI; } +static inline s16 s16_max(s16 a, s16 b) +{ + return a > b ? 
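/*
 * Illustrative sketch, not part of the patch: with ENABLE_CODE_BUFFER,
 * a 64-bit host whose code buffer sits below 4 GiB can store 32-bit
 * entries in the code LUT instead of full pointers, halving its size.
 * This standalone model mirrors what lut_elm_size()/lut_read()/
 * lut_write() above do; all demo_* names are illustrative only.
 */
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct demo_lut {
	int is_32bit;	/* entries are u32 values instead of pointers */
	void *table;
};

static size_t demo_elm_size(const struct demo_lut *lut)
{
	return lut->is_32bit ? 4 : sizeof(void *);
}

static void demo_lut_write(struct demo_lut *lut, uint32_t offset, void *ptr)
{
	char *addr = (char *)lut->table + offset * demo_elm_size(lut);
	uint32_t val32 = (uint32_t)(uintptr_t)ptr;

	if (lut->is_32bit)
		memcpy(addr, &val32, sizeof(val32));
	else
		memcpy(addr, &ptr, sizeof(ptr));
}

static void *demo_lut_read(const struct demo_lut *lut, uint32_t offset)
{
	const char *addr = (const char *)lut->table
			   + offset * demo_elm_size(lut);
	uint32_t val32;
	void *ptr;

	if (lut->is_32bit) {
		memcpy(&val32, addr, sizeof(val32));
		return (void *)(uintptr_t)val32;
	}

	memcpy(&ptr, addr, sizeof(ptr));
	return ptr;
}

int main(void)
{
	struct demo_lut lut = { .is_32bit = 1 };
	void *fn = (void *)(uintptr_t)0x1234; /* stand-in code pointer */

	lut.table = calloc(16, sizeof(void *));
	demo_lut_write(&lut, 3, fn);
	assert(demo_lut_read(&lut, 3) == fn);
	assert(demo_lut_read(&lut, 4) == NULL);	/* untouched entry */

	free(lut.table);
	return 0;
}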
a : b; +} + +static inline _Bool block_has_flag(struct block *block, u8 flag) +{ +#if ENABLE_THREADED_COMPILER + return atomic_load_explicit(&block->flags, memory_order_relaxed) & flag; +#else + return block->flags & flag; +#endif +} + +static inline u8 block_set_flags(struct block *block, u8 mask) +{ +#if ENABLE_THREADED_COMPILER + return atomic_fetch_or_explicit(&block->flags, mask, + memory_order_relaxed); +#else + u8 flags = block->flags; + + block->flags |= mask; + + return flags; +#endif +} + +static inline u8 block_clear_flags(struct block *block, u8 mask) +{ +#if ENABLE_THREADED_COMPILER + return atomic_fetch_and_explicit(&block->flags, ~mask, + memory_order_relaxed); +#else + u8 flags = block->flags; + + block->flags &= ~mask; + + return flags; +#endif +} + #endif /* __LIGHTREC_PRIVATE_H__ */ diff --git a/deps/lightrec/lightrec.c b/deps/lightrec/lightrec.c index 5d54f0475..be4da10f4 100644 --- a/deps/lightrec/lightrec.c +++ b/deps/lightrec/lightrec.c @@ -4,20 +4,22 @@ */ #include "blockcache.h" -#include "config.h" #include "debug.h" #include "disassembler.h" #include "emitter.h" #include "interpreter.h" +#include "lightrec-config.h" +#include "lightning-wrapper.h" #include "lightrec.h" #include "memmanager.h" #include "reaper.h" #include "recompiler.h" #include "regcache.h" #include "optimizer.h" +#include "tlsf/tlsf.h" #include -#include +#include #include #if ENABLE_THREADED_COMPILER #include @@ -25,15 +27,13 @@ #include #include #include -#if ENABLE_TINYMM -#include -#endif - -#define GENMASK(h, l) \ - (((uintptr_t)-1 << (l)) & ((uintptr_t)-1 >> (__WORDSIZE - 1 - (h)))) static struct block * lightrec_precompile_block(struct lightrec_state *state, u32 pc); +static bool lightrec_block_is_fully_tagged(const struct block *block); + +static void lightrec_mtc2(struct lightrec_state *state, u8 reg, u32 data); +static u32 lightrec_mfc2(struct lightrec_state *state, u8 reg); static void lightrec_default_sb(struct lightrec_state *state, u32 opcode, void *host, u32 addr, u8 data) @@ -104,7 +104,7 @@ static void lightrec_swl(struct lightrec_state *state, u32 opcode, void *host, u32 addr, u32 data) { unsigned int shift = addr & 0x3; - unsigned int mask = GENMASK(31, (shift + 1) * 8); + unsigned int mask = shift < 3 ? GENMASK(31, (shift + 1) * 8) : 0; u32 old_data; /* Align to 32 bits */ @@ -141,7 +141,7 @@ static void lightrec_swc2(struct lightrec_state *state, union code op, const struct lightrec_mem_map_ops *ops, void *host, u32 addr) { - u32 data = state->ops.cop2_ops.mfc(state, op.opcode, op.i.rt); + u32 data = lightrec_mfc2(state, op.i.rt); ops->sw(state, op.opcode, host, addr, data); } @@ -168,7 +168,7 @@ static u32 lightrec_lwr(struct lightrec_state *state, u32 opcode, void *host, u32 addr, u32 data) { unsigned int shift = addr & 0x3; - unsigned int mask = GENMASK(31, 32 - shift * 8); + unsigned int mask = shift ? 
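/*
 * Illustrative sketch, not part of the patch: with the threaded
 * compiler, block flags double as a lock-free claim mechanism.
 * atomic_fetch_or returns the previous flags, so exactly one caller
 * observes BLOCK_IS_DEAD transitioning from 0 to 1 and becomes
 * responsible for freeing the block, as block_set_flags() above
 * allows. Requires C11 <stdatomic.h>.
 */
#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>

#define BIT(x)		(1u << (x))
#define BLOCK_IS_DEAD	BIT(3)

struct demo_block {
	_Atomic uint8_t flags;
};

static uint8_t demo_set_flags(struct demo_block *b, uint8_t mask)
{
	return atomic_fetch_or_explicit(&b->flags, mask,
					memory_order_relaxed);
}

int main(void)
{
	struct demo_block b = { .flags = 0 };
	uint8_t old;

	old = demo_set_flags(&b, BLOCK_IS_DEAD);
	assert(!(old & BLOCK_IS_DEAD)); /* first caller wins, frees it */

	old = demo_set_flags(&b, BLOCK_IS_DEAD);
	assert(old & BLOCK_IS_DEAD);	/* later callers must back off */
	return 0;
}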
GENMASK(31, 32 - shift * 8) : 0; u32 old_data; /* Align to 32 bits */ @@ -186,38 +186,46 @@ static void lightrec_lwc2(struct lightrec_state *state, union code op, { u32 data = ops->lw(state, op.opcode, host, addr); - state->ops.cop2_ops.mtc(state, op.opcode, op.i.rt, data); + lightrec_mtc2(state, op.i.rt, data); } static void lightrec_invalidate_map(struct lightrec_state *state, const struct lightrec_mem_map *map, u32 addr, u32 len) { if (map == &state->maps[PSX_MAP_KERNEL_USER_RAM]) { - memset(&state->code_lut[lut_offset(addr)], 0, - ((len + 3) / 4) * sizeof(void *)); + memset(lut_address(state, lut_offset(addr)), 0, + ((len + 3) / 4) * lut_elm_size(state)); } } -static const struct lightrec_mem_map * -lightrec_get_map(struct lightrec_state *state, - void **host, u32 kaddr) +enum psx_map +lightrec_get_map_idx(struct lightrec_state *state, u32 kaddr) { const struct lightrec_mem_map *map; unsigned int i; - u32 addr; for (i = 0; i < state->nb_maps; i++) { - const struct lightrec_mem_map *mapi = &state->maps[i]; + map = &state->maps[i]; - if (kaddr >= mapi->pc && kaddr < mapi->pc + mapi->length) { - map = mapi; - break; - } + if (kaddr >= map->pc && kaddr < map->pc + map->length) + return (enum psx_map) i; } - if (i == state->nb_maps) + return PSX_MAP_UNKNOWN; +} + +const struct lightrec_mem_map * +lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr) +{ + const struct lightrec_mem_map *map; + enum psx_map idx; + u32 addr; + + idx = lightrec_get_map_idx(state, kaddr); + if (idx == PSX_MAP_UNKNOWN) return NULL; + map = &state->maps[idx]; addr = kaddr - map->pc; while (map->mirror_of) @@ -230,7 +238,7 @@ lightrec_get_map(struct lightrec_state *state, } u32 lightrec_rw(struct lightrec_state *state, union code op, - u32 addr, u32 data, u16 *flags, struct block *block) + u32 addr, u32 data, u32 *flags, struct block *block) { const struct lightrec_mem_map *map; const struct lightrec_mem_map_ops *ops; @@ -245,16 +253,20 @@ u32 lightrec_rw(struct lightrec_state *state, union code op, return 0; } - if (unlikely(map->ops)) { - if (flags) - *flags |= LIGHTREC_HW_IO; - ops = map->ops; - } else { - if (flags) - *flags |= LIGHTREC_DIRECT_IO; + if (likely(!map->ops)) { + if (flags && !LIGHTREC_FLAGS_GET_IO_MODE(*flags)) + *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT); ops = &lightrec_default_ops; + } else if (flags && + LIGHTREC_FLAGS_GET_IO_MODE(*flags) == LIGHTREC_IO_DIRECT_HW) { + ops = &lightrec_default_ops; + } else { + if (flags && !LIGHTREC_FLAGS_GET_IO_MODE(*flags)) + *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW); + + ops = map->ops; } switch (op.i.op) { @@ -298,13 +310,11 @@ u32 lightrec_rw(struct lightrec_state *state, union code op, } static void lightrec_rw_helper(struct lightrec_state *state, - union code op, u16 *flags, + union code op, u32 *flags, struct block *block) { - u32 ret = lightrec_rw(state, op, - state->native_reg_cache[op.i.rs], - state->native_reg_cache[op.i.rt], flags, - block); + u32 ret = lightrec_rw(state, op, state->regs.gpr[op.i.rs], + state->regs.gpr[op.i.rt], flags, block); switch (op.i.op) { case OP_LB: @@ -315,15 +325,16 @@ static void lightrec_rw_helper(struct lightrec_state *state, case OP_LWR: case OP_LW: if (op.i.rt) - state->native_reg_cache[op.i.rt] = ret; - default: /* fall-through */ + state->regs.gpr[op.i.rt] = ret; + fallthrough; + default: break; } } -static void lightrec_rw_cb(struct lightrec_state *state, union code op) +static void lightrec_rw_cb(struct lightrec_state *state, u32 arg) { - lightrec_rw_helper(state, op, NULL, NULL); + 
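/*
 * Illustrative sketch, not part of the patch: why the SWL/LWR masks
 * gained boundary checks above. For the edge shifts, the old
 * expressions evaluated GENMASK(31, 32), i.e. (uintptr_t)-1 << 32,
 * which is undefined behaviour on 32-bit hosts; the intended mask in
 * those cases is simply 0, since the whole word or register is
 * replaced.
 */
#include <assert.h>
#include <stdint.h>

static uint32_t demo_swl_mask(unsigned int shift)	/* shift = addr & 3 */
{
	/* Keep bits 31..(shift + 1) * 8 of the old memory word */
	return shift < 3 ? ~0u << ((shift + 1) * 8) : 0;
}

static uint32_t demo_lwr_mask(unsigned int shift)
{
	/* Keep bits 31..(32 - shift * 8) of the register */
	return shift ? ~0u << (32 - shift * 8) : 0;
}

int main(void)
{
	assert(demo_swl_mask(0) == 0xffffff00);
	assert(demo_swl_mask(3) == 0x00000000);	/* word fully replaced */
	assert(demo_lwr_mask(0) == 0x00000000);	/* reg fully replaced */
	assert(demo_lwr_mask(1) == 0xff000000);
	assert(demo_lwr_mask(3) == 0xffffff00);
	return 0;
}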
lightrec_rw_helper(state, (union code) arg, NULL, NULL); } static void lightrec_rw_generic_cb(struct lightrec_state *state, u32 arg) @@ -332,46 +343,115 @@ static void lightrec_rw_generic_cb(struct lightrec_state *state, u32 arg) struct opcode *op; bool was_tagged; u16 offset = (u16)arg; + u16 old_flags; block = lightrec_find_block_from_lut(state->block_cache, arg >> 16, state->next_pc); if (unlikely(!block)) { pr_err("rw_generic: No block found in LUT for PC 0x%x offset 0x%x\n", state->next_pc, offset); + lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT); return; } op = &block->opcode_list[offset]; - was_tagged = op->flags & (LIGHTREC_HW_IO | LIGHTREC_DIRECT_IO); + was_tagged = LIGHTREC_FLAGS_GET_IO_MODE(op->flags); lightrec_rw_helper(state, op->c, &op->flags, block); if (!was_tagged) { - pr_debug("Opcode of block at PC 0x%08x has been tagged - flag " - "for recompilation\n", block->pc); + old_flags = block_set_flags(block, BLOCK_SHOULD_RECOMPILE); + + if (!(old_flags & BLOCK_SHOULD_RECOMPILE)) { + pr_debug("Opcode of block at PC 0x%08x has been tagged" + " - flag for recompilation\n", block->pc); + + lut_write(state, lut_offset(block->pc), NULL); + } + } +} + +static u32 clamp_s32(s32 val, s32 min, s32 max) +{ + return val < min ? min : val > max ? max : val; +} - block->flags |= BLOCK_SHOULD_RECOMPILE; +static u16 load_u16(u32 *ptr) +{ + return ((struct u16x2 *) ptr)->l; +} + +static void store_u16(u32 *ptr, u16 value) +{ + ((struct u16x2 *) ptr)->l = value; +} + +static u32 lightrec_mfc2(struct lightrec_state *state, u8 reg) +{ + s16 gteir1, gteir2, gteir3; + + switch (reg) { + case 1: + case 3: + case 5: + case 8: + case 9: + case 10: + case 11: + return (s32)(s16) load_u16(&state->regs.cp2d[reg]); + case 7: + case 16: + case 17: + case 18: + case 19: + return load_u16(&state->regs.cp2d[reg]); + case 28: + case 29: + gteir1 = (s16) load_u16(&state->regs.cp2d[9]); + gteir2 = (s16) load_u16(&state->regs.cp2d[10]); + gteir3 = (s16) load_u16(&state->regs.cp2d[11]); + + return clamp_s32(gteir1 >> 7, 0, 0x1f) << 0 | + clamp_s32(gteir2 >> 7, 0, 0x1f) << 5 | + clamp_s32(gteir3 >> 7, 0, 0x1f) << 10; + case 15: + reg = 14; + fallthrough; + default: + return state->regs.cp2d[reg]; } } u32 lightrec_mfc(struct lightrec_state *state, union code op) { - bool is_cfc = (op.i.op == OP_CP0 && op.r.rs == OP_CP0_CFC0) || - (op.i.op == OP_CP2 && op.r.rs == OP_CP2_BASIC_CFC2); - u32 (*func)(struct lightrec_state *, u32, u8); - const struct lightrec_cop_ops *ops; + u32 val; if (op.i.op == OP_CP0) - ops = &state->ops.cop0_ops; - else - ops = &state->ops.cop2_ops; + return state->regs.cp0[op.r.rd]; + else if (op.r.rs == OP_CP2_BASIC_MFC2) + val = lightrec_mfc2(state, op.r.rd); + else { + val = state->regs.cp2c[op.r.rd]; + + switch (op.r.rd) { + case 4: + case 12: + case 20: + case 26: + case 27: + case 29: + case 30: + val = (u32)(s16)val; + fallthrough; + default: + break; + } + } - if (is_cfc) - func = ops->cfc; - else - func = ops->mfc; + if (state->ops.cop2_notify) + (*state->ops.cop2_notify)(state, op.opcode, val); - return (*func)(state, op.opcode, op.r.rd); + return val; } static void lightrec_mfc_cb(struct lightrec_state *state, union code op) @@ -379,85 +459,194 @@ static void lightrec_mfc_cb(struct lightrec_state *state, union code op) u32 rt = lightrec_mfc(state, op); if (op.r.rt) - state->native_reg_cache[op.r.rt] = rt; + state->regs.gpr[op.r.rt] = rt; } -void lightrec_mtc(struct lightrec_state *state, union code op, u32 data) +static void lightrec_mtc0(struct lightrec_state *state, u8 reg, 
u32 data) +{ + u32 status, oldstatus, cause; + + switch (reg) { + case 1: + case 4: + case 8: + case 14: + case 15: + /* Those registers are read-only */ + return; + default: + break; + } + + if (reg == 12) { + status = state->regs.cp0[12]; + oldstatus = status; + + if (status & ~data & BIT(16)) { + state->ops.enable_ram(state, true); + lightrec_invalidate_all(state); + } else if (~status & data & BIT(16)) { + state->ops.enable_ram(state, false); + } + } + + if (reg == 13) { + state->regs.cp0[13] &= ~0x300; + state->regs.cp0[13] |= data & 0x300; + } else { + state->regs.cp0[reg] = data; + } + + if (reg == 12 || reg == 13) { + cause = state->regs.cp0[13]; + status = state->regs.cp0[12]; + + /* Handle software interrupts */ + if (!!(status & cause & 0x300) & status) + lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT); + + /* Handle hardware interrupts */ + if (reg == 12 && !(~status & 0x401) && (~oldstatus & 0x401)) + lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT); + } +} + +static u32 count_leading_bits(s32 data) { - bool is_ctc = (op.i.op == OP_CP0 && op.r.rs == OP_CP0_CTC0) || - (op.i.op == OP_CP2 && op.r.rs == OP_CP2_BASIC_CTC2); - void (*func)(struct lightrec_state *, u32, u8, u32); - const struct lightrec_cop_ops *ops; + u32 cnt = 33; - if (op.i.op == OP_CP0) - ops = &state->ops.cop0_ops; - else - ops = &state->ops.cop2_ops; +#ifdef __has_builtin +#if __has_builtin(__builtin_clrsb) + return 1 + __builtin_clrsb(data); +#endif +#endif - if (is_ctc) - func = ops->ctc; - else - func = ops->mtc; + data = (data ^ (data >> 31)) << 1; + + do { + cnt -= 1; + data >>= 1; + } while (data); - (*func)(state, op.opcode, op.r.rd, data); + return cnt; } -static void lightrec_mtc_cb(struct lightrec_state *state, union code op) +static void lightrec_mtc2(struct lightrec_state *state, u8 reg, u32 data) { - lightrec_mtc(state, op, state->native_reg_cache[op.r.rt]); + switch (reg) { + case 15: + state->regs.cp2d[12] = state->regs.cp2d[13]; + state->regs.cp2d[13] = state->regs.cp2d[14]; + state->regs.cp2d[14] = data; + break; + case 28: + state->regs.cp2d[9] = (data << 7) & 0xf80; + state->regs.cp2d[10] = (data << 2) & 0xf80; + state->regs.cp2d[11] = (data >> 3) & 0xf80; + break; + case 31: + return; + case 30: + state->regs.cp2d[31] = count_leading_bits((s32) data); + fallthrough; + default: + state->regs.cp2d[reg] = data; + break; + } } -static void lightrec_rfe_cb(struct lightrec_state *state, union code op) +static void lightrec_ctc2(struct lightrec_state *state, u8 reg, u32 data) +{ + switch (reg) { + case 4: + case 12: + case 20: + case 26: + case 27: + case 29: + case 30: + store_u16(&state->regs.cp2c[reg], data); + break; + case 31: + data = (data & 0x7ffff000) | !!(data & 0x7f87e000) << 31; + fallthrough; + default: + state->regs.cp2c[reg] = data; + break; + } +} + +void lightrec_mtc(struct lightrec_state *state, union code op, u32 data) +{ + if (op.i.op == OP_CP0) { + lightrec_mtc0(state, op.r.rd, data); + } else { + if (op.r.rs == OP_CP2_BASIC_CTC2) + lightrec_ctc2(state, op.r.rd, data); + else + lightrec_mtc2(state, op.r.rd, data); + + if (state->ops.cop2_notify) + (*state->ops.cop2_notify)(state, op.opcode, data); + } +} + +static void lightrec_mtc_cb(struct lightrec_state *state, u32 arg) +{ + union code op = (union code) arg; + + lightrec_mtc(state, op, state->regs.gpr[op.r.rt]); +} + +void lightrec_rfe(struct lightrec_state *state) { u32 status; /* Read CP0 Status register (r12) */ - status = state->ops.cop0_ops.mfc(state, op.opcode, 12); + status = 
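/*
 * Illustrative sketch, not part of the patch: GTE IRGB/ORGB handling
 * now lives inside lightrec. Writing IRGB (cp2d register 28) spreads
 * three 5-bit colour fields into IR1-IR3, and reading ORGB (28/29)
 * clamps each IR >> 7 back into 0..0x1f and repacks them, as
 * lightrec_mtc2()/lightrec_mfc2() above do. Round-trips for in-range
 * values.
 */
#include <assert.h>
#include <stdint.h>

static int32_t demo_clamp_s32(int32_t val, int32_t min, int32_t max)
{
	return val < min ? min : val > max ? max : val;
}

int main(void)
{
	uint32_t irgb = (0x1f << 10) | (0x10 << 5) | 0x01;
	int16_t ir1, ir2, ir3;
	uint32_t orgb;

	/* MTC2 reg 28: each 5-bit field becomes a 0..0xf80 IR value */
	ir1 = (irgb << 7) & 0xf80;
	ir2 = (irgb << 2) & 0xf80;
	ir3 = (irgb >> 3) & 0xf80;

	/* MFC2 reg 28/29: clamp IR >> 7 into 5 bits and repack */
	orgb = (uint32_t)demo_clamp_s32(ir1 >> 7, 0, 0x1f) << 0 |
	       (uint32_t)demo_clamp_s32(ir2 >> 7, 0, 0x1f) << 5 |
	       (uint32_t)demo_clamp_s32(ir3 >> 7, 0, 0x1f) << 10;

	assert(orgb == irgb);
	return 0;
}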
state->regs.cp0[12]; /* Switch the bits */ status = ((status & 0x3c) >> 2) | (status & ~0xf); /* Write it back */ - state->ops.cop0_ops.ctc(state, op.opcode, 12, status); + lightrec_mtc0(state, 12, status); } -static void lightrec_cp_cb(struct lightrec_state *state, union code op) +void lightrec_cp(struct lightrec_state *state, union code op) { - void (*func)(struct lightrec_state *, u32); - - if (op.i.op == OP_CP2) - func = state->ops.cop2_ops.op; - else - func = state->ops.cop0_ops.op; - - (*func)(state, op.opcode); -} + if (op.i.op == OP_CP0) { + pr_err("Invalid CP opcode to coprocessor #0\n"); + return; + } -static void lightrec_syscall_cb(struct lightrec_state *state, union code op) -{ - lightrec_set_exit_flags(state, LIGHTREC_EXIT_SYSCALL); + (*state->ops.cop2_op)(state, op.opcode); } -static void lightrec_break_cb(struct lightrec_state *state, union code op) +static void lightrec_cp_cb(struct lightrec_state *state, u32 arg) { - lightrec_set_exit_flags(state, LIGHTREC_EXIT_BREAK); + lightrec_cp(state, (union code) arg); } -struct block * lightrec_get_block(struct lightrec_state *state, u32 pc) +static struct block * lightrec_get_block(struct lightrec_state *state, u32 pc) { struct block *block = lightrec_find_block(state->block_cache, pc); + u8 old_flags; if (block && lightrec_block_is_outdated(state, block)) { pr_debug("Block at PC 0x%08x is outdated!\n", block->pc); - /* Make sure the recompiler isn't processing the block we'll - * destroy */ - if (ENABLE_THREADED_COMPILER) - lightrec_recompiler_remove(state->rec, block); + old_flags = block_set_flags(block, BLOCK_IS_DEAD); + if (!(old_flags & BLOCK_IS_DEAD)) { + /* Make sure the recompiler isn't processing the block + * we'll destroy */ + if (ENABLE_THREADED_COMPILER) + lightrec_recompiler_remove(state->rec, block); + + lightrec_unregister_block(state->block_cache, block); + remove_from_code_lut(state->block_cache, block); + lightrec_free_block(state, block); + } - lightrec_unregister_block(state->block_cache, block); - remove_from_code_lut(state->block_cache, block); - lightrec_free_block(state, block); block = NULL; } @@ -480,9 +669,10 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) struct block *block; bool should_recompile; void *func; + int err; for (;;) { - func = state->code_lut[lut_offset(pc)]; + func = lut_read(state, lut_offset(pc)); if (func && func != state->get_next_block) break; @@ -491,23 +681,27 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) if (unlikely(!block)) break; - if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET)) { + if (OPT_REPLACE_MEMSET && + block_has_flag(block, BLOCK_IS_MEMSET)) { func = state->memset_func; break; } - should_recompile = block->flags & BLOCK_SHOULD_RECOMPILE && - !(block->flags & BLOCK_IS_DEAD); + should_recompile = block_has_flag(block, BLOCK_SHOULD_RECOMPILE) && + !block_has_flag(block, BLOCK_IS_DEAD); if (unlikely(should_recompile)) { pr_debug("Block at PC 0x%08x should recompile\n", pc); - lightrec_unregister(MEM_FOR_CODE, block->code_size); - - if (ENABLE_THREADED_COMPILER) + if (ENABLE_THREADED_COMPILER) { lightrec_recompiler_add(state->rec, block); - else - lightrec_compile_block(state, block); + } else { + err = lightrec_compile_block(state->cstate, block); + if (err) { + state->exit_flags = LIGHTREC_EXIT_NOMEM; + return NULL; + } + } } if (ENABLE_THREADED_COMPILER && likely(!should_recompile)) @@ -518,18 +712,33 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) if (likely(func)) break; - 
/* Block wasn't compiled yet - run the interpreter */ - if (!ENABLE_THREADED_COMPILER && - ((ENABLE_FIRST_PASS && likely(!should_recompile)) || - unlikely(block->flags & BLOCK_NEVER_COMPILE))) + if (unlikely(block_has_flag(block, BLOCK_NEVER_COMPILE))) { pc = lightrec_emulate_block(state, block, pc); - if (likely(!(block->flags & BLOCK_NEVER_COMPILE))) { + } else if (!ENABLE_THREADED_COMPILER) { + /* Block wasn't compiled yet - run the interpreter */ + if (block_has_flag(block, BLOCK_FULLY_TAGGED)) + pr_debug("Block fully tagged, skipping first pass\n"); + else if (ENABLE_FIRST_PASS && likely(!should_recompile)) + pc = lightrec_emulate_block(state, block, pc); + /* Then compile it using the profiled data */ - if (ENABLE_THREADED_COMPILER) - lightrec_recompiler_add(state->rec, block); - else - lightrec_compile_block(state, block); + err = lightrec_compile_block(state->cstate, block); + if (err) { + state->exit_flags = LIGHTREC_EXIT_NOMEM; + return NULL; + } + } else if (unlikely(block_has_flag(block, BLOCK_IS_DEAD))) { + /* + * If the block is dead but has never been compiled, + * then its function pointer is NULL and we cannot + * execute the block. In that case, reap all the dead + * blocks now, and in the next loop we will create a + * new block. + */ + lightrec_reaper_reap(state->reaper); + } else { + lightrec_recompiler_add(state->rec, block); } if (state->exit_flags != LIGHTREC_EXIT_NORMAL || @@ -541,15 +750,104 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) return func; } -static s32 c_function_wrapper(struct lightrec_state *state, s32 cycles_delta, - void (*f)(struct lightrec_state *, u32 d), - u32 d) +static void * lightrec_alloc_code(struct lightrec_state *state, size_t size) { - state->current_cycle = state->target_cycle - cycles_delta; + void *code; - (*f)(state, d); + if (ENABLE_THREADED_COMPILER) + lightrec_code_alloc_lock(state); + + code = tlsf_malloc(state->tlsf, size); - return state->target_cycle - state->current_cycle; + if (ENABLE_THREADED_COMPILER) + lightrec_code_alloc_unlock(state); + + return code; +} + +static void lightrec_realloc_code(struct lightrec_state *state, + void *ptr, size_t size) +{ + /* NOTE: 'size' MUST be smaller than the size specified during + * the allocation. */ + + if (ENABLE_THREADED_COMPILER) + lightrec_code_alloc_lock(state); + + tlsf_realloc(state->tlsf, ptr, size); + + if (ENABLE_THREADED_COMPILER) + lightrec_code_alloc_unlock(state); +} + +static void lightrec_free_code(struct lightrec_state *state, void *ptr) +{ + if (ENABLE_THREADED_COMPILER) + lightrec_code_alloc_lock(state); + + tlsf_free(state->tlsf, ptr); + + if (ENABLE_THREADED_COMPILER) + lightrec_code_alloc_unlock(state); +} + +static void * lightrec_emit_code(struct lightrec_state *state, + const struct block *block, + jit_state_t *_jit, unsigned int *size) +{ + bool has_code_buffer = ENABLE_CODE_BUFFER && state->tlsf; + jit_word_t code_size, new_code_size; + void *code; + + jit_realize(); + + if (!ENABLE_DISASSEMBLER) + jit_set_data(NULL, 0, JIT_DISABLE_DATA | JIT_DISABLE_NOTE); + + if (has_code_buffer) { + jit_get_code(&code_size); + code = lightrec_alloc_code(state, (size_t) code_size); + + if (!code) { + if (ENABLE_THREADED_COMPILER) { + /* If we're using the threaded compiler, return + * an allocation error here. The threaded + * compiler will then empty its job queue and + * request a code flush using the reaper. 
*/ + return NULL; + } + + /* Remove outdated blocks, and try again */ + lightrec_remove_outdated_blocks(state->block_cache, block); + + pr_debug("Re-try to alloc %zu bytes...\n", code_size); + + code = lightrec_alloc_code(state, code_size); + if (!code) { + pr_err("Could not alloc even after removing old blocks!\n"); + return NULL; + } + } + + jit_set_code(code, code_size); + } + + code = jit_emit(); + + jit_get_code(&new_code_size); + lightrec_register(MEM_FOR_CODE, new_code_size); + + if (has_code_buffer) { + lightrec_realloc_code(state, code, (size_t) new_code_size); + + pr_debug("Creating code block at address 0x%" PRIxPTR ", " + "code size: %" PRIuPTR " new: %" PRIuPTR "\n", + (uintptr_t) code, code_size, new_code_size); + } + + *size = (unsigned int) new_code_size; + + return code; } static struct block * generate_wrapper(struct lightrec_state *state) @@ -557,9 +855,8 @@ static struct block * generate_wrapper(struct lightrec_state *state) struct block *block; jit_state_t *_jit; unsigned int i; - int stack_ptr; - jit_word_t code_size; - jit_node_t *to_tramp, *to_fn_epilog; + jit_node_t *addr[C_WRAPPERS_COUNT - 1]; + jit_node_t *to_end[C_WRAPPERS_COUNT - 1]; block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block)); if (!block) @@ -574,58 +871,82 @@ static struct block * generate_wrapper(struct lightrec_state *state) /* Wrapper entry point */ jit_prolog(); + jit_tramp(256); - stack_ptr = jit_allocai(sizeof(uintptr_t) * NUM_TEMPS); - - for (i = 0; i < NUM_TEMPS; i++) - jit_stxi(stack_ptr + i * sizeof(uintptr_t), JIT_FP, JIT_R(i)); - - /* Jump to the trampoline */ - to_tramp = jit_jmpi(); + /* Add entry points */ + for (i = C_WRAPPERS_COUNT - 1; i > 0; i--) { + jit_ldxi(JIT_R1, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, c_wrappers[i])); + to_end[i - 1] = jit_b(); + addr[i - 1] = jit_indirect(); + } - /* The trampoline will jump back here */ - to_fn_epilog = jit_label(); + jit_ldxi(JIT_R1, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, c_wrappers[0])); - for (i = 0; i < NUM_TEMPS; i++) - jit_ldxi(JIT_R(i), JIT_FP, stack_ptr + i * sizeof(uintptr_t)); + for (i = 0; i < C_WRAPPERS_COUNT - 1; i++) + jit_patch(to_end[i]); - jit_ret(); jit_epilog(); - - /* Trampoline entry point. - * The sole purpose of the trampoline is to cheese Lightning not to - * save/restore the callee-saved register LIGHTREC_REG_CYCLE, since we - * do want to return to the caller with this register modified. 
*/ jit_prolog(); - jit_tramp(256); - jit_patch(to_tramp); + + /* Save all temporaries on stack */ + for (i = 0; i < NUM_TEMPS; i++) { + if (i + FIRST_TEMP != 1) { + jit_stxi(offsetof(struct lightrec_state, wrapper_regs[i]), + LIGHTREC_REG_STATE, JIT_R(i + FIRST_TEMP)); + } + } + + jit_getarg(JIT_R2, jit_arg()); jit_prepare(); jit_pushargr(LIGHTREC_REG_STATE); - jit_pushargr(LIGHTREC_REG_CYCLE); - jit_pushargr(JIT_R0); - jit_pushargr(JIT_R1); - jit_finishi(c_function_wrapper); - -#if __WORDSIZE == 64 - jit_retval_i(LIGHTREC_REG_CYCLE); -#else - jit_retval(LIGHTREC_REG_CYCLE); -#endif + jit_pushargr(JIT_R2); + + jit_ldxi_ui(JIT_R2, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, target_cycle)); + + /* state->current_cycle = state->target_cycle - delta; */ + jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, LIGHTREC_REG_CYCLE); + jit_stxi_i(offsetof(struct lightrec_state, current_cycle), + LIGHTREC_REG_STATE, LIGHTREC_REG_CYCLE); + + /* Call the wrapper function */ + jit_finishr(JIT_R1); + + /* delta = state->target_cycle - state->current_cycle */; + jit_ldxi_ui(LIGHTREC_REG_CYCLE, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, current_cycle)); + jit_ldxi_ui(JIT_R1, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, target_cycle)); + jit_subr(LIGHTREC_REG_CYCLE, JIT_R1, LIGHTREC_REG_CYCLE); + + /* Restore temporaries from stack */ + for (i = 0; i < NUM_TEMPS; i++) { + if (i + FIRST_TEMP != 1) { + jit_ldxi(JIT_R(i + FIRST_TEMP), LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, wrapper_regs[i])); + } + } - jit_patch_at(jit_jmpi(), to_fn_epilog); + jit_ret(); jit_epilog(); block->_jit = _jit; - block->function = jit_emit(); block->opcode_list = NULL; - block->flags = 0; + block->flags = BLOCK_NO_OPCODE_LIST; block->nb_ops = 0; - jit_get_code(&code_size); - lightrec_register(MEM_FOR_CODE, code_size); + block->function = lightrec_emit_code(state, block, _jit, + &block->code_size); + if (!block->function) + goto err_free_block; + + state->wrappers_eps[C_WRAPPERS_COUNT - 1] = block->function; - block->code_size = code_size; + for (i = 0; i < C_WRAPPERS_COUNT - 1; i++) + state->wrappers_eps[i] = jit_address(addr[i]); if (ENABLE_DISASSEMBLER) { pr_debug("Wrapper block:\n"); @@ -644,10 +965,10 @@ static struct block * generate_wrapper(struct lightrec_state *state) static u32 lightrec_memset(struct lightrec_state *state) { - u32 kunseg_pc = kunseg(state->native_reg_cache[4]); + u32 kunseg_pc = kunseg(state->regs.gpr[4]); void *host; const struct lightrec_mem_map *map = lightrec_get_map(state, &host, kunseg_pc); - u32 length = state->native_reg_cache[5] * 4; + u32 length = state->regs.gpr[5] * 4; if (!map) { pr_err("Unable to find memory map for memset target address " @@ -655,7 +976,7 @@ static u32 lightrec_memset(struct lightrec_state *state) return 0; } - pr_debug("Calling host memset, PC 0x%x (host address 0x%lx) for %u bytes\n", + pr_debug("Calling host memset, PC 0x%x (host address 0x%" PRIxPTR ") for %u bytes\n", kunseg_pc, (uintptr_t)host, length); memset(host, 0, length); @@ -670,10 +991,9 @@ static struct block * generate_dispatcher(struct lightrec_state *state) { struct block *block; jit_state_t *_jit; - jit_node_t *to_end, *to_c, *loop, *addr, *addr2, *addr3; + jit_node_t *to_end, *loop, *addr, *addr2, *addr3; unsigned int i; - u32 offset, ram_len; - jit_word_t code_size; + u32 offset; block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block)); if (!block) @@ -689,16 +1009,12 @@ static struct block * generate_dispatcher(struct lightrec_state *state) jit_prolog(); jit_frame(256); - 
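/*
 * Illustrative sketch, not part of the patch: the JIT keeps only a
 * cycle countdown (LIGHTREC_REG_CYCLE = target_cycle - current_cycle)
 * in a register. Around every call into C code, the wrapper converts
 * between the two representations, which is exactly what the jit_subr
 * pairs above compute.
 */
#include <assert.h>
#include <stdint.h>

struct demo_state {
	uint32_t current_cycle;
	uint32_t target_cycle;
};

/* Entering C code: materialize current_cycle from the countdown */
static void demo_enter_c(struct demo_state *s, uint32_t delta)
{
	s->current_cycle = s->target_cycle - delta;
}

/* Back to JIT code: recompute the countdown; the C side may have
 * changed either counter (e.g. the interpreter ran for a while) */
static uint32_t demo_leave_c(const struct demo_state *s)
{
	return s->target_cycle - s->current_cycle;
}

int main(void)
{
	struct demo_state s = { .target_cycle = 1000 };

	demo_enter_c(&s, 300);		/* 700 cycles already executed */
	assert(s.current_cycle == 700);

	s.current_cycle += 100;		/* C handler consumed 100 cycles */
	assert(demo_leave_c(&s) == 200);
	return 0;
}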
jit_getarg(JIT_R0, jit_arg()); -#if __WORDSIZE == 64 + jit_getarg(JIT_V1, jit_arg()); jit_getarg_i(LIGHTREC_REG_CYCLE, jit_arg()); -#else - jit_getarg(LIGHTREC_REG_CYCLE, jit_arg()); -#endif /* Force all callee-saved registers to be pushed on the stack */ for (i = 0; i < NUM_REGS; i++) - jit_movr(JIT_V(i), JIT_V(i)); + jit_movr(JIT_V(i + FIRST_REG), JIT_V(i + FIRST_REG)); /* Pass lightrec_state structure to blocks, using the last callee-saved * register that Lightning provides */ @@ -707,27 +1023,24 @@ static struct block * generate_dispatcher(struct lightrec_state *state) loop = jit_label(); /* Call the block's code */ - jit_jmpr(JIT_R0); + jit_jmpr(JIT_V1); if (OPT_REPLACE_MEMSET) { /* Blocks will jump here when they need to call * lightrec_memset() */ addr3 = jit_indirect(); + jit_movr(JIT_V1, LIGHTREC_REG_CYCLE); + jit_prepare(); jit_pushargr(LIGHTREC_REG_STATE); jit_finishi(lightrec_memset); -#if __WORDSIZE == 64 jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, native_reg_cache[31])); -#else - jit_ldxi_i(JIT_V0, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, native_reg_cache[31])); -#endif + offsetof(struct lightrec_state, regs.gpr[31])); - jit_retval(JIT_R0); - jit_subr(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, JIT_R0); + jit_retval(LIGHTREC_REG_CYCLE); + jit_subr(LIGHTREC_REG_CYCLE, JIT_V1, LIGHTREC_REG_CYCLE); } /* The block will jump here, with the number of cycles remaining in @@ -742,43 +1055,53 @@ static struct block * generate_dispatcher(struct lightrec_state *state) to_end = jit_blei(LIGHTREC_REG_CYCLE, 0); /* Convert next PC to KUNSEG and avoid mirrors */ - ram_len = state->maps[PSX_MAP_KERNEL_USER_RAM].length; - jit_andi(JIT_R0, JIT_V0, 0x10000000 | (ram_len - 1)); - to_c = jit_bgei(JIT_R0, ram_len); - - /* Fast path: code is running from RAM, use the code LUT */ -#if __WORDSIZE == 64 - jit_lshi(JIT_R0, JIT_R0, 1); -#endif - jit_addr(JIT_R0, JIT_R0, LIGHTREC_REG_STATE); - jit_ldxi(JIT_R0, JIT_R0, offsetof(struct lightrec_state, code_lut)); + jit_andi(JIT_V1, JIT_V0, 0x10000000 | (RAM_SIZE - 1)); + jit_rshi_u(JIT_R1, JIT_V1, 28); + jit_andi(JIT_R2, JIT_V0, BIOS_SIZE - 1); + jit_addi(JIT_R2, JIT_R2, RAM_SIZE); + jit_movnr(JIT_V1, JIT_R2, JIT_R1); + + /* If possible, use the code LUT */ + if (!lut_is_32bit(state)) + jit_lshi(JIT_V1, JIT_V1, 1); + jit_addr(JIT_V1, JIT_V1, LIGHTREC_REG_STATE); + + offset = offsetof(struct lightrec_state, code_lut); + if (lut_is_32bit(state)) + jit_ldxi_ui(JIT_V1, JIT_V1, offset); + else + jit_ldxi(JIT_V1, JIT_V1, offset); /* If we get non-NULL, loop */ - jit_patch_at(jit_bnei(JIT_R0, 0), loop); + jit_patch_at(jit_bnei(JIT_V1, 0), loop); + + /* The code LUT will be set to this address when the block at the target + * PC has been preprocessed but not yet compiled by the threaded + * recompiler */ + addr = jit_indirect(); /* Slow path: call C function get_next_block_func() */ - jit_patch(to_c); if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) { /* We may call the interpreter - update state->current_cycle */ jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, offsetof(struct lightrec_state, target_cycle)); - jit_subr(JIT_R1, JIT_R2, LIGHTREC_REG_CYCLE); + jit_subr(JIT_V1, JIT_R2, LIGHTREC_REG_CYCLE); jit_stxi_i(offsetof(struct lightrec_state, current_cycle), - LIGHTREC_REG_STATE, JIT_R1); + LIGHTREC_REG_STATE, JIT_V1); } - /* The code LUT will be set to this address when the block at the target - * PC has been preprocessed but not yet compiled by the threaded - * recompiler */ - addr = jit_indirect(); - - /* Get the next 
block */ jit_prepare(); jit_pushargr(LIGHTREC_REG_STATE); jit_pushargr(JIT_V0); + + /* Save the cycles register if needed */ + if (!(ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES)) + jit_movr(JIT_V0, LIGHTREC_REG_CYCLE); + + /* Get the next block */ jit_finishi(&get_next_block_func); - jit_retval(JIT_R0); + jit_retval(JIT_V1); if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) { /* The interpreter may have updated state->current_cycle and @@ -788,10 +1111,12 @@ static struct block * generate_dispatcher(struct lightrec_state *state) jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, offsetof(struct lightrec_state, target_cycle)); jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, JIT_R1); + } else { + jit_movr(LIGHTREC_REG_CYCLE, JIT_V0); } /* If we get non-NULL, loop */ - jit_patch_at(jit_bnei(JIT_R0, 0), loop); + jit_patch_at(jit_bnei(JIT_V1, 0), loop); /* When exiting, the recompiled code will jump to that address */ jit_note(__FILE__, __LINE__); @@ -801,15 +1126,14 @@ static struct block * generate_dispatcher(struct lightrec_state *state) jit_epilog(); block->_jit = _jit; - block->function = jit_emit(); block->opcode_list = NULL; - block->flags = 0; + block->flags = BLOCK_NO_OPCODE_LIST; block->nb_ops = 0; - jit_get_code(&code_size); - lightrec_register(MEM_FOR_CODE, code_size); - - block->code_size = code_size; + block->function = lightrec_emit_code(state, block, _jit, + &block->code_size); + if (!block->function) + goto err_free_block; state->eob_wrapper_func = jit_address(addr2); if (OPT_REPLACE_MEMSET) @@ -834,12 +1158,12 @@ static struct block * generate_dispatcher(struct lightrec_state *state) union code lightrec_read_opcode(struct lightrec_state *state, u32 pc) { - void *host; + void *host = NULL; lightrec_get_map(state, &host, kunseg(pc)); const u32 *code = (u32 *)host; - return (union code) *code; + return (union code) LE32TOH(*code); } unsigned int lightrec_cycles_of_opcode(union code code) @@ -847,11 +1171,13 @@ unsigned int lightrec_cycles_of_opcode(union code code) return 2; } -void lightrec_free_opcode_list(struct lightrec_state *state, struct block *block) +void lightrec_free_opcode_list(struct lightrec_state *state, struct opcode *ops) { + struct opcode_list *list = container_of(ops, struct opcode_list, ops); + lightrec_free(state, MEM_FOR_IR, - sizeof(*block->opcode_list) * block->nb_ops, - block->opcode_list); + sizeof(*list) + list->nb_ops * sizeof(struct opcode), + list); } static unsigned int lightrec_get_mips_block_len(const u32 *src) @@ -873,25 +1199,28 @@ static unsigned int lightrec_get_mips_block_len(const u32 *src) static struct opcode * lightrec_disassemble(struct lightrec_state *state, const u32 *src, unsigned int *len) { - struct opcode *list; + struct opcode_list *list; unsigned int i, length; length = lightrec_get_mips_block_len(src); - list = lightrec_malloc(state, MEM_FOR_IR, sizeof(*list) * length); + list = lightrec_malloc(state, MEM_FOR_IR, + sizeof(*list) + sizeof(struct opcode) * length); if (!list) { pr_err("Unable to allocate memory\n"); return NULL; } + list->nb_ops = (u16) length; + for (i = 0; i < length; i++) { - list[i].opcode = LE32TOH(src[i]); - list[i].flags = 0; + list->ops[i].opcode = LE32TOH(src[i]); + list->ops[i].flags = 0; } *len = length * sizeof(u32); - return list; + return list->ops; } static struct block * lightrec_precompile_block(struct lightrec_state *state, @@ -899,10 +1228,12 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state, { struct opcode *list; struct block *block; - void *host; + void *host, *addr; 
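/*
 * Illustrative sketch, not part of the patch: the reworked dispatcher
 * above indexes the code LUT for both RAM and BIOS. RAM mirrors are
 * masked off, and BIOS addresses (bit 28 of the kunseg'd PC set) map
 * to a second LUT region appended after the RAM entries - the C
 * equivalent of the jit_andi/jit_rshi_u/jit_movnr sequence. Results
 * here are byte offsets, assuming 32-bit LUT entries so that the
 * aligned PC's low bits already carry the times-four scaling.
 */
#include <assert.h>
#include <stdint.h>

#define DEMO_RAM_SIZE	0x200000
#define DEMO_BIOS_SIZE	0x80000

static uint32_t demo_lut_index(uint32_t kunseg_pc)
{
	if (kunseg_pc & 0x10000000)	/* BIOS, at 0x1fc00000 */
		return DEMO_RAM_SIZE + (kunseg_pc & (DEMO_BIOS_SIZE - 1));

	return kunseg_pc & (DEMO_RAM_SIZE - 1);	/* RAM and its mirrors */
}

int main(void)
{
	assert(demo_lut_index(0x00200000) == 0); /* mirror of RAM base */
	assert(demo_lut_index(0x1fc00000) == DEMO_RAM_SIZE);
	assert(demo_lut_index(0x1fc00004) == DEMO_RAM_SIZE + 4);
	return 0;
}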
const struct lightrec_mem_map *map = lightrec_get_map(state, &host, kunseg(pc)); const u32 *code = (u32 *) host; unsigned int length; + bool fully_tagged; + u8 block_flags = 0; if (!map) return NULL; @@ -927,9 +1258,7 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state, block->next = NULL; block->flags = 0; block->code_size = 0; -#if ENABLE_THREADED_COMPILER - block->op_list_freed = (atomic_flag)ATOMIC_FLAG_INIT; -#endif + block->precompile_date = state->current_cycle; block->nb_ops = length / sizeof(u32); lightrec_optimize(state, block); @@ -939,7 +1268,7 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state, lightrec_register(MEM_FOR_MIPS_CODE, length); if (ENABLE_DISASSEMBLER) { - pr_debug("Disassembled block at PC: 0x%x\n", block->pc); + pr_debug("Disassembled block at PC: 0x%08x\n", block->pc); lightrec_print_disassembly(block, code); } @@ -948,13 +1277,23 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state, /* If the first opcode is an 'impossible' branch, never compile the * block */ if (should_emulate(block->opcode_list)) - block->flags |= BLOCK_NEVER_COMPILE; + block_flags |= BLOCK_NEVER_COMPILE; + + fully_tagged = lightrec_block_is_fully_tagged(block); + if (fully_tagged) + block_flags |= BLOCK_FULLY_TAGGED; - if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET)) - state->code_lut[lut_offset(pc)] = state->memset_func; + if (block_flags) + block_set_flags(block, block_flags); block->hash = lightrec_calculate_block_hash(block); + if (OPT_REPLACE_MEMSET && block_has_flag(block, BLOCK_IS_MEMSET)) + addr = state->memset_func; + else + addr = state->get_next_block; + lut_write(state, lut_offset(pc), addr); + pr_debug("Recompile count: %u\n", state->nb_precompile++); return block; @@ -986,10 +1325,10 @@ static bool lightrec_block_is_fully_tagged(const struct block *block) case OP_SWR: case OP_LWC2: case OP_SWC2: - if (!(op->flags & (LIGHTREC_DIRECT_IO | - LIGHTREC_HW_IO))) + if (!LIGHTREC_FLAGS_GET_IO_MODE(op->flags)) return false; - default: /* fall-through */ + fallthrough; + default: continue; } } @@ -1002,6 +1341,7 @@ static void lightrec_reap_block(struct lightrec_state *state, void *data) struct block *block = data; pr_debug("Reap dead block at PC 0x%08x\n", block->pc); + lightrec_unregister_block(state->block_cache, block); lightrec_free_block(state, block); } @@ -1010,35 +1350,58 @@ static void lightrec_reap_jit(struct lightrec_state *state, void *data) _jit_destroy_state(data); } -int lightrec_compile_block(struct lightrec_state *state, struct block *block) +static void lightrec_free_function(struct lightrec_state *state, void *fn) +{ + if (ENABLE_CODE_BUFFER && state->tlsf) { + pr_debug("Freeing code block at 0x%" PRIxPTR "\n", (uintptr_t) fn); + lightrec_free_code(state, fn); + } +} + +static void lightrec_reap_function(struct lightrec_state *state, void *data) +{ + lightrec_free_function(state, data); +} + +static void lightrec_reap_opcode_list(struct lightrec_state *state, void *data) { + lightrec_free_opcode_list(state, data); +} + +int lightrec_compile_block(struct lightrec_cstate *cstate, + struct block *block) +{ + struct lightrec_state *state = cstate->state; struct lightrec_branch_target *target; - bool op_list_freed = false, fully_tagged = false; + bool fully_tagged = false; struct block *block2; struct opcode *elm; jit_state_t *_jit, *oldjit; jit_node_t *start_of_block; bool skip_next = false; - jit_word_t code_size; + void *old_fn, *new_fn; + size_t old_code_size; unsigned int 
i, j; + u8 old_flags; u32 offset; fully_tagged = lightrec_block_is_fully_tagged(block); if (fully_tagged) - block->flags |= BLOCK_FULLY_TAGGED; + block_set_flags(block, BLOCK_FULLY_TAGGED); _jit = jit_new_state(); if (!_jit) return -ENOMEM; oldjit = block->_jit; + old_fn = block->function; + old_code_size = block->code_size; block->_jit = _jit; - lightrec_regcache_reset(state->reg_cache); - state->cycles = 0; - state->nb_branches = 0; - state->nb_local_branches = 0; - state->nb_targets = 0; + lightrec_regcache_reset(cstate->reg_cache); + cstate->cycles = 0; + cstate->nb_local_branches = 0; + cstate->nb_targets = 0; jit_prolog(); jit_tramp(256); @@ -1053,33 +1416,29 @@ int lightrec_compile_block(struct lightrec_state *state, struct block *block) continue; } - state->cycles += lightrec_cycles_of_opcode(elm->c); - if (should_emulate(elm)) { pr_debug("Branch at offset 0x%x will be emulated\n", i << 2); - lightrec_emit_eob(state, block, i); - skip_next = !(elm->flags & LIGHTREC_NO_DS); + lightrec_emit_eob(cstate, block, i); + skip_next = !op_flag_no_ds(elm->flags); } else { - lightrec_rec_opcode(state, block, i); - skip_next = has_delay_slot(elm->c) && - !(elm->flags & LIGHTREC_NO_DS); + lightrec_rec_opcode(cstate, block, i); + skip_next = !op_flag_no_ds(elm->flags) && has_delay_slot(elm->c); #if _WIN32 /* FIXME: GNU Lightning on Windows seems to use our * mapped registers as temporaries. Until the actual bug * is found and fixed, unconditionally mark our * registers as live here. */ - lightrec_regcache_mark_live(state->reg_cache, _jit); + lightrec_regcache_mark_live(cstate->reg_cache, _jit); #endif } - } - for (i = 0; i < state->nb_branches; i++) - jit_patch(state->branches[i]); + cstate->cycles += lightrec_cycles_of_opcode(elm->c); + } - for (i = 0; i < state->nb_local_branches; i++) { - struct lightrec_branch *branch = &state->local_branches[i]; + for (i = 0; i < cstate->nb_local_branches; i++) { + struct lightrec_branch *branch = &cstate->local_branches[i]; pr_debug("Patch local branch to offset 0x%x\n", branch->target << 2); @@ -1089,105 +1448,140 @@ int lightrec_compile_block(struct lightrec_state *state, struct block *block) continue; } - for (j = 0; j < state->nb_targets; j++) { - if (state->targets[j].offset == branch->target) { + for (j = 0; j < cstate->nb_targets; j++) { + if (cstate->targets[j].offset == branch->target) { jit_patch_at(branch->branch, - state->targets[j].label); + cstate->targets[j].label); break; } } - if (j == state->nb_targets) + if (j == cstate->nb_targets) pr_err("Unable to find branch target\n"); } - jit_ldxi(JIT_R0, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, eob_wrapper_func)); - - jit_jmpr(JIT_R0); - jit_ret(); jit_epilog(); - block->function = jit_emit(); - block->flags &= ~BLOCK_SHOULD_RECOMPILE; + new_fn = lightrec_emit_code(state, block, _jit, &block->code_size); + if (!new_fn) { + if (!ENABLE_THREADED_COMPILER) + pr_err("Unable to compile block!\n"); + block->_jit = oldjit; + jit_clear_state(); + _jit_destroy_state(_jit); + return -ENOMEM; + } - /* Add compiled function to the LUT */ - state->code_lut[lut_offset(block->pc)] = block->function; + /* Pause the reaper, because lightrec_reset_lut_offset() may try to set + * the old block->function pointer to the code LUT. 
*/ + if (ENABLE_THREADED_COMPILER) + lightrec_reaper_pause(state->reaper); - /* Fill code LUT with the block's entry points */ - for (i = 0; i < state->nb_targets; i++) { - target = &state->targets[i]; + block->function = new_fn; + block_clear_flags(block, BLOCK_SHOULD_RECOMPILE); - if (target->offset) { - offset = lut_offset(block->pc) + target->offset; - state->code_lut[offset] = jit_address(target->label); - } - } + /* Add compiled function to the LUT */ + lut_write(state, lut_offset(block->pc), block->function); + + if (ENABLE_THREADED_COMPILER) + lightrec_reaper_continue(state->reaper); /* Detect old blocks that have been covered by the new one */ - for (i = 0; i < state->nb_targets; i++) { - target = &state->targets[i]; + for (i = 0; i < cstate->nb_targets; i++) { + target = &cstate->targets[i]; if (!target->offset) continue; offset = block->pc + target->offset * sizeof(u32); + + /* Pause the reaper while we search for the block until we set + * the BLOCK_IS_DEAD flag, otherwise the block may be removed + * under our feet. */ + if (ENABLE_THREADED_COMPILER) + lightrec_reaper_pause(state->reaper); + block2 = lightrec_find_block(state->block_cache, offset); if (block2) { /* No need to check if block2 is compilable - it must * be, otherwise block wouldn't be compilable either */ - block2->flags |= BLOCK_IS_DEAD; + /* Set the "block dead" flag to prevent the dynarec from + * recompiling this block */ + old_flags = block_set_flags(block2, BLOCK_IS_DEAD); + } + + if (ENABLE_THREADED_COMPILER) { + lightrec_reaper_continue(state->reaper); + + /* If block2 was pending for compilation, cancel it. + * If it's being compiled right now, wait until it + * finishes. */ + if (block2) + lightrec_recompiler_remove(state->rec, block2); + } + + /* We know from now on that block2 (if present) isn't going to + * be compiled. We can override the LUT entry with our new + * block's entry point. */ + offset = lut_offset(block->pc) + target->offset; + lut_write(state, offset, jit_address(target->label)); + if (block2) { pr_debug("Reap block 0x%08x as it's covered by block " "0x%08x\n", block2->pc, block->pc); - lightrec_unregister_block(state->block_cache, block2); - - if (ENABLE_THREADED_COMPILER) { - lightrec_recompiler_remove(state->rec, block2); + /* Finally, reap the block. 
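+ *
+ * A hedged sketch of the two paths taken just below: single-threaded
+ * builds can free block2 inline, while threaded builds defer the free
+ * to the reaper, presumably so no other thread can still be running
+ * the old generated code when it is released:
+ *
+ *   if (!ENABLE_THREADED_COMPILER)
+ *       lightrec_free_block(state, block2);   // no concurrent user
+ *   else
+ *       lightrec_reaper_add(state->reaper,
+ *                           lightrec_reap_block, block2);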
*/ + if (!ENABLE_THREADED_COMPILER) { + lightrec_unregister_block(state->block_cache, block2); + lightrec_free_block(state, block2); + } else if (!(old_flags & BLOCK_IS_DEAD)) { lightrec_reaper_add(state->reaper, lightrec_reap_block, block2); - } else { - lightrec_free_block(state, block2); } } } - jit_get_code(&code_size); - lightrec_register(MEM_FOR_CODE, code_size); - - block->code_size = code_size; - if (ENABLE_DISASSEMBLER) { - pr_debug("Compiling block at PC: 0x%x\n", block->pc); + pr_debug("Compiling block at PC: 0x%08x\n", block->pc); jit_disassemble(); } jit_clear_state(); -#if ENABLE_THREADED_COMPILER if (fully_tagged) - op_list_freed = atomic_flag_test_and_set(&block->op_list_freed); -#endif - if (fully_tagged && !op_list_freed) { + old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST); + + if (fully_tagged && !(old_flags & BLOCK_NO_OPCODE_LIST)) { pr_debug("Block PC 0x%08x is fully tagged" " - free opcode list\n", block->pc); - lightrec_free_opcode_list(state, block); - block->opcode_list = NULL; + + if (ENABLE_THREADED_COMPILER) { + lightrec_reaper_add(state->reaper, + lightrec_reap_opcode_list, + block->opcode_list); + } else { + lightrec_free_opcode_list(state, block->opcode_list); + } } if (oldjit) { pr_debug("Block 0x%08x recompiled, reaping old jit context.\n", block->pc); - if (ENABLE_THREADED_COMPILER) + if (ENABLE_THREADED_COMPILER) { lightrec_reaper_add(state->reaper, lightrec_reap_jit, oldjit); - else + lightrec_reaper_add(state->reaper, + lightrec_reap_function, old_fn); + } else { _jit_destroy_state(oldjit); + lightrec_free_function(state, old_fn); + } + + lightrec_unregister(MEM_FOR_CODE, old_code_size); } return 0; @@ -1240,20 +1634,24 @@ u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle) return state->next_pc; } -u32 lightrec_execute_one(struct lightrec_state *state, u32 pc) +u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc, + u32 target_cycle) { - return lightrec_execute(state, pc, state->current_cycle); -} - -u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc) -{ - struct block *block = lightrec_get_block(state, pc); - if (!block) - return 0; + struct block *block; state->exit_flags = LIGHTREC_EXIT_NORMAL; + state->target_cycle = target_cycle; + + do { + block = lightrec_get_block(state, pc); + if (!block) + break; + + pc = lightrec_emulate_block(state, block, pc); - pc = lightrec_emulate_block(state, block, pc); + if (ENABLE_THREADED_COMPILER) + lightrec_reaper_reap(state->reaper); + } while (state->current_cycle < state->target_cycle); if (LOG_LEVEL >= INFO_L) lightrec_print_info(state); @@ -1263,64 +1661,117 @@ u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc) void lightrec_free_block(struct lightrec_state *state, struct block *block) { + u8 old_flags; + lightrec_unregister(MEM_FOR_MIPS_CODE, block->nb_ops * sizeof(u32)); - if (block->opcode_list) - lightrec_free_opcode_list(state, block); + old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST); + + if (!(old_flags & BLOCK_NO_OPCODE_LIST)) + lightrec_free_opcode_list(state, block->opcode_list); if (block->_jit) _jit_destroy_state(block->_jit); - lightrec_unregister(MEM_FOR_CODE, block->code_size); + if (block->function) { + lightrec_free_function(state, block->function); + lightrec_unregister(MEM_FOR_CODE, block->code_size); + } lightrec_free(state, MEM_FOR_IR, sizeof(*block), block); } +struct lightrec_cstate * lightrec_create_cstate(struct lightrec_state *state) +{ + struct lightrec_cstate *cstate; + + cstate = 
lightrec_malloc(state, MEM_FOR_LIGHTREC, sizeof(*cstate)); + if (!cstate) + return NULL; + + cstate->reg_cache = lightrec_regcache_init(state); + if (!cstate->reg_cache) { + lightrec_free(state, MEM_FOR_LIGHTREC, sizeof(*cstate), cstate); + return NULL; + } + + cstate->state = state; + + return cstate; +} + +void lightrec_free_cstate(struct lightrec_cstate *cstate) +{ + lightrec_free_regcache(cstate->reg_cache); + lightrec_free(cstate->state, MEM_FOR_LIGHTREC, sizeof(*cstate), cstate); +} + struct lightrec_state * lightrec_init(char *argv0, const struct lightrec_mem_map *map, size_t nb, const struct lightrec_ops *ops) { + const struct lightrec_mem_map *codebuf_map = &map[PSX_MAP_CODE_BUFFER]; struct lightrec_state *state; + uintptr_t addr; + void *tlsf = NULL; + bool with_32bit_lut = false; + size_t lut_size; /* Sanity-check ops */ - if (!ops || - !ops->cop0_ops.mfc || !ops->cop0_ops.cfc || !ops->cop0_ops.mtc || - !ops->cop0_ops.ctc || !ops->cop0_ops.op || - !ops->cop2_ops.mfc || !ops->cop2_ops.cfc || !ops->cop2_ops.mtc || - !ops->cop2_ops.ctc || !ops->cop2_ops.op) { + if (!ops || !ops->cop2_op || !ops->enable_ram) { pr_err("Missing callbacks in lightrec_ops structure\n"); return NULL; } + if (ops->cop2_notify) + pr_debug("Optional cop2_notify callback in lightrec_ops\n"); + else + pr_debug("No optional cop2_notify callback in lightrec_ops\n"); + + if (ENABLE_CODE_BUFFER && nb > PSX_MAP_CODE_BUFFER + && codebuf_map->address) { + tlsf = tlsf_create_with_pool(codebuf_map->address, + codebuf_map->length); + if (!tlsf) { + pr_err("Unable to initialize code buffer\n"); + return NULL; + } + + if (__WORDSIZE == 64) { + addr = (uintptr_t) codebuf_map->address + codebuf_map->length - 1; + with_32bit_lut = addr == (u32) addr; + } + } + + if (with_32bit_lut) + lut_size = CODE_LUT_SIZE * 4; + else + lut_size = CODE_LUT_SIZE * sizeof(void *); + init_jit(argv0); - state = calloc(1, sizeof(*state) + - sizeof(*state->code_lut) * CODE_LUT_SIZE); + state = calloc(1, sizeof(*state) + lut_size); if (!state) goto err_finish_jit; - lightrec_register(MEM_FOR_LIGHTREC, sizeof(*state) + - sizeof(*state->code_lut) * CODE_LUT_SIZE); + lightrec_register(MEM_FOR_LIGHTREC, sizeof(*state) + lut_size); -#if ENABLE_TINYMM - state->tinymm = tinymm_init(malloc, free, 4096); - if (!state->tinymm) - goto err_free_state; -#endif + state->tlsf = tlsf; + state->with_32bit_lut = with_32bit_lut; state->block_cache = lightrec_blockcache_init(state); if (!state->block_cache) - goto err_free_tinymm; - - state->reg_cache = lightrec_regcache_init(state); - if (!state->reg_cache) - goto err_free_block_cache; + goto err_free_state; if (ENABLE_THREADED_COMPILER) { state->rec = lightrec_recompiler_init(state); if (!state->rec) - goto err_free_reg_cache; + goto err_free_block_cache; state->reaper = lightrec_reaper_init(state); if (!state->reaper) goto err_free_recompiler; + } else { + state->cstate = lightrec_create_cstate(state); + if (!state->cstate) + goto err_free_block_cache; } state->nb_maps = nb; @@ -1336,16 +1787,11 @@ struct lightrec_state * lightrec_init(char *argv0, if (!state->c_wrapper_block) goto err_free_dispatcher; - state->c_wrapper = state->c_wrapper_block->function; - state->c_wrappers[C_WRAPPER_RW] = lightrec_rw_cb; state->c_wrappers[C_WRAPPER_RW_GENERIC] = lightrec_rw_generic_cb; state->c_wrappers[C_WRAPPER_MFC] = lightrec_mfc_cb; state->c_wrappers[C_WRAPPER_MTC] = lightrec_mtc_cb; - state->c_wrappers[C_WRAPPER_RFE] = lightrec_rfe_cb; state->c_wrappers[C_WRAPPER_CP] = lightrec_cp_cb; - 
state->c_wrappers[C_WRAPPER_SYSCALL] = lightrec_syscall_cb; - state->c_wrappers[C_WRAPPER_BREAK] = lightrec_break_cb; map = &state->maps[PSX_MAP_BIOS]; state->offset_bios = (uintptr_t)map->address - map->pc; @@ -1353,6 +1799,9 @@ struct lightrec_state * lightrec_init(char *argv0, map = &state->maps[PSX_MAP_SCRATCH_PAD]; state->offset_scratch = (uintptr_t)map->address - map->pc; + map = &state->maps[PSX_MAP_HW_REGISTERS]; + state->offset_io = (uintptr_t)map->address - map->pc; + map = &state->maps[PSX_MAP_KERNEL_USER_RAM]; state->offset_ram = (uintptr_t)map->address - map->pc; @@ -1364,12 +1813,16 @@ struct lightrec_state * lightrec_init(char *argv0, if (state->offset_bios == 0 && state->offset_scratch == 0 && state->offset_ram == 0 && + state->offset_io == 0 && state->mirrors_mapped) { pr_info("Memory map is perfect. Emitted code will be best.\n"); } else { pr_info("Memory map is sub-par. Emitted code will be slow.\n"); } + if (state->with_32bit_lut) + pr_info("Using 32-bit LUT\n"); + return state; err_free_dispatcher: @@ -1380,20 +1833,18 @@ struct lightrec_state * lightrec_init(char *argv0, err_free_recompiler: if (ENABLE_THREADED_COMPILER) lightrec_free_recompiler(state->rec); -err_free_reg_cache: - lightrec_free_regcache(state->reg_cache); + else + lightrec_free_cstate(state->cstate); err_free_block_cache: lightrec_free_block_cache(state->block_cache); -err_free_tinymm: -#if ENABLE_TINYMM - tinymm_shutdown(state->tinymm); err_free_state: -#endif lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) + - sizeof(*state->code_lut) * CODE_LUT_SIZE); + lut_elm_size(state) * CODE_LUT_SIZE); free(state); err_finish_jit: finish_jit(); + if (ENABLE_CODE_BUFFER && tlsf) + tlsf_destroy(tlsf); return NULL; } @@ -1403,44 +1854,51 @@ void lightrec_destroy(struct lightrec_state *state) state->current_cycle = ~state->current_cycle; lightrec_print_info(state); + lightrec_free_block_cache(state->block_cache); + lightrec_free_block(state, state->dispatcher); + lightrec_free_block(state, state->c_wrapper_block); + if (ENABLE_THREADED_COMPILER) { lightrec_free_recompiler(state->rec); lightrec_reaper_destroy(state->reaper); + } else { + lightrec_free_cstate(state->cstate); } - lightrec_free_regcache(state->reg_cache); - lightrec_free_block_cache(state->block_cache); - lightrec_free_block(state, state->dispatcher); - lightrec_free_block(state, state->c_wrapper_block); finish_jit(); + if (ENABLE_CODE_BUFFER && state->tlsf) + tlsf_destroy(state->tlsf); -#if ENABLE_TINYMM - tinymm_shutdown(state->tinymm); -#endif lightrec_unregister(MEM_FOR_LIGHTREC, sizeof(*state) + - sizeof(*state->code_lut) * CODE_LUT_SIZE); + lut_elm_size(state) * CODE_LUT_SIZE); free(state); } void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len) { u32 kaddr = kunseg(addr & ~0x3); - const struct lightrec_mem_map *map = lightrec_get_map(state, NULL, kaddr); - - if (map) { - if (map != &state->maps[PSX_MAP_KERNEL_USER_RAM]) - return; + enum psx_map idx = lightrec_get_map_idx(state, kaddr); + switch (idx) { + case PSX_MAP_MIRROR1: + case PSX_MAP_MIRROR2: + case PSX_MAP_MIRROR3: /* Handle mirrors */ - kaddr &= (state->maps[PSX_MAP_KERNEL_USER_RAM].length - 1); - - lightrec_invalidate_map(state, map, kaddr, len); + kaddr &= RAM_SIZE - 1; + fallthrough; + case PSX_MAP_KERNEL_USER_RAM: + break; + default: + return; } + + memset(lut_address(state, lut_offset(kaddr)), 0, + ((len + 3) / 4) * lut_elm_size(state)); } void lightrec_invalidate_all(struct lightrec_state *state) { - memset(state->code_lut, 0, sizeof(*state->code_lut) * 
CODE_LUT_SIZE); + memset(state->code_lut, 0, lut_elm_size(state) * CODE_LUT_SIZE); } void lightrec_set_invalidate_mode(struct lightrec_state *state, bool dma_only) @@ -1464,16 +1922,6 @@ u32 lightrec_exit_flags(struct lightrec_state *state) return state->exit_flags; } -void lightrec_dump_registers(struct lightrec_state *state, u32 regs[34]) -{ - memcpy(regs, state->native_reg_cache, sizeof(state->native_reg_cache)); -} - -void lightrec_restore_registers(struct lightrec_state *state, u32 regs[34]) -{ - memcpy(state->native_reg_cache, regs, sizeof(state->native_reg_cache)); -} - u32 lightrec_current_cycle_count(const struct lightrec_state *state) { return state->current_cycle; @@ -1496,3 +1944,8 @@ void lightrec_set_target_cycle_count(struct lightrec_state *state, u32 cycles) state->target_cycle = cycles; } } + +struct lightrec_registers * lightrec_get_registers(struct lightrec_state *state) +{ + return &state->regs; +} diff --git a/deps/lightrec/lightrec.h b/deps/lightrec/lightrec.h index 1a2b5426a..310036ced 100644 --- a/deps/lightrec/lightrec.h +++ b/deps/lightrec/lightrec.h @@ -43,10 +43,11 @@ struct lightrec_mem_map; /* Exit flags */ #define LIGHTREC_EXIT_NORMAL (0) -#define LIGHTREC_EXIT_SYSCALL (1 << 0) +#define LIGHTREC_EXIT_CHECK_INTERRUPT (1 << 0) #define LIGHTREC_EXIT_BREAK (1 << 1) -#define LIGHTREC_EXIT_CHECK_INTERRUPT (1 << 2) +#define LIGHTREC_EXIT_SYSCALL (1 << 2) #define LIGHTREC_EXIT_SEGFAULT (1 << 3) +#define LIGHTREC_EXIT_NOMEM (1 << 4) enum psx_map { PSX_MAP_KERNEL_USER_RAM, @@ -58,6 +59,9 @@ enum psx_map { PSX_MAP_MIRROR1, PSX_MAP_MIRROR2, PSX_MAP_MIRROR3, + PSX_MAP_CODE_BUFFER, + + PSX_MAP_UNKNOWN, }; struct lightrec_mem_map_ops { @@ -80,17 +84,18 @@ struct lightrec_mem_map { const struct lightrec_mem_map *mirror_of; }; -struct lightrec_cop_ops { - u32 (*mfc)(struct lightrec_state *state, u32 op, u8 reg); - u32 (*cfc)(struct lightrec_state *state, u32 op, u8 reg); - void (*mtc)(struct lightrec_state *state, u32 op, u8 reg, u32 value); - void (*ctc)(struct lightrec_state *state, u32 op, u8 reg, u32 value); - void (*op)(struct lightrec_state *state, u32 op); +struct lightrec_ops { + void (*cop2_notify)(struct lightrec_state *state, u32 op, u32 data); + void (*cop2_op)(struct lightrec_state *state, u32 op); + void (*enable_ram)(struct lightrec_state *state, _Bool enable); + _Bool (*hw_direct)(u32 kaddr, _Bool is_write, u8 size); }; -struct lightrec_ops { - struct lightrec_cop_ops cop0_ops; - struct lightrec_cop_ops cop2_ops; +struct lightrec_registers { + u32 gpr[34]; + u32 cp0[32]; + u32 cp2d[32]; + u32 cp2c[32]; }; __api struct lightrec_state *lightrec_init(char *argv0, @@ -102,8 +107,8 @@ __api void lightrec_destroy(struct lightrec_state *state); __api u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle); -__api u32 lightrec_execute_one(struct lightrec_state *state, u32 pc); -__api u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc); +__api u32 lightrec_run_interpreter(struct lightrec_state *state, + u32 pc, u32 target_cycle); __api void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len); __api void lightrec_invalidate_all(struct lightrec_state *state); @@ -113,9 +118,7 @@ __api void lightrec_set_invalidate_mode(struct lightrec_state *state, __api void lightrec_set_exit_flags(struct lightrec_state *state, u32 flags); __api u32 lightrec_exit_flags(struct lightrec_state *state); -__api void lightrec_dump_registers(struct lightrec_state *state, u32 regs[34]); -__api void lightrec_restore_registers(struct 
lightrec_state *state, - u32 regs[34]); +__api struct lightrec_registers * lightrec_get_registers(struct lightrec_state *state); __api u32 lightrec_current_cycle_count(const struct lightrec_state *state); __api void lightrec_reset_cycle_count(struct lightrec_state *state, u32 cycles); diff --git a/deps/lightrec/memmanager.c b/deps/lightrec/memmanager.c index fb626ded4..c7502cdba 100644 --- a/deps/lightrec/memmanager.c +++ b/deps/lightrec/memmanager.c @@ -3,14 +3,11 @@ * Copyright (C) 2019-2021 Paul Cercueil */ -#include "config.h" +#include "lightrec-config.h" #include "lightrec-private.h" #include "memmanager.h" #include -#if ENABLE_TINYMM -#include -#endif #ifdef ENABLE_THREADED_COMPILER #include @@ -67,12 +64,7 @@ void * lightrec_malloc(struct lightrec_state *state, { void *ptr; -#if ENABLE_TINYMM - if (type == MEM_FOR_IR) - ptr = tinymm_malloc(state->tinymm, len); - else -#endif - ptr = malloc(len); + ptr = malloc(len); if (!ptr) return NULL; @@ -86,12 +78,7 @@ void * lightrec_calloc(struct lightrec_state *state, { void *ptr; -#if ENABLE_TINYMM - if (type == MEM_FOR_IR) - ptr = tinymm_zalloc(state->tinymm, len); - else -#endif - ptr = calloc(1, len); + ptr = calloc(1, len); if (!ptr) return NULL; @@ -104,12 +91,7 @@ void lightrec_free(struct lightrec_state *state, enum mem_type type, unsigned int len, void *ptr) { lightrec_unregister(type, len); -#if ENABLE_TINYMM - if (type == MEM_FOR_IR) - tinymm_free(state->tinymm, ptr); - else -#endif - free(ptr); + free(ptr); } float lightrec_get_average_ipi(void) diff --git a/deps/lightrec/optimizer.c b/deps/lightrec/optimizer.c index b7d538be1..10067a7d0 100644 --- a/deps/lightrec/optimizer.c +++ b/deps/lightrec/optimizer.c @@ -3,7 +3,7 @@ * Copyright (C) 2014-2021 Paul Cercueil */ -#include "config.h" +#include "lightrec-config.h" #include "disassembler.h" #include "lightrec.h" #include "memmanager.h" @@ -22,6 +22,8 @@ struct optimizer_list { unsigned int nb_optimizers; }; +static bool is_nop(union code op); + bool is_unconditional_jump(union code c) { switch (c.i.op) { @@ -67,6 +69,9 @@ static u64 opcode_read_mask(union code op) case OP_SPECIAL_MFLO: return BIT(REG_LO); case OP_SPECIAL_SLL: + if (!op.r.imm) + return 0; + fallthrough; case OP_SPECIAL_SRL: case OP_SPECIAL_SRA: return BIT(op.r.rt); @@ -97,6 +102,9 @@ static u64 opcode_read_mask(union code op) case OP_LUI: return 0; case OP_BEQ: + if (op.i.rs == op.i.rt) + return 0; + fallthrough; case OP_BNE: case OP_LWL: case OP_LWR: @@ -111,11 +119,31 @@ static u64 opcode_read_mask(union code op) } } -static u64 opcode_write_mask(union code op) +static u64 mult_div_write_mask(union code op) { u64 flags; + if (!OPT_FLAG_MULT_DIV) + return BIT(REG_LO) | BIT(REG_HI); + + if (op.r.rd) + flags = BIT(op.r.rd); + else + flags = BIT(REG_LO); + if (op.r.imm) + flags |= BIT(op.r.imm); + else + flags |= BIT(REG_HI); + + return flags; +} + +static u64 opcode_write_mask(union code op) +{ switch (op.i.op) { + case OP_META_MULT2: + case OP_META_MULTU2: + return mult_div_write_mask(op); case OP_SPECIAL: switch (op.r.op) { case OP_SPECIAL_JR: @@ -126,22 +154,15 @@ static u64 opcode_write_mask(union code op) case OP_SPECIAL_MULTU: case OP_SPECIAL_DIV: case OP_SPECIAL_DIVU: - if (!OPT_FLAG_MULT_DIV) - return BIT(REG_LO) | BIT(REG_HI); - - if (op.r.rd) - flags = BIT(op.r.rd); - else - flags = BIT(REG_LO); - if (op.r.imm) - flags |= BIT(op.r.imm); - else - flags |= BIT(REG_HI); - return flags; + return mult_div_write_mask(op); case OP_SPECIAL_MTHI: return BIT(REG_HI); case OP_SPECIAL_MTLO: return BIT(REG_LO); + 
case OP_SPECIAL_SLL: + if (!op.r.imm) + return 0; + fallthrough; default: return BIT(op.r.rd); } @@ -160,6 +181,8 @@ static u64 opcode_write_mask(union code op) case OP_LBU: case OP_LHU: case OP_LWR: + case OP_META_EXTC: + case OP_META_EXTS: return BIT(op.i.rt); case OP_JAL: return BIT(31); @@ -207,6 +230,116 @@ bool opcode_writes_register(union code op, u8 reg) return opcode_write_mask(op) & BIT(reg); } +static int find_prev_writer(const struct opcode *list, unsigned int offset, u8 reg) +{ + union code c; + unsigned int i; + + if (op_flag_sync(list[offset].flags)) + return -1; + + for (i = offset; i > 0; i--) { + c = list[i - 1].c; + + if (opcode_writes_register(c, reg)) { + if (i > 1 && has_delay_slot(list[i - 2].c)) + break; + + return i - 1; + } + + if (op_flag_sync(list[i - 1].flags) || + has_delay_slot(c) || + opcode_reads_register(c, reg)) + break; + } + + return -1; +} + +static int find_next_reader(const struct opcode *list, unsigned int offset, u8 reg) +{ + unsigned int i; + union code c; + + if (op_flag_sync(list[offset].flags)) + return -1; + + for (i = offset; ; i++) { + c = list[i].c; + + if (opcode_reads_register(c, reg)) { + if (i > 0 && has_delay_slot(list[i - 1].c)) + break; + + return i; + } + + if (op_flag_sync(list[i].flags) || + has_delay_slot(c) || opcode_writes_register(c, reg)) + break; + } + + return -1; +} + +static bool reg_is_dead(const struct opcode *list, unsigned int offset, u8 reg) +{ + unsigned int i; + + if (op_flag_sync(list[offset].flags)) + return false; + + for (i = offset + 1; ; i++) { + if (opcode_reads_register(list[i].c, reg)) + return false; + + if (opcode_writes_register(list[i].c, reg)) + return true; + + if (has_delay_slot(list[i].c)) { + if (op_flag_no_ds(list[i].flags) || + opcode_reads_register(list[i + 1].c, reg)) + return false; + + return opcode_writes_register(list[i + 1].c, reg); + } + } +} + +static bool reg_is_read(const struct opcode *list, + unsigned int a, unsigned int b, u8 reg) +{ + /* Return true if reg is read in one of the opcodes of the interval + * [a, b[ */ + for (; a < b; a++) { + if (!is_nop(list[a].c) && opcode_reads_register(list[a].c, reg)) + return true; + } + + return false; +} + +static bool reg_is_written(const struct opcode *list, + unsigned int a, unsigned int b, u8 reg) +{ + /* Return true if reg is written in one of the opcodes of the interval + * [a, b[ */ + + for (; a < b; a++) { + if (!is_nop(list[a].c) && opcode_writes_register(list[a].c, reg)) + return true; + } + + return false; +} + +static bool reg_is_read_or_written(const struct opcode *list, + unsigned int a, unsigned int b, u8 reg) +{ + return reg_is_read(list, a, b, reg) || reg_is_written(list, a, b, reg); +} + static bool opcode_is_load(union code op) { switch (op.i.op) { @@ -239,6 +372,22 @@ static bool opcode_is_store(union code op) } } +static u8 opcode_get_io_size(union code op) +{ + switch (op.i.op) { + case OP_LB: + case OP_LBU: + case OP_SB: + return 8; + case OP_LH: + case OP_LHU: + case OP_SH: + return 16; + default: + return 32; + } +} + bool opcode_is_io(union code op) { return opcode_is_load(op) || opcode_is_store(op); @@ -348,8 +497,19 @@ bool load_in_delay_slot(union code op) return false; } -static u32 lightrec_propagate_consts(union code c, u32 known, u32 *v) +static u32 lightrec_propagate_consts(const struct opcode *op, + const struct opcode *prev, + u32 known, u32 *v) { + union code c = prev->c; + + /* Register $zero is always, well, zero */ + known |= BIT(0); + v[0] = 0; + + if (op_flag_sync(op->flags)) + return BIT(0); + switch 
(c.i.op) { case OP_SPECIAL: switch (c.r.op) { @@ -468,10 +628,52 @@ static u32 lightrec_propagate_consts(union code c, u32 known, u32 *v) known &= ~BIT(c.r.rd); } break; + case OP_SPECIAL_MULT: + case OP_SPECIAL_MULTU: + case OP_SPECIAL_DIV: + case OP_SPECIAL_DIVU: + if (OPT_FLAG_MULT_DIV && c.r.rd) + known &= ~BIT(c.r.rd); + if (OPT_FLAG_MULT_DIV && c.r.imm) + known &= ~BIT(c.r.imm); + break; + case OP_SPECIAL_MFLO: + case OP_SPECIAL_MFHI: + known &= ~BIT(c.r.rd); + break; default: break; } break; + case OP_META_MULT2: + case OP_META_MULTU2: + if (OPT_FLAG_MULT_DIV && (known & BIT(c.r.rs))) { + if (c.r.rd) { + known |= BIT(c.r.rd); + + if (c.r.op < 32) + v[c.r.rd] = v[c.r.rs] << c.r.op; + else + v[c.r.rd] = 0; + } + + if (c.r.imm) { + known |= BIT(c.r.imm); + + if (c.r.op >= 32) + v[c.r.imm] = v[c.r.rs] << (c.r.op - 32); + else if (c.i.op == OP_META_MULT2) + v[c.r.imm] = (s32) v[c.r.rs] >> (32 - c.r.op); + else + v[c.r.imm] = v[c.r.rs] >> (32 - c.r.op); + } + } else { + if (OPT_FLAG_MULT_DIV && c.r.rd) + known &= ~BIT(c.r.rd); + if (OPT_FLAG_MULT_DIV && c.r.imm) + known &= ~BIT(c.r.imm); + } + break; case OP_REGIMM: break; case OP_ADDI: @@ -563,6 +765,22 @@ static u32 lightrec_propagate_consts(union code c, u32 known, u32 *v) known &= ~BIT(c.r.rd); } break; + case OP_META_EXTC: + if (known & BIT(c.i.rs)) { + known |= BIT(c.i.rt); + v[c.i.rt] = (s32)(s8)v[c.i.rs]; + } else { + known &= ~BIT(c.i.rt); + } + break; + case OP_META_EXTS: + if (known & BIT(c.i.rs)) { + known |= BIT(c.i.rt); + v[c.i.rt] = (s32)(s16)v[c.i.rs]; + } else { + known &= ~BIT(c.i.rt); + } + break; default: break; } @@ -570,90 +788,353 @@ static u32 lightrec_propagate_consts(union code c, u32 known, u32 *v) return known; } +static void lightrec_optimize_sll_sra(struct opcode *list, unsigned int offset) +{ + struct opcode *prev, *prev2 = NULL, *curr = &list[offset]; + struct opcode *to_change, *to_nop; + int idx, idx2; + + if (curr->r.imm != 24 && curr->r.imm != 16) + return; + + idx = find_prev_writer(list, offset, curr->r.rt); + if (idx < 0) + return; + + prev = &list[idx]; + + if (prev->i.op != OP_SPECIAL || prev->r.op != OP_SPECIAL_SLL || + prev->r.imm != curr->r.imm || prev->r.rd != curr->r.rt) + return; + + if (prev->r.rd != prev->r.rt && curr->r.rd != curr->r.rt) { + /* sll rY, rX, 16 + * ... + * srl rZ, rY, 16 */ + + if (!reg_is_dead(list, offset, curr->r.rt) || + reg_is_read_or_written(list, idx, offset, curr->r.rd)) + return; + + /* If rY is dead after the SRL, and rZ is not used after the SLL, + * we can change rY to rZ */ + + pr_debug("Detected SLL/SRA with middle temp register\n"); + prev->r.rd = curr->r.rd; + curr->r.rt = prev->r.rd; + } + + /* We got a SLL/SRA combo. If imm #16, that's a cast to u16. + * If imm #24 that's a cast to u8. + * + * First of all, make sure that the target register of the SLL is not + * read before the SRA. */ + + if (prev->r.rd == prev->r.rt) { + /* sll rX, rX, 16 + * ... + * srl rY, rX, 16 */ + to_change = curr; + to_nop = prev; + + /* rX is used after the SRA - we cannot convert it. */ + if (prev->r.rd != curr->r.rd && !reg_is_dead(list, offset, prev->r.rd)) + return; + } else { + /* sll rY, rX, 16 + * ... + * srl rY, rY, 16 */ + to_change = prev; + to_nop = curr; + } + + idx2 = find_prev_writer(list, idx, prev->r.rt); + if (idx2 >= 0) { + /* Note that PSX games sometimes do casts after + * a LHU or LBU; in this case we can change the + * load opcode to a LH or LB, and the cast can + * be changed to a MOV or a simple NOP. 
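+ *
+ * An illustrative MIPS sequence for this case (registers hypothetical):
+ *
+ *   lhu  $v0, 0($a0)    ; zero-extending 16-bit load
+ *   sll  $v0, $v0, 16   ; move the halfword into the top bits
+ *   sra  $v0, $v0, 16   ; shift back, copying the sign bit
+ *
+ * which this pass rewrites into a single sign-extending load:
+ *
+ *   lh   $v0, 0($a0)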
*/ + + prev2 = &list[idx2]; + + if (curr->r.rd != prev2->i.rt && + !reg_is_dead(list, offset, prev2->i.rt)) + prev2 = NULL; + else if (curr->r.imm == 16 && prev2->i.op == OP_LHU) + prev2->i.op = OP_LH; + else if (curr->r.imm == 24 && prev2->i.op == OP_LBU) + prev2->i.op = OP_LB; + else + prev2 = NULL; + + if (prev2) { + if (curr->r.rd == prev2->i.rt) { + to_change->opcode = 0; + } else if (reg_is_dead(list, offset, prev2->i.rt) && + !reg_is_read_or_written(list, idx2 + 1, offset, curr->r.rd)) { + /* The target register of the SRA is dead after the + * LBU/LHU; we can change the target register of the + * LBU/LHU to the one of the SRA. */ + prev2->i.rt = curr->r.rd; + to_change->opcode = 0; + } else { + to_change->i.op = OP_META_MOV; + to_change->r.rd = curr->r.rd; + to_change->r.rs = prev2->i.rt; + } + + if (to_nop->r.imm == 24) + pr_debug("Convert LBU+SLL+SRA to LB\n"); + else + pr_debug("Convert LHU+SLL+SRA to LH\n"); + } + } + + if (!prev2) { + pr_debug("Convert SLL/SRA #%u to EXT%c\n", + prev->r.imm, + prev->r.imm == 24 ? 'C' : 'S'); + + if (to_change == prev) { + to_change->i.rs = prev->r.rt; + to_change->i.rt = curr->r.rd; + } else { + to_change->i.rt = curr->r.rd; + to_change->i.rs = prev->r.rt; + } + + if (to_nop->r.imm == 24) + to_change->i.op = OP_META_EXTC; + else + to_change->i.op = OP_META_EXTS; + } + + to_nop->opcode = 0; +} + +static void lightrec_remove_useless_lui(struct block *block, unsigned int offset, + u32 known, u32 *values) +{ + struct opcode *list = block->opcode_list, + *op = &block->opcode_list[offset]; + int reader; + + if (!op_flag_sync(op->flags) && (known & BIT(op->i.rt)) && + values[op->i.rt] == op->i.imm << 16) { + pr_debug("Converting duplicated LUI to NOP\n"); + op->opcode = 0x0; + return; + } + + if (op->i.imm != 0 || op->i.rt == 0) + return; + + reader = find_next_reader(list, offset + 1, op->i.rt); + if (reader <= 0) + return; + + if (opcode_writes_register(list[reader].c, op->i.rt) || + reg_is_dead(list, reader, op->i.rt)) { + pr_debug("Removing useless LUI 0x0\n"); + + if (list[reader].i.rs == op->i.rt) + list[reader].i.rs = 0; + if (list[reader].i.op == OP_SPECIAL && + list[reader].i.rt == op->i.rt) + list[reader].i.rt = 0; + op->opcode = 0x0; + } +} + +static void lightrec_modify_lui(struct block *block, unsigned int offset) +{ + union code c, *lui = &block->opcode_list[offset].c; + bool stop = false, stop_next = false; + unsigned int i; + + for (i = offset + 1; !stop && i < block->nb_ops; i++) { + c = block->opcode_list[i].c; + stop = stop_next; + + if ((opcode_is_store(c) && c.i.rt == lui->i.rt) + || (!opcode_is_load(c) && opcode_reads_register(c, lui->i.rt))) + break; + + if (opcode_writes_register(c, lui->i.rt)) { + pr_debug("Convert LUI at offset 0x%x to kuseg\n", + i - 1 << 2); + lui->i.imm = kunseg(lui->i.imm << 16) >> 16; + break; + } + + if (has_delay_slot(c)) + stop_next = true; + } +} + +static int lightrec_transform_branches(struct lightrec_state *state, + struct block *block) +{ + struct opcode *op; + unsigned int i; + s32 offset; + + for (i = 0; i < block->nb_ops; i++) { + op = &block->opcode_list[i]; + + switch (op->i.op) { + case OP_J: + /* Transform J opcode into BEQ $zero, $zero if possible. 
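+ *
+ * A worked example of the range check below (addresses hypothetical):
+ * for a J at the start of a block at PC 0x80030000 targeting
+ * 0x80030040, the word offset is
+ * (0x80030040 >> 2) - (0x80030000 >> 2) - 0 - 1 = 0xf,
+ * which fits in an s16, so the jump can become BEQ $zero, $zero with
+ * imm = 0xf (branch offsets are relative to the delay slot, hence
+ * the extra -1).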
*/ + offset = (s32)((block->pc & 0xf0000000) >> 2 | op->j.imm) + - (s32)(block->pc >> 2) - (s32)i - 1; + + if (offset == (s16)offset) { + pr_debug("Transform J into BEQ $zero, $zero\n"); + op->i.op = OP_BEQ; + op->i.rs = 0; + op->i.rt = 0; + op->i.imm = offset; + + } + fallthrough; + default: + break; + } + } + + return 0; +} + +static inline bool is_power_of_two(u32 value) +{ + return popcount32(value) == 1; +} + static int lightrec_transform_ops(struct lightrec_state *state, struct block *block) { - struct opcode *list; + struct opcode *list = block->opcode_list; + struct opcode *prev, *op = NULL; + u32 known = BIT(0); + u32 values[32] = { 0 }; unsigned int i; + u8 tmp; for (i = 0; i < block->nb_ops; i++) { - list = &block->opcode_list[i]; + prev = op; + op = &list[i]; + + if (prev) + known = lightrec_propagate_consts(op, prev, known, values); /* Transform all opcodes detected as useless to real NOPs * (0x0: SLL r0, r0, #0) */ - if (list->opcode != 0 && is_nop(list->c)) { + if (op->opcode != 0 && is_nop(op->c)) { pr_debug("Converting useless opcode 0x%08x to NOP\n", - list->opcode); - list->opcode = 0x0; + op->opcode); + op->opcode = 0x0; } - if (!list->opcode) + if (!op->opcode) continue; - switch (list->i.op) { - /* Transform BEQ / BNE to BEQZ / BNEZ meta-opcodes if one of the - * two registers is zero. */ + switch (op->i.op) { case OP_BEQ: - if ((list->i.rs == 0) ^ (list->i.rt == 0)) { - list->i.op = OP_META_BEQZ; - if (list->i.rs == 0) { - list->i.rs = list->i.rt; - list->i.rt = 0; - } - } else if (list->i.rs == list->i.rt) { - list->i.rs = 0; - list->i.rt = 0; + if (op->i.rs == op->i.rt) { + op->i.rs = 0; + op->i.rt = 0; + } else if (op->i.rs == 0) { + op->i.rs = op->i.rt; + op->i.rt = 0; } break; + case OP_BNE: - if (list->i.rs == 0) { - list->i.op = OP_META_BNEZ; - list->i.rs = list->i.rt; - list->i.rt = 0; - } else if (list->i.rt == 0) { - list->i.op = OP_META_BNEZ; + if (op->i.rs == 0) { + op->i.rs = op->i.rt; + op->i.rt = 0; } break; + case OP_LUI: + if (!prev || !has_delay_slot(prev->c)) + lightrec_modify_lui(block, i); + lightrec_remove_useless_lui(block, i, known, values); + break; + /* Transform ORI/ADDI/ADDIU with imm #0 or ORR/ADD/ADDU/SUB/SUBU * with register $zero to the MOV meta-opcode */ case OP_ORI: case OP_ADDI: case OP_ADDIU: - if (list->i.imm == 0) { + if (op->i.imm == 0) { pr_debug("Convert ORI/ADDI/ADDIU #0 to MOV\n"); - list->i.op = OP_META_MOV; - list->r.rd = list->i.rt; + op->i.op = OP_META_MOV; + op->r.rd = op->i.rt; } break; case OP_SPECIAL: - switch (list->r.op) { - case OP_SPECIAL_SLL: + switch (op->r.op) { case OP_SPECIAL_SRA: + if (op->r.imm == 0) { + pr_debug("Convert SRA #0 to MOV\n"); + op->i.op = OP_META_MOV; + op->r.rs = op->r.rt; + break; + } + + lightrec_optimize_sll_sra(block->opcode_list, i); + break; + case OP_SPECIAL_SLL: case OP_SPECIAL_SRL: - if (list->r.imm == 0) { - pr_debug("Convert SLL/SRL/SRA #0 to MOV\n"); - list->i.op = OP_META_MOV; - list->r.rs = list->r.rt; + if (op->r.imm == 0) { + pr_debug("Convert SLL/SRL #0 to MOV\n"); + op->i.op = OP_META_MOV; + op->r.rs = op->r.rt; + } + break; + case OP_SPECIAL_MULT: + case OP_SPECIAL_MULTU: + if ((known & BIT(op->r.rs)) && + is_power_of_two(values[op->r.rs])) { + tmp = op->c.i.rs; + op->c.i.rs = op->c.i.rt; + op->c.i.rt = tmp; + } else if (!(known & BIT(op->r.rt)) || + !is_power_of_two(values[op->r.rt])) { + break; } + + pr_debug("Multiply by power-of-two: %u\n", + values[op->r.rt]); + + if (op->r.op == OP_SPECIAL_MULT) + op->i.op = OP_META_MULT2; + else + op->i.op = OP_META_MULTU2; + + 
op->r.op = ctz32(values[op->r.rt]); break; case OP_SPECIAL_OR: case OP_SPECIAL_ADD: case OP_SPECIAL_ADDU: - if (list->r.rs == 0) { + if (op->r.rs == 0) { pr_debug("Convert OR/ADD $zero to MOV\n"); - list->i.op = OP_META_MOV; - list->r.rs = list->r.rt; + op->i.op = OP_META_MOV; + op->r.rs = op->r.rt; } - case OP_SPECIAL_SUB: /* fall-through */ + fallthrough; + case OP_SPECIAL_SUB: case OP_SPECIAL_SUBU: - if (list->r.rt == 0) { + if (op->r.rt == 0) { pr_debug("Convert OR/ADD/SUB $zero to MOV\n"); - list->i.op = OP_META_MOV; + op->i.op = OP_META_MOV; } - default: /* fall-through */ + fallthrough; + default: break; } - default: /* fall-through */ + fallthrough; + default: break; } } @@ -661,12 +1142,70 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl return 0; } +static bool lightrec_can_switch_delay_slot(union code op, union code next_op) +{ + switch (op.i.op) { + case OP_SPECIAL: + switch (op.r.op) { + case OP_SPECIAL_JALR: + if (opcode_reads_register(next_op, op.r.rd) || + opcode_writes_register(next_op, op.r.rd)) + return false; + fallthrough; + case OP_SPECIAL_JR: + if (opcode_writes_register(next_op, op.r.rs)) + return false; + fallthrough; + default: + break; + } + fallthrough; + case OP_J: + break; + case OP_JAL: + if (opcode_reads_register(next_op, 31) || + opcode_writes_register(next_op, 31)) + return false; + + break; + case OP_BEQ: + case OP_BNE: + if (op.i.rt && opcode_writes_register(next_op, op.i.rt)) + return false; + fallthrough; + case OP_BLEZ: + case OP_BGTZ: + if (op.i.rs && opcode_writes_register(next_op, op.i.rs)) + return false; + break; + case OP_REGIMM: + switch (op.r.rt) { + case OP_REGIMM_BLTZAL: + case OP_REGIMM_BGEZAL: + if (opcode_reads_register(next_op, 31) || + opcode_writes_register(next_op, 31)) + return false; + fallthrough; + case OP_REGIMM_BLTZ: + case OP_REGIMM_BGEZ: + if (op.i.rs && opcode_writes_register(next_op, op.i.rs)) + return false; + break; + } + fallthrough; + default: + break; + } + + return true; +} + static int lightrec_switch_delay_slots(struct lightrec_state *state, struct block *block) { struct opcode *list, *next = &block->opcode_list[0]; unsigned int i; union code op, next_op; - u8 flags; + u32 flags; for (i = 0; i < block->nb_ops - 1; i++) { list = next; @@ -674,77 +1213,29 @@ static int lightrec_switch_delay_slots(struct lightrec_state *state, struct bloc next_op = next->c; op = list->c; - if (!has_delay_slot(op) || - list->flags & (LIGHTREC_NO_DS | LIGHTREC_EMULATE_BRANCH) || + if (!has_delay_slot(op) || op_flag_no_ds(list->flags) || + op_flag_emulate_branch(list->flags) || op.opcode == 0 || next_op.opcode == 0) continue; if (i && has_delay_slot(block->opcode_list[i - 1].c) && - !(block->opcode_list[i - 1].flags & LIGHTREC_NO_DS)) + !op_flag_no_ds(block->opcode_list[i - 1].flags)) continue; - if ((list->flags & LIGHTREC_SYNC) || - (next->flags & LIGHTREC_SYNC)) + if (op_flag_sync(next->flags)) continue; - switch (list->i.op) { - case OP_SPECIAL: - switch (op.r.op) { - case OP_SPECIAL_JALR: - if (opcode_reads_register(next_op, op.r.rd) || - opcode_writes_register(next_op, op.r.rd)) - continue; - case OP_SPECIAL_JR: /* fall-through */ - if (opcode_writes_register(next_op, op.r.rs)) - continue; - default: /* fall-through */ - break; - } - case OP_J: /* fall-through */ - break; - case OP_JAL: - if (opcode_reads_register(next_op, 31) || - opcode_writes_register(next_op, 31)) - continue; - else - break; - case OP_BEQ: - case OP_BNE: - if (op.i.rt && opcode_writes_register(next_op, op.i.rt)) - continue; - 
case OP_BLEZ: /* fall-through */ - case OP_BGTZ: - case OP_META_BEQZ: - case OP_META_BNEZ: - if (op.i.rs && opcode_writes_register(next_op, op.i.rs)) - continue; - break; - case OP_REGIMM: - switch (op.r.rt) { - case OP_REGIMM_BLTZAL: - case OP_REGIMM_BGEZAL: - if (opcode_reads_register(next_op, 31) || - opcode_writes_register(next_op, 31)) - continue; - case OP_REGIMM_BLTZ: /* fall-through */ - case OP_REGIMM_BGEZ: - if (op.i.rs && - opcode_writes_register(next_op, op.i.rs)) - continue; - break; - } - default: /* fall-through */ - break; - } + if (!lightrec_can_switch_delay_slot(list->c, next_op)) + continue; pr_debug("Swap branch and delay slot opcodes " "at offsets 0x%x / 0x%x\n", i << 2, (i + 1) << 2); - flags = next->flags; + flags = next->flags | (list->flags & LIGHTREC_SYNC); list->c = next_op; next->c = op; - next->flags = list->flags | LIGHTREC_NO_DS; + next->flags = (list->flags | LIGHTREC_NO_DS) & ~LIGHTREC_SYNC; list->flags = flags | LIGHTREC_NO_DS; } @@ -753,7 +1244,7 @@ static int lightrec_switch_delay_slots(struct lightrec_state *state, struct bloc static int shrink_opcode_list(struct lightrec_state *state, struct block *block, u16 new_size) { - struct opcode *list; + struct opcode_list *list, *old_list; if (new_size >= block->nb_ops) { pr_err("Invalid shrink size (%u vs %u)\n", @@ -761,19 +1252,20 @@ static int shrink_opcode_list(struct lightrec_state *state, struct block *block, return -EINVAL; } - list = lightrec_malloc(state, MEM_FOR_IR, - sizeof(*list) * new_size); + sizeof(*list) + sizeof(struct opcode) * new_size); if (!list) { pr_err("Unable to allocate memory\n"); return -ENOMEM; } - memcpy(list, block->opcode_list, sizeof(*list) * new_size); + old_list = container_of(block->opcode_list, struct opcode_list, ops); + memcpy(list->ops, old_list->ops, sizeof(struct opcode) * new_size); - lightrec_free_opcode_list(state, block); - block->opcode_list = list; + lightrec_free_opcode_list(state, block->opcode_list); + list->nb_ops = new_size; block->nb_ops = new_size; + block->opcode_list = list->ops; pr_debug("Shrunk opcode list of block PC 0x%08x to %u opcodes\n", block->pc, new_size); @@ -784,13 +1276,14 @@ static int shrink_opcode_list(struct lightrec_state *state, struct block *block, static int lightrec_detect_impossible_branches(struct lightrec_state *state, struct block *block) { - struct opcode *op, *next = &block->opcode_list[0]; + struct opcode *op, *list = block->opcode_list, *next = &list[0]; unsigned int i; int ret = 0; + s16 offset; for (i = 0; i < block->nb_ops - 1; i++) { op = next; - next = &block->opcode_list[i + 1]; + next = &list[i + 1]; if (!has_delay_slot(op->c) || (!load_in_delay_slot(next->c) && @@ -805,9 +1298,23 @@ static int lightrec_detect_impossible_branches(struct lightrec_state *state, continue; } + offset = i + 1 + (s16)op->i.imm; + if (load_in_delay_slot(next->c) && + (offset >= 0 && offset < block->nb_ops) && + !opcode_reads_register(list[offset].c, next->c.i.rt)) { + /* The 'impossible' branch is a local branch - we can + * verify here that the first opcode of the target does + * not use the target register of the delay slot */ + + pr_debug("Branch at offset 0x%x has load delay slot, " + "but is local and dest opcode does not read " + "dest register\n", i << 2); + continue; + } + op->flags |= LIGHTREC_EMULATE_BRANCH; - if (op == block->opcode_list) { + if (op == list) { pr_debug("First opcode of block PC 0x%08x is an impossible branch\n", block->pc); @@ -841,12 +1348,11 @@ static int lightrec_local_branches(struct lightrec_state *state, 
struct block *b case OP_BLEZ: case OP_BGTZ: case OP_REGIMM: - case OP_META_BEQZ: - case OP_META_BNEZ: offset = i + 1 + (s16)list->i.imm; if (offset >= 0 && offset < block->nb_ops) break; - default: /* fall-through */ + fallthrough; + default: continue; } @@ -889,8 +1395,6 @@ bool has_delay_slot(union code op) case OP_BLEZ: case OP_BGTZ: case OP_REGIMM: - case OP_META_BEQZ: - case OP_META_BNEZ: return true; default: return false; @@ -899,105 +1403,287 @@ bool has_delay_slot(union code op) bool should_emulate(const struct opcode *list) { - return has_delay_slot(list->c) && - (list->flags & LIGHTREC_EMULATE_BRANCH); + return op_flag_emulate_branch(list->flags) && has_delay_slot(list->c); +} + +static bool op_writes_rd(union code c) +{ + switch (c.i.op) { + case OP_SPECIAL: + case OP_META_MOV: + return true; + default: + return false; + } +} + +static void lightrec_add_reg_op(struct opcode *op, u8 reg, u32 reg_op) +{ + if (op_writes_rd(op->c) && reg == op->r.rd) + op->flags |= LIGHTREC_REG_RD(reg_op); + else if (op->i.rs == reg) + op->flags |= LIGHTREC_REG_RS(reg_op); + else if (op->i.rt == reg) + op->flags |= LIGHTREC_REG_RT(reg_op); + else + pr_debug("Cannot add unload/clean/discard flag: " + "opcode does not touch register %s!\n", + lightrec_reg_name(reg)); } static void lightrec_add_unload(struct opcode *op, u8 reg) { - if (op->i.op == OP_SPECIAL && reg == op->r.rd) - op->flags |= LIGHTREC_UNLOAD_RD; + lightrec_add_reg_op(op, reg, LIGHTREC_REG_UNLOAD); +} + +static void lightrec_add_discard(struct opcode *op, u8 reg) +{ + lightrec_add_reg_op(op, reg, LIGHTREC_REG_DISCARD); +} + +static void lightrec_add_clean(struct opcode *op, u8 reg) +{ + lightrec_add_reg_op(op, reg, LIGHTREC_REG_CLEAN); +} + +static void +lightrec_early_unload_sync(struct opcode *list, s16 *last_r, s16 *last_w) +{ + unsigned int reg; + s16 offset; + + for (reg = 0; reg < 34; reg++) { + offset = s16_max(last_w[reg], last_r[reg]); + + if (offset >= 0) + lightrec_add_unload(&list[offset], reg); + } - if (op->i.rs == reg) - op->flags |= LIGHTREC_UNLOAD_RS; - if (op->i.rt == reg) - op->flags |= LIGHTREC_UNLOAD_RT; + memset(last_r, 0xff, sizeof(*last_r) * 34); + memset(last_w, 0xff, sizeof(*last_w) * 34); } static int lightrec_early_unload(struct lightrec_state *state, struct block *block) { - unsigned int i, offset; + u16 i, offset; struct opcode *op; + s16 last_r[34], last_w[34], last_sync = 0, next_sync = 0; + u64 mask_r, mask_w, dirty = 0, loaded = 0; u8 reg; - for (reg = 1; reg < 34; reg++) { - int last_r_id = -1, last_w_id = -1; + memset(last_r, 0xff, sizeof(last_r)); + memset(last_w, 0xff, sizeof(last_w)); + + /* + * Clean if: + * - the register is dirty, and is read again after a branch opcode + * + * Unload if: + * - the register is dirty or loaded, and is not read again + * - the register is dirty or loaded, and is written again after a branch opcode + * - the next opcode has the SYNC flag set + * + * Discard if: + * - the register is dirty or loaded, and is written again + */ + + for (i = 0; i < block->nb_ops; i++) { + op = &block->opcode_list[i]; - for (i = 0; i < block->nb_ops; i++) { - union code c = block->opcode_list[i].c; + if (op_flag_sync(op->flags) || should_emulate(op)) { + /* The next opcode has the SYNC flag set, or is a branch + * that should be emulated: unload all registers. 
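+ *
+ * As a hedged summary, the three register fates this pass encodes
+ * (using the helpers defined above; offsets hypothetical):
+ *
+ *   lightrec_add_clean(&list[off], reg);   // write back, keep cached
+ *   lightrec_add_unload(&list[off], reg);  // write back, drop mapping
+ *   lightrec_add_discard(&list[off], reg); // drop without write-back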
*/ + lightrec_early_unload_sync(block->opcode_list, last_r, last_w); + dirty = 0; + loaded = 0; + } - if (opcode_reads_register(c, reg)) - last_r_id = i; - if (opcode_writes_register(c, reg)) - last_w_id = i; + if (next_sync == i) { + last_sync = i; + pr_debug("Last sync: 0x%x\n", last_sync << 2); } - if (last_w_id > last_r_id) - offset = (unsigned int)last_w_id; - else if (last_r_id >= 0) - offset = (unsigned int)last_r_id; - else - continue; + if (has_delay_slot(op->c)) { + next_sync = i + 1 + !op_flag_no_ds(op->flags); + pr_debug("Next sync: 0x%x\n", next_sync << 2); + } - op = &block->opcode_list[offset]; + mask_r = opcode_read_mask(op->c); + mask_w = opcode_write_mask(op->c); - if (has_delay_slot(op->c) && (op->flags & LIGHTREC_NO_DS)) - offset++; + for (reg = 0; reg < 34; reg++) { + if (mask_r & BIT(reg)) { + if (dirty & BIT(reg) && last_w[reg] < last_sync) { + /* The register is dirty, and is read + * again after a branch: clean it */ - if (offset == block->nb_ops) - continue; + lightrec_add_clean(&block->opcode_list[last_w[reg]], reg); + dirty &= ~BIT(reg); + loaded |= BIT(reg); + } + + last_r[reg] = i; + } + + if (mask_w & BIT(reg)) { + if ((dirty & BIT(reg) && last_w[reg] < last_sync) || + (loaded & BIT(reg) && last_r[reg] < last_sync)) { + /* The register is dirty or loaded, and + * is written again after a branch: + * unload it */ + + offset = s16_max(last_w[reg], last_r[reg]); + lightrec_add_unload(&block->opcode_list[offset], reg); + dirty &= ~BIT(reg); + loaded &= ~BIT(reg); + } else if (!(mask_r & BIT(reg)) && + ((dirty & BIT(reg) && last_w[reg] > last_sync) || + (loaded & BIT(reg) && last_r[reg] > last_sync))) { + /* The register is dirty or loaded, and + * is written again: discard it */ + + offset = s16_max(last_w[reg], last_r[reg]); + lightrec_add_discard(&block->opcode_list[offset], reg); + dirty &= ~BIT(reg); + loaded &= ~BIT(reg); + } - lightrec_add_unload(&block->opcode_list[offset], reg); + last_w[reg] = i; + } + + } + + dirty |= mask_w; + loaded |= mask_r; } + /* Unload all registers that are dirty or loaded at the end of block. */ + lightrec_early_unload_sync(block->opcode_list, last_r, last_w); + return 0; } -static int lightrec_flag_stores(struct lightrec_state *state, struct block *block) +static int lightrec_flag_io(struct lightrec_state *state, struct block *block) { - struct opcode *list; + struct opcode *prev = NULL, *list = NULL; + enum psx_map psx_map; u32 known = BIT(0); u32 values[32] = { 0 }; unsigned int i; + u32 val, kunseg_val; + bool no_mask; for (i = 0; i < block->nb_ops; i++) { + prev = list; list = &block->opcode_list[i]; - /* Register $zero is always, well, zero */ - known |= BIT(0); - values[0] = 0; + if (prev) + known = lightrec_propagate_consts(list, prev, known, values); switch (list->i.op) { case OP_SB: case OP_SH: case OP_SW: - /* Mark all store operations that target $sp or $gp - * as not requiring code invalidation. This is based - * on the heuristic that stores using one of these - * registers as address will never hit a code page. */ - if (list->i.rs >= 28 && list->i.rs <= 29 && - !state->maps[PSX_MAP_KERNEL_USER_RAM].ops) { - pr_debug("Flaging opcode 0x%08x as not requiring invalidation\n", - list->opcode); - list->flags |= LIGHTREC_NO_INVALIDATE; - } + if (OPT_FLAG_STORES) { + /* Mark all store operations that target $sp or $gp + * as not requiring code invalidation. This is based + * on the heuristic that stores using one of these + * registers as address will never hit a code page. 
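+ *
+ * A hypothetical store covered by this heuristic:
+ *
+ *   sw  $s0, 16($sp)   ; spill through $sp (r29)
+ *
+ * Stack and $gp-relative data pages are assumed never to contain
+ * translated code, so such stores skip the invalidation path.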
*/ + if (list->i.rs >= 28 && list->i.rs <= 29 && + !state->maps[PSX_MAP_KERNEL_USER_RAM].ops) { + pr_debug("Flagging opcode 0x%08x as not " + "requiring invalidation\n", + list->opcode); + list->flags |= LIGHTREC_NO_INVALIDATE; + list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT); + } + + /* Detect writes whose destination address is inside the + * current block, using constant propagation. When these + * occur, we mark the blocks as not compilable. */ + if ((known & BIT(list->i.rs)) && + kunseg(values[list->i.rs]) >= kunseg(block->pc) && + kunseg(values[list->i.rs]) < (kunseg(block->pc) + + block->nb_ops * 4)) { + pr_debug("Self-modifying block detected\n"); + block_set_flags(block, BLOCK_NEVER_COMPILE); + list->flags |= LIGHTREC_SMC; + } + } + fallthrough; + case OP_SWL: + case OP_SWR: + case OP_SWC2: + case OP_LB: + case OP_LBU: + case OP_LH: + case OP_LHU: + case OP_LW: + case OP_LWL: + case OP_LWR: + case OP_LWC2: + if (OPT_FLAG_IO && (known & BIT(list->i.rs))) { + val = values[list->i.rs] + (s16) list->i.imm; + kunseg_val = kunseg(val); + psx_map = lightrec_get_map_idx(state, kunseg_val); + + list->flags &= ~LIGHTREC_IO_MASK; + no_mask = val == kunseg_val; + + switch (psx_map) { + case PSX_MAP_KERNEL_USER_RAM: + if (no_mask) + list->flags |= LIGHTREC_NO_MASK; + fallthrough; + case PSX_MAP_MIRROR1: + case PSX_MAP_MIRROR2: + case PSX_MAP_MIRROR3: + pr_debug("Flagging opcode %u as RAM access\n", i); + list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_RAM); + if (no_mask && state->mirrors_mapped) + list->flags |= LIGHTREC_NO_MASK; + break; + case PSX_MAP_BIOS: + pr_debug("Flagging opcode %u as BIOS access\n", i); + list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_BIOS); + if (no_mask) + list->flags |= LIGHTREC_NO_MASK; + break; + case PSX_MAP_SCRATCH_PAD: + pr_debug("Flagging opcode %u as scratchpad access\n", i); + list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_SCRATCH); + if (no_mask) + list->flags |= LIGHTREC_NO_MASK; + + /* Consider that we're never going to run code from + * the scratchpad. 
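+ *
+ * A sketch of the classification above with hypothetical addresses:
+ * a propagated constant address 0x80001234 (KSEG0) kunsegs to
+ * 0x00001234, so val != kunseg_val and the access is tagged
+ * LIGHTREC_IO_RAM without LIGHTREC_NO_MASK; the same access through
+ * 0x00001234 (KUSEG) satisfies val == kunseg_val and is additionally
+ * flagged LIGHTREC_NO_MASK.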
*/ + list->flags |= LIGHTREC_NO_INVALIDATE; + break; + case PSX_MAP_HW_REGISTERS: + if (state->ops.hw_direct && + state->ops.hw_direct(kunseg_val, + opcode_is_store(list->c), + opcode_get_io_size(list->c))) { + pr_debug("Flagging opcode %u as direct I/O access\n", + i); + list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT_HW); + + if (no_mask) + list->flags |= LIGHTREC_NO_MASK; + break; + } + fallthrough; + default: + pr_debug("Flagging opcode %u as I/O access\n", + i); + list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW); + break; + } } - default: /* fall-through */ + fallthrough; + default: break; } - - known = lightrec_propagate_consts(list->c, known, values); } return 0; @@ -1023,7 +1709,7 @@ static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset, mask |= opcode_read_mask(op->c); mask |= opcode_write_mask(op->c); - if (op->flags & LIGHTREC_SYNC) + if (op_flag_sync(op->flags)) sync = true; switch (op->i.op) { @@ -1032,14 +1718,11 @@ static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset, case OP_BLEZ: case OP_BGTZ: case OP_REGIMM: - case OP_META_BEQZ: - case OP_META_BNEZ: /* TODO: handle backwards branches too */ - if (!last && - (op->flags & LIGHTREC_LOCAL_BRANCH) && + if (!last && op_flag_local_branch(op->flags) && (s16)op->c.i.imm >= 0) { branch_offset = i + 1 + (s16)op->c.i.imm - - !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS)); + - !!op_flag_no_ds(op->flags); reg = get_mfhi_mflo_reg(block, branch_offset, NULL, mask, sync, mflo, false); @@ -1052,6 +1735,9 @@ static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset, } return mflo ? REG_LO : REG_HI; + case OP_META_MULT2: + case OP_META_MULTU2: + return 0; case OP_SPECIAL: switch (op->r.op) { case OP_SPECIAL_MULT: @@ -1071,8 +1757,7 @@ static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset, if (op->r.rs != 31) return reg; - if (!sync && - !(op->flags & LIGHTREC_NO_DS) && + if (!sync && !op_flag_no_ds(op->flags) && (next->i.op == OP_SPECIAL) && ((!mflo && next->r.op == OP_SPECIAL_MFHI) || (mflo && next->r.op == OP_SPECIAL_MFLO))) @@ -1117,7 +1802,7 @@ static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset, break; } - /* fall-through */ + fallthrough; default: continue; } @@ -1144,13 +1829,10 @@ static void lightrec_replace_lo_hi(struct block *block, u16 offset, case OP_BLEZ: case OP_BGTZ: case OP_REGIMM: - case OP_META_BEQZ: - case OP_META_BNEZ: /* TODO: handle backwards branches too */ - if ((op->flags & LIGHTREC_LOCAL_BRANCH) && - (s16)op->c.i.imm >= 0) { + if (op_flag_local_branch(op->flags) && (s16)op->c.i.imm >= 0) { branch_offset = i + 1 + (s16)op->c.i.imm - - !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS)); + - !!op_flag_no_ds(op->flags); lightrec_replace_lo_hi(block, branch_offset, last, lo); lightrec_replace_lo_hi(block, i + 1, branch_offset, lo); @@ -1170,30 +1852,57 @@ static void lightrec_replace_lo_hi(struct block *block, u16 offset, return; } - /* fall-through */ + fallthrough; default: break; } } } +static bool lightrec_always_skip_div_check(void) +{ +#ifdef __mips__ + return true; +#else + return false; +#endif +} + static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *block) { - struct opcode *list; + struct opcode *prev, *list = NULL; u8 reg_hi, reg_lo; unsigned int i; + u32 known = BIT(0); + u32 values[32] = { 0 }; for (i = 0; i < block->nb_ops - 1; i++) { + prev = list; list = &block->opcode_list[i]; - if (list->i.op != OP_SPECIAL) - continue; + if (prev) + known = lightrec_propagate_consts(list, prev, known, values); - 
switch (list->r.op) { - case OP_SPECIAL_MULT: - case OP_SPECIAL_MULTU: - case OP_SPECIAL_DIV: - case OP_SPECIAL_DIVU: + switch (list->i.op) { + case OP_SPECIAL: + switch (list->r.op) { + case OP_SPECIAL_DIV: + case OP_SPECIAL_DIVU: + /* If we are dividing by a non-zero constant, don't + * emit the div-by-zero check. */ + if (lightrec_always_skip_div_check() || + ((known & BIT(list->c.r.rt)) && values[list->c.r.rt])) + list->flags |= LIGHTREC_NO_DIV_CHECK; + fallthrough; + case OP_SPECIAL_MULT: + case OP_SPECIAL_MULTU: + break; + default: + continue; + } + fallthrough; + case OP_META_MULT2: + case OP_META_MULTU2: break; default: continue; @@ -1201,8 +1910,9 @@ static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block * /* Don't support opcodes in delay slots */ if ((i && has_delay_slot(block->opcode_list[i - 1].c)) || - (list->flags & LIGHTREC_NO_DS)) + op_flag_no_ds(list->flags)) { continue; + } reg_lo = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, true, false); if (reg_lo == 0) { @@ -1372,7 +2082,8 @@ static int lightrec_replace_memset(struct lightrec_state *state, struct block *b if (i == ARRAY_SIZE(memset_code) - 1) { /* success! */ pr_debug("Block at PC 0x%x is a memset\n", block->pc); - block->flags |= BLOCK_IS_MEMSET | BLOCK_NEVER_COMPILE; + block_set_flags(block, + BLOCK_IS_MEMSET | BLOCK_NEVER_COMPILE); /* Return non-zero to skip other optimizers. */ return 1; @@ -1386,10 +2097,11 @@ static int (*lightrec_optimizers[])(struct lightrec_state *state, struct block * IF_OPT(OPT_REMOVE_DIV_BY_ZERO_SEQ, &lightrec_remove_div_by_zero_check_sequence), IF_OPT(OPT_REPLACE_MEMSET, &lightrec_replace_memset), IF_OPT(OPT_DETECT_IMPOSSIBLE_BRANCHES, &lightrec_detect_impossible_branches), - IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_ops), + IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_branches), IF_OPT(OPT_LOCAL_BRANCHES, &lightrec_local_branches), + IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_ops), IF_OPT(OPT_SWITCH_DELAY_SLOTS, &lightrec_switch_delay_slots), - IF_OPT(OPT_FLAG_STORES, &lightrec_flag_stores), + IF_OPT(OPT_FLAG_IO || OPT_FLAG_STORES, &lightrec_flag_io), IF_OPT(OPT_FLAG_MULT_DIV, &lightrec_flag_mults_divs), IF_OPT(OPT_EARLY_UNLOAD, &lightrec_early_unload), }; diff --git a/deps/lightrec/reaper.c b/deps/lightrec/reaper.c index 2d9e68486..777b99704 100644 --- a/deps/lightrec/reaper.c +++ b/deps/lightrec/reaper.c @@ -12,6 +12,7 @@ #include #include +#include #include struct reaper_elm { @@ -23,7 +24,11 @@ struct reaper_elm { struct reaper { struct lightrec_state *state; pthread_mutex_t mutex; + pthread_cond_t cond; struct slist_elm reap_list; + + bool running; + atomic_uint sem; }; struct reaper *lightrec_reaper_init(struct lightrec_state *state) @@ -38,21 +43,36 @@ struct reaper *lightrec_reaper_init(struct lightrec_state *state) } reaper->state = state; + reaper->running = false; + reaper->sem = 0; slist_init(&reaper->reap_list); ret = pthread_mutex_init(&reaper->mutex, NULL); if (ret) { pr_err("Cannot init mutex variable: %d\n", ret); - lightrec_free(reaper->state, MEM_FOR_LIGHTREC, - sizeof(*reaper), reaper); - return NULL; + goto err_free_reaper; + } + + ret = pthread_cond_init(&reaper->cond, NULL); + if (ret) { + pr_err("Cannot init cond variable: %d\n", ret); + goto err_destroy_mutex; } return reaper; + +err_destroy_mutex: + pthread_mutex_destroy(&reaper->mutex); +err_free_reaper: + lightrec_free(reaper->state, MEM_FOR_LIGHTREC, sizeof(*reaper), reaper); + return NULL; } void lightrec_reaper_destroy(struct reaper *reaper) { + 
lightrec_reaper_reap(reaper); + + pthread_cond_destroy(&reaper->cond); pthread_mutex_destroy(&reaper->mutex); lightrec_free(reaper->state, MEM_FOR_LIGHTREC, sizeof(*reaper), reaper); } @@ -89,6 +109,11 @@ int lightrec_reaper_add(struct reaper *reaper, reap_func_t f, void *data) return ret; } +static bool lightrec_reaper_can_reap(struct reaper *reaper) +{ + return !atomic_load_explicit(&reaper->sem, memory_order_relaxed); +} + void lightrec_reaper_reap(struct reaper *reaper) { struct reaper_elm *reaper_elm; @@ -96,8 +121,10 @@ void lightrec_reaper_reap(struct reaper *reaper) pthread_mutex_lock(&reaper->mutex); - while (!!(elm = slist_first(&reaper->reap_list))) { + while (lightrec_reaper_can_reap(reaper) && + !!(elm = slist_first(&reaper->reap_list))) { slist_remove(&reaper->reap_list, elm); + reaper->running = true; pthread_mutex_unlock(&reaper->mutex); reaper_elm = container_of(elm, struct reaper_elm, slist); @@ -108,7 +135,24 @@ void lightrec_reaper_reap(struct reaper *reaper) sizeof(*reaper_elm), reaper_elm); pthread_mutex_lock(&reaper->mutex); + reaper->running = false; + pthread_cond_broadcast(&reaper->cond); } pthread_mutex_unlock(&reaper->mutex); } + +void lightrec_reaper_pause(struct reaper *reaper) +{ + atomic_fetch_add_explicit(&reaper->sem, 1, memory_order_relaxed); + + pthread_mutex_lock(&reaper->mutex); + while (reaper->running) + pthread_cond_wait(&reaper->cond, &reaper->mutex); + pthread_mutex_unlock(&reaper->mutex); +} + +void lightrec_reaper_continue(struct reaper *reaper) +{ + atomic_fetch_sub_explicit(&reaper->sem, 1, memory_order_relaxed); +} diff --git a/deps/lightrec/reaper.h b/deps/lightrec/reaper.h index b7d493cb2..49b6a1a34 100644 --- a/deps/lightrec/reaper.h +++ b/deps/lightrec/reaper.h @@ -17,4 +17,7 @@ void lightrec_reaper_destroy(struct reaper *reaper); int lightrec_reaper_add(struct reaper *reaper, reap_func_t f, void *data); void lightrec_reaper_reap(struct reaper *reaper); +void lightrec_reaper_pause(struct reaper *reaper); +void lightrec_reaper_continue(struct reaper *reaper); + #endif /* __LIGHTREC_REAPER_H__ */ diff --git a/deps/lightrec/recompiler.c b/deps/lightrec/recompiler.c index 0167863cc..08a9235a6 100644 --- a/deps/lightrec/recompiler.c +++ b/deps/lightrec/recompiler.c @@ -3,10 +3,12 @@ * Copyright (C) 2019-2021 Paul Cercueil */ +#include "blockcache.h" #include "debug.h" #include "interpreter.h" #include "lightrec-private.h" #include "memmanager.h" +#include "reaper.h" #include "slist.h" #include @@ -14,40 +16,152 @@ #include #include #include +#ifdef __linux__ +#include +#endif struct block_rec { struct block *block; struct slist_elm slist; + bool compiling; +}; + +struct recompiler_thd { + struct lightrec_cstate *cstate; + unsigned int tid; + pthread_t thd; }; struct recompiler { struct lightrec_state *state; - pthread_t thd; pthread_cond_t cond; + pthread_cond_t cond2; pthread_mutex_t mutex; - bool stop; - struct block *current_block; + bool stop, must_flush; struct slist_elm slist; + + pthread_mutex_t alloc_mutex; + + unsigned int nb_recs; + struct recompiler_thd thds[]; }; -static void lightrec_compile_list(struct recompiler *rec) +static unsigned int get_processors_count(void) +{ + unsigned int nb = 1; + +#if defined(PTW32_VERSION) + nb = pthread_num_processors_np(); +#elif defined(__APPLE__) || defined(__FreeBSD__) + int count; + size_t size = sizeof(count); + + nb = sysctlbyname("hw.ncpu", &count, &size, NULL, 0) ? 1 : count; +#elif defined(_SC_NPROCESSORS_ONLN) + nb = sysconf(_SC_NPROCESSORS_ONLN); +#endif + + return nb < 1 ? 
1 : nb; +} + +static struct slist_elm * lightrec_get_first_elm(struct slist_elm *head) +{ + struct block_rec *block_rec; + struct slist_elm *elm; + + for (elm = slist_first(head); elm; elm = elm->next) { + block_rec = container_of(elm, struct block_rec, slist); + + if (!block_rec->compiling) + return elm; + } + + return NULL; +} + +static bool lightrec_cancel_block_rec(struct recompiler *rec, + struct block_rec *block_rec) +{ + if (block_rec->compiling) { + /* Block is being recompiled - wait for + * completion */ + pthread_cond_wait(&rec->cond2, &rec->mutex); + + /* We can't guarantee the signal was for us. + * Since block_rec may have been removed while + * we were waiting on the condition, we cannot + * check block_rec->compiling again. The best + * thing is just to restart the function. */ + return false; + } + + /* Block is not yet being processed - remove it from the list */ + slist_remove(&rec->slist, &block_rec->slist); + lightrec_free(rec->state, MEM_FOR_LIGHTREC, + sizeof(*block_rec), block_rec); + + return true; +} + +static void lightrec_cancel_list(struct recompiler *rec) +{ + struct block_rec *block_rec; + struct slist_elm *elm, *head = &rec->slist; + + for (elm = slist_first(head); elm; elm = slist_first(head)) { + block_rec = container_of(elm, struct block_rec, slist); + lightrec_cancel_block_rec(rec, block_rec); + } +} + +static void lightrec_flush_code_buffer(struct lightrec_state *state, void *d) +{ + struct recompiler *rec = d; + + lightrec_remove_outdated_blocks(state->block_cache, NULL); + rec->must_flush = false; +} + +static void lightrec_compile_list(struct recompiler *rec, + struct recompiler_thd *thd) { struct block_rec *block_rec; struct slist_elm *next; struct block *block; int ret; - while (!!(next = slist_first(&rec->slist))) { + while (!!(next = lightrec_get_first_elm(&rec->slist))) { block_rec = container_of(next, struct block_rec, slist); + block_rec->compiling = true; block = block_rec->block; - rec->current_block = block; pthread_mutex_unlock(&rec->mutex); - ret = lightrec_compile_block(rec->state, block); - if (ret) { - pr_err("Unable to compile block at PC 0x%x: %d\n", - block->pc, ret); + if (likely(!block_has_flag(block, BLOCK_IS_DEAD))) { + ret = lightrec_compile_block(thd->cstate, block); + if (ret == -ENOMEM) { + /* Code buffer is full. Request the reaper to + * flush it. 
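The -ENOMEM convention here is worth spelling out: the only error the worker treats as "code buffer full" is -ENOMEM, so any allocator used along the compile path must report exhaustion that way for the cancel-and-flush logic below to trigger. A minimal sketch under that assumption (code_alloc() is hypothetical, not lightrec's actual allocator API):

#include <errno.h>
#include <stddef.h>

extern void *code_alloc(size_t size); /* assumed to return NULL when full */

static int emit_code(void **out, size_t size)
{
	void *mem = code_alloc(size);

	if (!mem)
		return -ENOMEM; /* caller cancels queued jobs and schedules a flush */

	*out = mem;
	return 0;
}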
*/ + + pthread_mutex_lock(&rec->mutex); + block_rec->compiling = false; + pthread_cond_broadcast(&rec->cond2); + + if (!rec->must_flush) { + rec->must_flush = true; + lightrec_cancel_list(rec); + + lightrec_reaper_add(rec->state->reaper, + lightrec_flush_code_buffer, + rec); + } + return; + } + + if (ret) { + pr_err("Unable to compile block at PC 0x%x: %d\n", + block->pc, ret); + } } pthread_mutex_lock(&rec->mutex); @@ -55,15 +169,14 @@ static void lightrec_compile_list(struct recompiler *rec) slist_remove(&rec->slist, next); lightrec_free(rec->state, MEM_FOR_LIGHTREC, sizeof(*block_rec), block_rec); - pthread_cond_signal(&rec->cond); + pthread_cond_broadcast(&rec->cond2); } - - rec->current_block = NULL; } static void * lightrec_recompiler_thd(void *d) { - struct recompiler *rec = d; + struct recompiler_thd *thd = d; + struct recompiler *rec = container_of(thd, struct recompiler, thds[thd->tid]); pthread_mutex_lock(&rec->mutex); @@ -76,7 +189,7 @@ static void * lightrec_recompiler_thd(void *d) } while (slist_empty(&rec->slist)); - lightrec_compile_list(rec); + lightrec_compile_list(rec, thd); } out_unlock: @@ -87,62 +200,115 @@ static void * lightrec_recompiler_thd(void *d) struct recompiler *lightrec_recompiler_init(struct lightrec_state *state) { struct recompiler *rec; + unsigned int i, nb_recs, nb_cpus; int ret; - rec = lightrec_malloc(state, MEM_FOR_LIGHTREC, sizeof(*rec)); + nb_cpus = get_processors_count(); + nb_recs = nb_cpus < 2 ? 1 : nb_cpus - 1; + + rec = lightrec_malloc(state, MEM_FOR_LIGHTREC, sizeof(*rec) + + nb_recs * sizeof(*rec->thds)); if (!rec) { pr_err("Cannot create recompiler: Out of memory\n"); return NULL; } + for (i = 0; i < nb_recs; i++) { + rec->thds[i].tid = i; + rec->thds[i].cstate = NULL; + } + + for (i = 0; i < nb_recs; i++) { + rec->thds[i].cstate = lightrec_create_cstate(state); + if (!rec->thds[i].cstate) { + pr_err("Cannot create recompiler: Out of memory\n"); + goto err_free_cstates; + } + } + rec->state = state; rec->stop = false; - rec->current_block = NULL; + rec->must_flush = false; + rec->nb_recs = nb_recs; slist_init(&rec->slist); ret = pthread_cond_init(&rec->cond, NULL); if (ret) { pr_err("Cannot init cond variable: %d\n", ret); - goto err_free_rec; + goto err_free_cstates; } - ret = pthread_mutex_init(&rec->mutex, NULL); + ret = pthread_cond_init(&rec->cond2, NULL); if (ret) { - pr_err("Cannot init mutex variable: %d\n", ret); + pr_err("Cannot init cond variable: %d\n", ret); goto err_cnd_destroy; } - ret = pthread_create(&rec->thd, NULL, lightrec_recompiler_thd, rec); + ret = pthread_mutex_init(&rec->alloc_mutex, NULL); if (ret) { - pr_err("Cannot create recompiler thread: %d\n", ret); - goto err_mtx_destroy; + pr_err("Cannot init alloc mutex variable: %d\n", ret); + goto err_cnd2_destroy; } - pr_info("Threaded recompiler started\n"); + ret = pthread_mutex_init(&rec->mutex, NULL); + if (ret) { + pr_err("Cannot init mutex variable: %d\n", ret); + goto err_alloc_mtx_destroy; + } + + for (i = 0; i < nb_recs; i++) { + ret = pthread_create(&rec->thds[i].thd, NULL, + lightrec_recompiler_thd, &rec->thds[i]); + if (ret) { + pr_err("Cannot create recompiler thread: %d\n", ret); + /* TODO: Handle cleanup properly */ + goto err_mtx_destroy; + } + } + + pr_info("Threaded recompiler started with %u workers.\n", nb_recs); return rec; err_mtx_destroy: pthread_mutex_destroy(&rec->mutex); +err_alloc_mtx_destroy: + pthread_mutex_destroy(&rec->alloc_mutex); +err_cnd2_destroy: + pthread_cond_destroy(&rec->cond2); err_cnd_destroy: pthread_cond_destroy(&rec->cond); 
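One detail of lightrec_recompiler_init() above: struct recompiler ends with a C99 flexible array member, so the shared state and all per-thread slots come from a single allocation sized at runtime. A generic sketch of that pattern (illustrative types only):

#include <stdlib.h>

struct worker {
	unsigned int tid;
};

struct pool {
	unsigned int nb_workers;
	struct worker workers[]; /* flexible array member */
};

static struct pool *pool_create(unsigned int nb_workers)
{
	/* One malloc covers the header plus nb_workers trailing elements. */
	struct pool *p = malloc(sizeof(*p) + nb_workers * sizeof(p->workers[0]));
	unsigned int i;

	if (!p)
		return NULL;

	p->nb_workers = nb_workers;
	for (i = 0; i < nb_workers; i++)
		p->workers[i].tid = i;

	return p;
}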
-err_free_rec: +err_free_cstates: + for (i = 0; i < nb_recs; i++) { + if (rec->thds[i].cstate) + lightrec_free_cstate(rec->thds[i].cstate); + } lightrec_free(state, MEM_FOR_LIGHTREC, sizeof(*rec), rec); return NULL; } void lightrec_free_recompiler(struct recompiler *rec) { + unsigned int i; + rec->stop = true; /* Stop the threads */ pthread_mutex_lock(&rec->mutex); - pthread_cond_signal(&rec->cond); + pthread_cond_broadcast(&rec->cond); + lightrec_cancel_list(rec); pthread_mutex_unlock(&rec->mutex); - pthread_join(rec->thd, NULL); + + for (i = 0; i < rec->nb_recs; i++) + pthread_join(rec->thds[i].thd, NULL); + + for (i = 0; i < rec->nb_recs; i++) + lightrec_free_cstate(rec->thds[i].cstate); pthread_mutex_destroy(&rec->mutex); + pthread_mutex_destroy(&rec->alloc_mutex); pthread_cond_destroy(&rec->cond); + pthread_cond_destroy(&rec->cond2); lightrec_free(rec->state, MEM_FOR_LIGHTREC, sizeof(*rec), rec); } @@ -154,9 +320,15 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block) pthread_mutex_lock(&rec->mutex); + /* If the recompiler must flush the code cache, we can't add the new + * job. It will be re-added next time the block's address is jumped to + * again. */ + if (rec->must_flush) + goto out_unlock; + /* If the block is marked as dead, don't compile it, it will be removed * as soon as it's safe. */ - if (block->flags & BLOCK_IS_DEAD) + if (block_has_flag(block, BLOCK_IS_DEAD)) goto out_unlock; for (elm = slist_first(&rec->slist), prev = NULL; elm; @@ -167,7 +339,8 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block) /* The block to compile is already in the queue - bump * it to the top of the list, unless the block is being * recompiled. */ - if (prev && !(block->flags & BLOCK_SHOULD_RECOMPILE)) { + if (prev && !block_rec->compiling && + !block_has_flag(block, BLOCK_SHOULD_RECOMPILE)) { slist_remove_next(prev); slist_append(&rec->slist, elm); } @@ -178,7 +351,7 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block) /* By the time this function was called, the block has been recompiled * and isn't in the wait list anymore. Just return here. */ - if (block->function && !(block->flags & BLOCK_SHOULD_RECOMPILE)) + if (block->function && !block_has_flag(block, BLOCK_SHOULD_RECOMPILE)) goto out_unlock; block_rec = lightrec_malloc(rec->state, MEM_FOR_LIGHTREC, @@ -191,12 +364,13 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block) pr_debug("Adding block PC 0x%x to recompiler\n", block->pc); block_rec->block = block; + block_rec->compiling = false; elm = &rec->slist; /* If the block is being recompiled, push it to the end of the queue; * otherwise push it to the front of the queue.
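The queueing policy in the comment above is the whole scheduling heuristic: blocks that have never been compiled go to the front so the main loop stops interpreting them as soon as possible, while recompilations of already-working code can wait at the back. A sketch of the two operations on a bare singly-linked list (hypothetical types; lightrec uses its own slist API):

struct node {
	struct node *next;
};

static void push_front(struct node *head, struct node *elm)
{
	elm->next = head->next;
	head->next = elm;
}

static void push_back(struct node *head, struct node *elm)
{
	struct node *it = head;

	/* Same walk as the "for (; elm->next; elm = elm->next);" just below. */
	while (it->next)
		it = it->next;

	elm->next = NULL;
	it->next = elm;
}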
*/ - if (block->flags & BLOCK_SHOULD_RECOMPILE) + if (block_has_flag(block, BLOCK_SHOULD_RECOMPILE)) for (; elm->next; elm = elm->next); slist_append(elm, &block_rec->slist); @@ -206,6 +380,7 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block) out_unlock: pthread_mutex_unlock(&rec->mutex); + return ret; } @@ -216,49 +391,59 @@ void lightrec_recompiler_remove(struct recompiler *rec, struct block *block) pthread_mutex_lock(&rec->mutex); - for (elm = slist_first(&rec->slist); elm; elm = elm->next) { - block_rec = container_of(elm, struct block_rec, slist); + while (true) { + for (elm = slist_first(&rec->slist); elm; elm = elm->next) { + block_rec = container_of(elm, struct block_rec, slist); - if (block_rec->block == block) { - if (block == rec->current_block) { - /* Block is being recompiled - wait for - * completion */ - do { - pthread_cond_wait(&rec->cond, - &rec->mutex); - } while (block == rec->current_block); - } else { - /* Block is not yet being processed - remove it - * from the list */ - slist_remove(&rec->slist, elm); - lightrec_free(rec->state, MEM_FOR_LIGHTREC, - sizeof(*block_rec), block_rec); + if (block_rec->block == block) { + if (lightrec_cancel_block_rec(rec, block_rec)) + goto out_unlock; + + break; } + } + if (!elm) break; - } } +out_unlock: pthread_mutex_unlock(&rec->mutex); } void * lightrec_recompiler_run_first_pass(struct lightrec_state *state, struct block *block, u32 *pc) { - bool freed; + u8 old_flags; + + /* There's no point in running the first pass if the block will never + * be compiled. Let the main loop run the interpreter instead. */ + if (block_has_flag(block, BLOCK_NEVER_COMPILE)) + return NULL; + + /* The block is marked as dead, and will be removed the next time the + * reaper is run. In the meantime, the old function can still be + * executed. */ + if (block_has_flag(block, BLOCK_IS_DEAD)) + return block->function; + + /* If the block is already fully tagged, there is no point in running + * the first pass. Request a recompilation of the block, and maybe the + * interpreter will run the block in the meantime. */ + if (block_has_flag(block, BLOCK_FULLY_TAGGED)) + lightrec_recompiler_add(state->rec, block); if (likely(block->function)) { - if (block->flags & BLOCK_FULLY_TAGGED) { - freed = atomic_flag_test_and_set(&block->op_list_freed); + if (block_has_flag(block, BLOCK_FULLY_TAGGED)) { + old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST); - if (!freed) { + if (!(old_flags & BLOCK_NO_OPCODE_LIST)) { pr_debug("Block PC 0x%08x is fully tagged" " - free opcode list\n", block->pc); /* The block was already compiled but the opcode list * didn't get freed yet - do it now */ - lightrec_free_opcode_list(state, block); - block->opcode_list = NULL; + lightrec_free_opcode_list(state, block->opcode_list); } } @@ -267,24 +452,36 @@ void * lightrec_recompiler_run_first_pass(struct lightrec_state *state, /* Mark the opcode list as freed, so that the threaded compiler won't * free it while we're using it in the interpreter. */ - freed = atomic_flag_test_and_set(&block->op_list_freed); + old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST); /* Block wasn't compiled yet - run the interpreter */ *pc = lightrec_emulate_block(state, block, *pc); - if (!freed) - atomic_flag_clear(&block->op_list_freed); + if (!(old_flags & BLOCK_NO_OPCODE_LIST)) + block_clear_flags(block, BLOCK_NO_OPCODE_LIST); /* The block got compiled while the interpreter was running. * We can free the opcode list now. 
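The check right below closes a race: both the interpreter path here and the compiler thread may try to free the opcode list once the block is compiled and fully tagged. Because block_set_flags() returns the previous flags, it doubles as an atomic test-and-set, so exactly one side wins. A stand-alone sketch of the idiom (hypothetical names):

#include <stdatomic.h>
#include <stdbool.h>

#define FLAG_NO_OPCODE_LIST (1u << 0)

static _Atomic unsigned int block_flags;

/* Returns true for exactly one of any number of racing callers. */
static bool claim_opcode_list(void)
{
	unsigned int old = atomic_fetch_or(&block_flags, FLAG_NO_OPCODE_LIST);

	return !(old & FLAG_NO_OPCODE_LIST);
}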
*/ - if (block->function && (block->flags & BLOCK_FULLY_TAGGED) && - !atomic_flag_test_and_set(&block->op_list_freed)) { - pr_debug("Block PC 0x%08x is fully tagged" - " - free opcode list\n", block->pc); + if (block->function && block_has_flag(block, BLOCK_FULLY_TAGGED)) { + old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST); - lightrec_free_opcode_list(state, block); - block->opcode_list = NULL; + if (!(old_flags & BLOCK_NO_OPCODE_LIST)) { + pr_debug("Block PC 0x%08x is fully tagged" + " - free opcode list\n", block->pc); + + lightrec_free_opcode_list(state, block->opcode_list); + } } return NULL; } + +void lightrec_code_alloc_lock(struct lightrec_state *state) +{ + pthread_mutex_lock(&state->rec->alloc_mutex); +} + +void lightrec_code_alloc_unlock(struct lightrec_state *state) +{ + pthread_mutex_unlock(&state->rec->alloc_mutex); +} diff --git a/deps/lightrec/recompiler.h b/deps/lightrec/recompiler.h index 9bc522d14..b9fc57981 100644 --- a/deps/lightrec/recompiler.h +++ b/deps/lightrec/recompiler.h @@ -18,4 +18,7 @@ void lightrec_recompiler_remove(struct recompiler *rec, struct block *block); void * lightrec_recompiler_run_first_pass(struct lightrec_state *state, struct block *block, u32 *pc); +void lightrec_code_alloc_lock(struct lightrec_state *state); +void lightrec_code_alloc_unlock(struct lightrec_state *state); + #endif /* __LIGHTREC_RECOMPILER_H__ */ diff --git a/deps/lightrec/regcache.c b/deps/lightrec/regcache.c index a19c35815..1f11d8a27 100644 --- a/deps/lightrec/regcache.c +++ b/deps/lightrec/regcache.c @@ -5,16 +5,28 @@ #include "debug.h" #include "memmanager.h" +#include "lightning-wrapper.h" #include "regcache.h" -#include #include #include +enum reg_priority { + REG_IS_TEMP, + REG_IS_TEMP_VALUE, + REG_IS_ZERO, + REG_IS_LOADED, + REG_IS_DIRTY, + + REG_NB_PRIORITIES, +}; + struct native_register { - bool used, loaded, dirty, output, extend, extended, + bool used, output, extend, extended, zero_extend, zero_extended, locked; s8 emulated_register; + intptr_t value; + enum reg_priority prio; }; struct regcache { @@ -40,6 +52,24 @@ const char * lightrec_reg_name(u8 reg) return mips_regs[reg]; } +static inline bool lightrec_reg_is_zero(u8 jit_reg) +{ +#if defined(__mips__) || defined(__alpha__) || defined(__riscv) + if (jit_reg == _ZERO) + return true; +#endif + return false; +} + +static inline s8 lightrec_get_hardwired_reg(u8 reg) +{ +#if defined(__mips__) || defined(__alpha__) || defined(__riscv) + if (reg == 0) + return _ZERO; +#endif + return -1; +} + static inline u8 lightrec_reg_number(const struct regcache *cache, const struct native_register *nreg) { @@ -51,7 +81,11 @@ static inline u8 lightrec_reg_to_lightning(const struct regcache *cache, const struct native_register *nreg) { u8 offset = lightrec_reg_number(cache, nreg); - return offset < NUM_REGS ? 
JIT_V(offset) : JIT_R(offset - NUM_REGS); + + if (offset < NUM_REGS) + return JIT_V(FIRST_REG + offset); + else + return JIT_R(FIRST_TEMP + offset - NUM_REGS); } static inline struct native_register * lightning_reg_to_lightrec( @@ -60,22 +94,26 @@ static inline struct native_register * lightning_reg_to_lightrec( if ((JIT_V0 > JIT_R0 && reg >= JIT_V0) || (JIT_V0 < JIT_R0 && reg < JIT_R0)) { if (JIT_V1 > JIT_V0) - return &cache->lightrec_regs[reg - JIT_V0]; + return &cache->lightrec_regs[reg - JIT_V(FIRST_REG)]; else - return &cache->lightrec_regs[JIT_V0 - reg]; + return &cache->lightrec_regs[JIT_V(FIRST_REG) - reg]; } else { if (JIT_R1 > JIT_R0) - return &cache->lightrec_regs[NUM_REGS + reg - JIT_R0]; + return &cache->lightrec_regs[NUM_REGS + reg - JIT_R(FIRST_TEMP)]; else - return &cache->lightrec_regs[NUM_REGS + JIT_R0 - reg]; + return &cache->lightrec_regs[NUM_REGS + JIT_R(FIRST_TEMP) - reg]; } } u8 lightrec_get_reg_in_flags(struct regcache *cache, u8 jit_reg) { - struct native_register *reg = lightning_reg_to_lightrec(cache, jit_reg); + struct native_register *reg; u8 flags = 0; + if (lightrec_reg_is_zero(jit_reg)) + return REG_EXT | REG_ZEXT; + + reg = lightning_reg_to_lightrec(cache, jit_reg); if (reg->extended) flags |= REG_EXT; if (reg->zero_extended) @@ -86,14 +124,19 @@ u8 lightrec_get_reg_in_flags(struct regcache *cache, u8 jit_reg) void lightrec_set_reg_out_flags(struct regcache *cache, u8 jit_reg, u8 flags) { - struct native_register *reg = lightning_reg_to_lightrec(cache, jit_reg); + struct native_register *reg; - reg->extend = flags & REG_EXT; - reg->zero_extend = flags & REG_ZEXT; + if (!lightrec_reg_is_zero(jit_reg)) { + reg = lightning_reg_to_lightrec(cache, jit_reg); + reg->extend = flags & REG_EXT; + reg->zero_extend = flags & REG_ZEXT; + } } static struct native_register * alloc_temp(struct regcache *cache) { + struct native_register *elm, *nreg = NULL; + enum reg_priority best = REG_NB_PRIORITIES; unsigned int i; /* We search the register list in reverse order. As temporaries are * caller-saved registers, they won't have to be saved back to * memory.
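The scans in alloc_temp() and alloc_in_out() now share one idea: every candidate carries a reg_priority, and the cheapest-to-evict free register wins, where a free temporary (REG_IS_TEMP) costs nothing and a dirty register (REG_IS_DIRTY) would first have to be written back. A simplified stand-alone sketch of that scan (illustrative types, not lightrec's):

#include <stddef.h>

enum prio { P_TEMP, P_TEMP_VALUE, P_ZERO, P_LOADED, P_DIRTY, P_NB };

struct hwreg {
	int used;
	enum prio prio;
};

static struct hwreg *pick_victim(struct hwreg *regs, unsigned int count)
{
	struct hwreg *best_reg = NULL;
	enum prio best = P_NB;
	unsigned int i;

	for (i = 0; i < count; i++) {
		if (!regs[i].used && regs[i].prio < best) {
			best_reg = &regs[i];
			best = regs[i].prio;

			/* Nothing beats a free temporary; stop early. */
			if (best == P_TEMP)
				break;
		}
	}

	return best_reg; /* NULL when every register is in use */
}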
*/ for (i = ARRAY_SIZE(cache->lightrec_regs); i; i--) { - struct native_register *nreg = &cache->lightrec_regs[i - 1]; - if (!nreg->used && !nreg->loaded && !nreg->dirty) - return nreg; - } + elm = &cache->lightrec_regs[i - 1]; - for (i = ARRAY_SIZE(cache->lightrec_regs); i; i--) { - struct native_register *nreg = &cache->lightrec_regs[i - 1]; - if (!nreg->used) - return nreg; + if (!elm->used && elm->prio < best) { + nreg = elm; + best = elm->prio; + + if (best == REG_IS_TEMP) + break; + } } - return NULL; + return nreg; } static struct native_register * find_mapped_reg(struct regcache *cache, @@ -122,9 +165,9 @@ static struct native_register * find_mapped_reg(struct regcache *cache, for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) { struct native_register *nreg = &cache->lightrec_regs[i]; - if ((!reg || nreg->loaded || nreg->dirty) && - nreg->emulated_register == reg && - (!out || !nreg->locked)) + if ((nreg->prio >= REG_IS_ZERO) && + nreg->emulated_register == reg && + (!out || !nreg->locked)) return nreg; } @@ -134,7 +177,8 @@ static struct native_register * alloc_in_out(struct regcache *cache, u8 reg, bool out) { - struct native_register *nreg; + struct native_register *elm, *nreg = NULL; + enum reg_priority best = REG_NB_PRIORITIES; unsigned int i; /* Try to find if the register is already mapped somewhere */ @@ -142,49 +186,40 @@ static struct native_register * alloc_in_out(struct regcache *cache, if (nreg) return nreg; - /* Try to allocate a non-dirty, non-loaded register. - * Loaded registers may be re-used later, so it's better to avoid - * re-using one if possible. */ - for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) { - nreg = &cache->lightrec_regs[i]; - if (!nreg->used && !nreg->dirty && !nreg->loaded) - return nreg; - } + nreg = NULL; - /* Try to allocate a non-dirty register */ for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) { - nreg = &cache->lightrec_regs[i]; - if (!nreg->used && !nreg->dirty) - return nreg; - } + elm = &cache->lightrec_regs[i]; - for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) { - nreg = &cache->lightrec_regs[i]; - if (!nreg->used) - return nreg; + if (!elm->used && elm->prio < best) { + nreg = elm; + best = elm->prio; + + if (best == REG_IS_TEMP) + break; + } } - return NULL; + return nreg; } static void lightrec_discard_nreg(struct native_register *nreg) { nreg->extended = false; nreg->zero_extended = false; - nreg->loaded = false; nreg->output = false; - nreg->dirty = false; nreg->used = false; nreg->locked = false; nreg->emulated_register = -1; + nreg->prio = 0; } static void lightrec_unload_nreg(struct regcache *cache, jit_state_t *_jit, struct native_register *nreg, u8 jit_reg) { /* If we get a dirty register, store back the old value */ - if (nreg->dirty) { - s16 offset = offsetof(struct lightrec_state, native_reg_cache) + if (nreg->prio == REG_IS_DIRTY) { + s16 offset = offsetof(struct lightrec_state, regs.gpr) + (nreg->emulated_register << 2); jit_stxi_i(offset, LIGHTREC_REG_STATE, jit_reg); @@ -195,6 +230,9 @@ static void lightrec_unload_nreg(struct regcache *cache, jit_state_t *_jit, void lightrec_unload_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg) { + if (lightrec_reg_is_zero(jit_reg)) + return; + lightrec_unload_nreg(cache, _jit, lightning_reg_to_lightrec(cache, jit_reg), jit_reg); } @@ -203,8 +241,12 @@ void lightrec_unload_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg) * A locked register can only be used as input,
not output. */ void lightrec_lock_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg) { - struct native_register *reg = lightning_reg_to_lightrec(cache, jit_reg); + struct native_register *reg; + + if (lightrec_reg_is_zero(jit_reg)) + return; + reg = lightning_reg_to_lightrec(cache, jit_reg); lightrec_clean_reg(cache, _jit, jit_reg); reg->locked = true; @@ -212,11 +254,16 @@ void lightrec_lock_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg) u8 lightrec_alloc_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg) { - struct native_register *reg = lightning_reg_to_lightrec(cache, jit_reg); + struct native_register *reg; + + if (lightrec_reg_is_zero(jit_reg)) + return jit_reg; + reg = lightning_reg_to_lightrec(cache, jit_reg); lightrec_unload_nreg(cache, _jit, reg, jit_reg); reg->used = true; + reg->prio = REG_IS_LOADED; return jit_reg; } @@ -233,15 +280,50 @@ u8 lightrec_alloc_reg_temp(struct regcache *cache, jit_state_t *_jit) jit_reg = lightrec_reg_to_lightning(cache, nreg); lightrec_unload_nreg(cache, _jit, nreg, jit_reg); + nreg->prio = REG_IS_TEMP; nreg->used = true; return jit_reg; } +s8 lightrec_get_reg_with_value(struct regcache *cache, intptr_t value) +{ + struct native_register *nreg; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) { + nreg = &cache->lightrec_regs[i]; + + if (nreg->prio == REG_IS_TEMP_VALUE && nreg->value == value) { + nreg->used = true; + return lightrec_reg_to_lightning(cache, nreg); + } + } + + return -1; +} + +void lightrec_temp_set_value(struct regcache *cache, u8 jit_reg, intptr_t value) +{ + struct native_register *nreg; + + nreg = lightning_reg_to_lightrec(cache, jit_reg); + + nreg->prio = REG_IS_TEMP_VALUE; + nreg->value = value; +} + u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit, u8 reg, u8 flags) { + struct native_register *nreg; u8 jit_reg; - struct native_register *nreg = alloc_in_out(cache, reg, true); + s8 hw_reg; + + hw_reg = lightrec_get_hardwired_reg(reg); + if (hw_reg >= 0) + return (u8) hw_reg; + + nreg = alloc_in_out(cache, reg, true); if (!nreg) { /* No free register, no dirty register to free. */ pr_err("No more registers! Abandon ship!\n"); @@ -260,15 +342,23 @@ u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit, nreg->emulated_register = reg; nreg->extend = flags & REG_EXT; nreg->zero_extend = flags & REG_ZEXT; + nreg->prio = reg ? REG_IS_LOADED : REG_IS_ZERO; return jit_reg; } u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, u8 reg, u8 flags) { + struct native_register *nreg; u8 jit_reg; bool reg_changed; - struct native_register *nreg = alloc_in_out(cache, reg, false); + s8 hw_reg; + + hw_reg = lightrec_get_hardwired_reg(reg); + if (hw_reg >= 0) + return (u8) hw_reg; + + nreg = alloc_in_out(cache, reg, false); if (!nreg) { /* No free register, no dirty register to free. */ pr_err("No more registers! 
Abandon ship!\n"); @@ -283,32 +373,28 @@ u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, if (reg_changed) lightrec_unload_nreg(cache, _jit, nreg, jit_reg); - if (!nreg->loaded && !nreg->dirty && reg != 0) { - s16 offset = offsetof(struct lightrec_state, native_reg_cache) + if (nreg->prio < REG_IS_LOADED && reg != 0) { + s16 offset = offsetof(struct lightrec_state, regs.gpr) + (reg << 2); nreg->zero_extended = flags & REG_ZEXT; nreg->extended = !nreg->zero_extended; /* Load previous value from register cache */ -#if __WORDSIZE == 64 if (nreg->zero_extended) jit_ldxi_ui(jit_reg, LIGHTREC_REG_STATE, offset); else jit_ldxi_i(jit_reg, LIGHTREC_REG_STATE, offset); -#else - jit_ldxi_i(jit_reg, LIGHTREC_REG_STATE, offset); -#endif - nreg->loaded = true; + nreg->prio = REG_IS_LOADED; } /* Clear register r0 before use */ - if (reg == 0 && (!nreg->loaded || nreg->dirty)) { + if (reg == 0 && nreg->prio != REG_IS_ZERO) { jit_movi(jit_reg, 0); nreg->extended = true; nreg->zero_extended = true; - nreg->loaded = true; + nreg->prio = REG_IS_ZERO; } nreg->used = true; @@ -319,16 +405,12 @@ u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, (!nreg->zero_extended || !(flags & REG_ZEXT))) { nreg->extended = true; nreg->zero_extended = false; -#if __WORDSIZE == 64 jit_extr_i(jit_reg, jit_reg); -#endif } else if (!(flags & REG_EXT) && (flags & REG_ZEXT) && !nreg->zero_extended) { nreg->zero_extended = true; nreg->extended = false; -#if __WORDSIZE == 64 jit_extr_ui(jit_reg, jit_reg); -#endif } return jit_reg; @@ -351,14 +433,14 @@ u8 lightrec_request_reg_in(struct regcache *cache, jit_state_t *_jit, lightrec_unload_nreg(cache, _jit, nreg, jit_reg); /* Load previous value from register cache */ - offset = offsetof(struct lightrec_state, native_reg_cache) + (reg << 2); + offset = offsetof(struct lightrec_state, regs.gpr) + (reg << 2); jit_ldxi_i(jit_reg, LIGHTREC_REG_STATE, offset); nreg->extended = true; nreg->zero_extended = false; nreg->used = true; - nreg->loaded = true; nreg->emulated_register = reg; + nreg->prio = REG_IS_LOADED; return jit_reg; } @@ -367,7 +449,7 @@ static void free_reg(struct native_register *nreg) { /* Set output registers as dirty */ if (nreg->used && nreg->output && nreg->emulated_register > 0) - nreg->dirty = true; + nreg->prio = REG_IS_DIRTY; if (nreg->output) { nreg->extended = nreg->extend; nreg->zero_extended = nreg->zero_extend; @@ -377,7 +459,8 @@ static void free_reg(struct native_register *nreg) void lightrec_free_reg(struct regcache *cache, u8 jit_reg) { - free_reg(lightning_reg_to_lightrec(cache, jit_reg)); + if (!lightrec_reg_is_zero(jit_reg)) + free_reg(lightning_reg_to_lightrec(cache, jit_reg)); } void lightrec_free_regs(struct regcache *cache) @@ -391,13 +474,18 @@ void lightrec_free_regs(struct regcache *cache) static void clean_reg(jit_state_t *_jit, struct native_register *nreg, u8 jit_reg, bool clean) { - if (nreg->dirty) { - s16 offset = offsetof(struct lightrec_state, native_reg_cache) + if (nreg->prio == REG_IS_DIRTY) { + s16 offset = offsetof(struct lightrec_state, regs.gpr) + (nreg->emulated_register << 2); jit_stxi_i(offset, LIGHTREC_REG_STATE, jit_reg); - nreg->loaded |= nreg->dirty; - nreg->dirty ^= clean; + + if (clean) { + if (nreg->emulated_register == 0) + nreg->prio = REG_IS_ZERO; + else + nreg->prio = REG_IS_LOADED; + } } } @@ -405,11 +493,13 @@ static void clean_regs(struct regcache *cache, jit_state_t *_jit, bool clean) { unsigned int i; - for (i = 0; i < NUM_REGS; i++) - clean_reg(_jit, &cache->lightrec_regs[i], 
JIT_V(i), clean); + for (i = 0; i < NUM_REGS; i++) { + clean_reg(_jit, &cache->lightrec_regs[i], + JIT_V(FIRST_REG + i), clean); + } for (i = 0; i < NUM_TEMPS; i++) { clean_reg(_jit, &cache->lightrec_regs[i + NUM_REGS], - JIT_R(i), clean); + JIT_R(FIRST_TEMP + i), clean); } } @@ -423,10 +513,25 @@ void lightrec_clean_regs(struct regcache *cache, jit_state_t *_jit) clean_regs(cache, _jit, true); } +bool lightrec_has_dirty_regs(struct regcache *cache) +{ + unsigned int i; + + for (i = 0; i < NUM_REGS + NUM_TEMPS; i++) + if (cache->lightrec_regs[i].prio == REG_IS_DIRTY) + return true; + + return false; +} + void lightrec_clean_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg) { - struct native_register *reg = lightning_reg_to_lightrec(cache, jit_reg); - clean_reg(_jit, reg, jit_reg, true); + struct native_register *reg; + + if (!lightrec_reg_is_zero(jit_reg)) { + reg = lightning_reg_to_lightrec(cache, jit_reg); + clean_reg(_jit, reg, jit_reg, true); + } } void lightrec_clean_reg_if_loaded(struct regcache *cache, jit_state_t *_jit, @@ -446,6 +551,15 @@ void lightrec_clean_reg_if_loaded(struct regcache *cache, jit_state_t *_jit, } } +void lightrec_discard_reg_if_loaded(struct regcache *cache, u8 reg) +{ + struct native_register *nreg; + + nreg = find_mapped_reg(cache, reg, false); + if (nreg) + lightrec_discard_nreg(nreg); +} + struct native_register * lightrec_regcache_enter_branch(struct regcache *cache) { struct native_register *backup; @@ -501,15 +615,18 @@ void lightrec_regcache_mark_live(struct regcache *cache, jit_state_t *_jit) for (i = 0; i < NUM_REGS; i++) { nreg = &cache->lightrec_regs[i]; - if (nreg->used || nreg->loaded || nreg->dirty) - jit_live(JIT_V(i)); + if (nreg->used || nreg->prio > REG_IS_TEMP) + jit_live(JIT_V(FIRST_REG + i)); } #endif for (i = 0; i < NUM_TEMPS; i++) { nreg = &cache->lightrec_regs[NUM_REGS + i]; - if (nreg->used || nreg->loaded || nreg->dirty) - jit_live(JIT_R(i)); + if (nreg->used || nreg->prio > REG_IS_TEMP) + jit_live(JIT_R(FIRST_TEMP + i)); } + + jit_live(LIGHTREC_REG_STATE); + jit_live(LIGHTREC_REG_CYCLE); } diff --git a/deps/lightrec/regcache.h b/deps/lightrec/regcache.h index 835c9c92e..cffbf0533 100644 --- a/deps/lightrec/regcache.h +++ b/deps/lightrec/regcache.h @@ -6,12 +6,25 @@ #ifndef __REGCACHE_H__ #define __REGCACHE_H__ -#include "lightrec-private.h" +#include "lightning-wrapper.h" -#define NUM_REGS (JIT_V_NUM - 2) -#define NUM_TEMPS (JIT_R_NUM) +#define NUM_REGS (JIT_V_NUM - 1) #define LIGHTREC_REG_STATE (JIT_V(JIT_V_NUM - 1)) -#define LIGHTREC_REG_CYCLE (JIT_V(JIT_V_NUM - 2)) + +#if defined(__powerpc__) +# define NUM_TEMPS JIT_R_NUM +/* JIT_R0 is callee-saved on PowerPC, we have to use something else */ +# define LIGHTREC_REG_CYCLE _R10 +# define FIRST_TEMP 0 +#else +# define NUM_TEMPS (JIT_R_NUM - 1) +# define LIGHTREC_REG_CYCLE JIT_R0 +# define FIRST_TEMP 1 +#endif + +#include "lightrec-private.h" + +#define FIRST_REG 0 /* Flags for lightrec_alloc_reg_in / lightrec_alloc_reg_out. 
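The REG_EXT / REG_ZEXT flags defined just below matter because, on a 64-bit host, a 32-bit guest value can legitimately sit in a host register in two forms; tracking which form is current lets the allocator skip redundant jit_extr_i()/jit_extr_ui() conversions. A small illustration:

#include <stdint.h>

/* The same 32-bit guest value has two 64-bit representations: */
static int64_t sign_extended(uint32_t v)
{
	return (int64_t)(int32_t)v; /* REG_EXT form */
}

static uint64_t zero_extended(uint32_t v)
{
	return (uint64_t)v; /* REG_ZEXT form */
}

/* sign_extended(0x80000000) == 0xffffffff80000000,
 * zero_extended(0x80000000) == 0x0000000080000000. */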
*/ #define REG_EXT BIT(0) /* register is sign-extended */ @@ -35,6 +48,9 @@ u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, u8 lightrec_request_reg_in(struct regcache *cache, jit_state_t *_jit, u8 reg, u8 jit_reg); +s8 lightrec_get_reg_with_value(struct regcache *cache, intptr_t value); +void lightrec_temp_set_value(struct regcache *cache, u8 jit_reg, intptr_t value); + u8 lightrec_get_reg_in_flags(struct regcache *cache, u8 jit_reg); void lightrec_set_reg_out_flags(struct regcache *cache, u8 jit_reg, u8 flags); @@ -47,9 +63,11 @@ void lightrec_clean_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg); void lightrec_clean_regs(struct regcache *cache, jit_state_t *_jit); void lightrec_unload_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg); void lightrec_storeback_regs(struct regcache *cache, jit_state_t *_jit); +_Bool lightrec_has_dirty_regs(struct regcache *cache); void lightrec_clean_reg_if_loaded(struct regcache *cache, jit_state_t *_jit, u8 reg, _Bool unload); +void lightrec_discard_reg_if_loaded(struct regcache *cache, u8 reg); u8 lightrec_alloc_reg_in_address(struct regcache *cache, jit_state_t *_jit, u8 reg, s16 offset); diff --git a/deps/lightrec/slist.h b/deps/lightrec/slist.h index ae7e5d3e1..37557e64d 100644 --- a/deps/lightrec/slist.h +++ b/deps/lightrec/slist.h @@ -6,6 +6,8 @@ #ifndef __LIGHTREC_SLIST_H__ #define __LIGHTREC_SLIST_H__ +#include + #define container_of(ptr, type, member) \ ((type *)((void *)(ptr) - offsetof(type, member))) diff --git a/deps/lightrec/tlsf/.gitrepo b/deps/lightrec/tlsf/.gitrepo new file mode 100644 index 000000000..692e54257 --- /dev/null +++ b/deps/lightrec/tlsf/.gitrepo @@ -0,0 +1,12 @@ +; DO NOT EDIT (unless you know what you are doing) +; +; This subdirectory is a git "subrepo", and this file is maintained by the +; git-subrepo command. See https://github.com/git-commands/git-subrepo#readme +; +[subrepo] + remote = https://github.com/mattconte/tlsf + branch = master + commit = deff9ab509341f264addbd3c8ada533678591905 + parent = 1dc0344052e7379e16753e4a285c30fd158bf78d + method = merge + cmdver = 0.4.3 diff --git a/deps/lightrec/tlsf/README.md b/deps/lightrec/tlsf/README.md new file mode 100644 index 000000000..982919fc7 --- /dev/null +++ b/deps/lightrec/tlsf/README.md @@ -0,0 +1,92 @@ +# tlsf +Two-Level Segregated Fit memory allocator implementation. +Written by Matthew Conte (matt@baisoku.org). +Released under the BSD license. + +Features +-------- + * O(1) cost for malloc, free, realloc, memalign + * Extremely low overhead per allocation (4 bytes) + * Low overhead per TLSF management of pools (~3kB) + * Low fragmentation + * Compiles to only a few kB of code and data + * Support for adding and removing memory pool regions on the fly + +Caveats +------- + * Currently, assumes architecture can make 4-byte aligned accesses + * Not designed to be thread safe; the user must provide this + +Notes +----- +This code was based on the TLSF 1.4 spec and documentation found at: + + http://www.gii.upv.es/tlsf/main/docs + +It also leverages the TLSF 2.0 improvement to shrink the per-block overhead from 8 to 4 bytes. 
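A minimal usage sketch (API names as declared in tlsf.h; the pool buffer and its size are arbitrary):

    #include "tlsf.h"

    static char heap_mem[1024 * 1024];

    int main(void)
    {
        /* Control structure and first pool both live inside heap_mem. */
        tlsf_t tlsf = tlsf_create_with_pool(heap_mem, sizeof(heap_mem));

        void *p = tlsf_malloc(tlsf, 460); /* O(1) */
        tlsf_free(tlsf, p);               /* O(1) */

        tlsf_destroy(tlsf);
        return 0;
    }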
+ +History +------- +2016/04/10 - v3.1 + * Code moved to github + * tlsfbits.h rolled into tlsf.c + * License changed to BSD + +2014/02/08 - v3.0 + * This version is based on improvements from 3DInteractive GmbH + * Interface changed to allow more than one memory pool + * Separated pool handling from control structure (adding, removing, debugging) + * Control structure and pools can still be constructed in the same memory block + * Memory blocks for control structure and pools are checked for alignment + * Added functions to retrieve control structure size, alignment size, min and max block size, overhead of pool structure, and overhead of a single allocation + * Minimal Pool size is tlsf_block_size_min() + tlsf_pool_overhead() + * Pool must be empty when it is removed, in order to allow O(1) removal + +2011/10/20 - v2.0 + * 64-bit support + * More compiler intrinsics for ffs/fls + * ffs/fls verification during TLSF creation in debug builds + +2008/04/04 - v1.9 + * Add tlsf_heap_check, a heap integrity check + * Support a predefined tlsf_assert macro + * Fix realloc case where block should shrink; if adjacent block is in use, execution would go down the slow path + +2007/02/08 - v1.8 + * Fix for unnecessary reallocation in tlsf_realloc + +2007/02/03 - v1.7 + * tlsf_heap_walk takes a callback + * tlsf_realloc now returns NULL on failure + * tlsf_memalign optimization for 4-byte alignment + * Usage of size_t where appropriate + +2006/11/21 - v1.6 + * ffs/fls broken out into tlsfbits.h + * tlsf_overhead queries per-pool overhead + +2006/11/07 - v1.5 + * Smart realloc implementation + * Smart memalign implementation + +2006/10/11 - v1.4 + * Add some ffs/fls implementations + * Minor code footprint reduction + +2006/09/14 - v1.3 + * Profiling indicates heavy use of blocks of size 1-128, so implement small block handling + * Reduce pool overhead by about 1kb + * Reduce minimum block size from 32 to 12 bytes + * Realloc bug fix + +2006/09/09 - v1.2 + * Add tlsf_block_size + * Static assertion mechanism for invariants + * Minor bugfixes + +2006/09/01 - v1.1 + * Add tlsf_realloc + * Add tlsf_walk_heap + +2006/08/25 - v1.0 + * First release diff --git a/deps/lightrec/tlsf/tlsf.c b/deps/lightrec/tlsf/tlsf.c new file mode 100644 index 000000000..af575737c --- /dev/null +++ b/deps/lightrec/tlsf/tlsf.c @@ -0,0 +1,1264 @@ +#include +#include +#include +#include +#include +#include + +#include "tlsf.h" + +#if defined(__cplusplus) +#define tlsf_decl inline +#else +#define tlsf_decl static +#endif + +/* +** Architecture-specific bit manipulation routines. +** +** TLSF achieves O(1) cost for malloc and free operations by limiting +** the search for a free block to a free list of guaranteed size +** adequate to fulfill the request, combined with efficient free list +** queries using bitmasks and architecture-specific bit-manipulation +** routines. +** +** Most modern processors provide instructions to count leading zeroes +** in a word, find the lowest and highest set bit, etc. These +** specific implementations will be used when available, falling back +** to a reasonably efficient generic implementation. +** +** NOTE: TLSF spec relies on ffs/fls returning value 0..31. +** ffs/fls return 1-32 by default, returning 0 for error. +*/ + +/* +** Detect whether or not we are building for a 32- or 64-bit (LP/LLP) +** architecture. There is no reliable portable method at compile-time. 
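To make the 0-based convention in the NOTE above concrete, here is a check that any of the tlsf_ffs()/tlsf_fls() variants defined below must pass (an illustrative test, not part of the allocator):

#include <assert.h>

static void check_bitscan_convention(void)
{
	/* 0x48 has bits 3 and 6 set. */
	assert(tlsf_ffs(0x48) == 3); /* index of the lowest set bit */
	assert(tlsf_fls(0x48) == 6); /* index of the highest set bit */
	assert(tlsf_fls(0) == -1);   /* no bit set: error value */
}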
+*/ +#if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) \ + || defined (_WIN64) || defined (__LP64__) || defined (__LLP64__) +#define TLSF_64BIT +#endif + +/* +** gcc 3.4 and above have builtin support, specialized for architecture. +** Some compilers masquerade as gcc; patchlevel test filters them out. +*/ +#if defined (__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) \ + && defined (__GNUC_PATCHLEVEL__) + +#if defined (__SNC__) +/* SNC for Playstation 3. */ + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + const unsigned int reverse = word & (~word + 1); + const int bit = 32 - __builtin_clz(reverse); + return bit - 1; +} + +#else + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + return __builtin_ffs(word) - 1; +} + +#endif + +tlsf_decl int tlsf_fls(unsigned int word) +{ + const int bit = word ? 32 - __builtin_clz(word) : 0; + return bit - 1; +} + +#elif defined (_MSC_VER) && (_MSC_VER >= 1400) && (defined (_M_IX86) || defined (_M_X64)) +/* Microsoft Visual C++ support on x86/X64 architectures. */ + +#include + +#pragma intrinsic(_BitScanReverse) +#pragma intrinsic(_BitScanForward) + +tlsf_decl int tlsf_fls(unsigned int word) +{ + unsigned long index; + return _BitScanReverse(&index, word) ? index : -1; +} + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + unsigned long index; + return _BitScanForward(&index, word) ? index : -1; +} + +#elif defined (_MSC_VER) && defined (_M_PPC) +/* Microsoft Visual C++ support on PowerPC architectures. */ + +#include + +tlsf_decl int tlsf_fls(unsigned int word) +{ + const int bit = 32 - _CountLeadingZeros(word); + return bit - 1; +} + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + const unsigned int reverse = word & (~word + 1); + const int bit = 32 - _CountLeadingZeros(reverse); + return bit - 1; +} + +#elif defined (__ARMCC_VERSION) +/* RealView Compilation Tools for ARM */ + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + const unsigned int reverse = word & (~word + 1); + const int bit = 32 - __clz(reverse); + return bit - 1; +} + +tlsf_decl int tlsf_fls(unsigned int word) +{ + const int bit = word ? 32 - __clz(word) : 0; + return bit - 1; +} + +#elif defined (__ghs__) +/* Green Hills support for PowerPC */ + +#include + +tlsf_decl int tlsf_ffs(unsigned int word) +{ + const unsigned int reverse = word & (~word + 1); + const int bit = 32 - __CLZ32(reverse); + return bit - 1; +} + +tlsf_decl int tlsf_fls(unsigned int word) +{ + const int bit = word ? 32 - __CLZ32(word) : 0; + return bit - 1; +} + +#else +/* Fall back to generic implementation. */ + +tlsf_decl int tlsf_fls_generic(unsigned int word) +{ + int bit = 32; + + if (!word) bit -= 1; + if (!(word & 0xffff0000)) { word <<= 16; bit -= 16; } + if (!(word & 0xff000000)) { word <<= 8; bit -= 8; } + if (!(word & 0xf0000000)) { word <<= 4; bit -= 4; } + if (!(word & 0xc0000000)) { word <<= 2; bit -= 2; } + if (!(word & 0x80000000)) { word <<= 1; bit -= 1; } + + return bit; +} + +/* Implement ffs in terms of fls. */ +tlsf_decl int tlsf_ffs(unsigned int word) +{ + return tlsf_fls_generic(word & (~word + 1)) - 1; +} + +tlsf_decl int tlsf_fls(unsigned int word) +{ + return tlsf_fls_generic(word) - 1; +} + +#endif + +/* Possibly 64-bit version of tlsf_fls. 
*/ +#if defined (TLSF_64BIT) +tlsf_decl int tlsf_fls_sizet(size_t size) +{ + int high = (int)(size >> 32); + int bits = 0; + if (high) + { + bits = 32 + tlsf_fls(high); + } + else + { + bits = tlsf_fls((int)size & 0xffffffff); + + } + return bits; +} +#else +#define tlsf_fls_sizet tlsf_fls +#endif + +#undef tlsf_decl + +/* +** Constants. +*/ + +/* Public constants: may be modified. */ +enum tlsf_public +{ + /* log2 of number of linear subdivisions of block sizes. Larger + ** values require more memory in the control structure. Values of + ** 4 or 5 are typical. + */ + SL_INDEX_COUNT_LOG2 = 5, +}; + +/* Private constants: do not modify. */ +enum tlsf_private +{ +#if defined (TLSF_64BIT) + /* All allocation sizes and addresses are aligned to 8 bytes. */ + ALIGN_SIZE_LOG2 = 3, +#else + /* All allocation sizes and addresses are aligned to 4 bytes. */ + ALIGN_SIZE_LOG2 = 2, +#endif + ALIGN_SIZE = (1 << ALIGN_SIZE_LOG2), + + /* + ** We support allocations of sizes up to (1 << FL_INDEX_MAX) bits. + ** However, because we linearly subdivide the second-level lists, and + ** our minimum size granularity is 4 bytes, it doesn't make sense to + ** create first-level lists for sizes smaller than SL_INDEX_COUNT * 4, + ** or (1 << (SL_INDEX_COUNT_LOG2 + 2)) bytes, as there we will be + ** trying to split size ranges into more slots than we have available. + ** Instead, we calculate the minimum threshold size, and place all + ** blocks below that size into the 0th first-level list. + */ + +#if defined (TLSF_64BIT) + /* + ** TODO: We can increase this to support larger sizes, at the expense + ** of more overhead in the TLSF structure. + */ + FL_INDEX_MAX = 32, +#else + FL_INDEX_MAX = 30, +#endif + SL_INDEX_COUNT = (1 << SL_INDEX_COUNT_LOG2), + FL_INDEX_SHIFT = (SL_INDEX_COUNT_LOG2 + ALIGN_SIZE_LOG2), + FL_INDEX_COUNT = (FL_INDEX_MAX - FL_INDEX_SHIFT + 1), + + SMALL_BLOCK_SIZE = (1 << FL_INDEX_SHIFT), +}; + +/* +** Cast and min/max macros. +*/ + +#define tlsf_cast(t, exp) ((t) (exp)) +#define tlsf_min(a, b) ((a) < (b) ? (a) : (b)) +#define tlsf_max(a, b) ((a) > (b) ? (a) : (b)) + +/* +** Set assert macro, if it has not been provided by the user. +*/ +#if !defined (tlsf_assert) +#define tlsf_assert assert +#endif + +/* +** Static assertion mechanism. +*/ + +#define _tlsf_glue2(x, y) x ## y +#define _tlsf_glue(x, y) _tlsf_glue2(x, y) +#define tlsf_static_assert(exp) \ + typedef char _tlsf_glue(static_assert, __LINE__) [(exp) ? 1 : -1] + +/* This code has been tested on 32- and 64-bit (LP/LLP) architectures. */ +tlsf_static_assert(sizeof(int) * CHAR_BIT == 32); +tlsf_static_assert(sizeof(size_t) * CHAR_BIT >= 32); +tlsf_static_assert(sizeof(size_t) * CHAR_BIT <= 64); + +/* SL_INDEX_COUNT must be <= number of bits in sl_bitmap's storage type. */ +tlsf_static_assert(sizeof(unsigned int) * CHAR_BIT >= SL_INDEX_COUNT); + +/* Ensure we've properly tuned our sizes. */ +tlsf_static_assert(ALIGN_SIZE == SMALL_BLOCK_SIZE / SL_INDEX_COUNT); + +/* +** Data structures and associated constants. +*/ + +/* +** Block header structure. +** +** There are several implementation subtleties involved: +** - The prev_phys_block field is only valid if the previous block is free. +** - The prev_phys_block field is actually stored at the end of the +** previous block. It appears at the beginning of this structure only to +** simplify the implementation. +** - The next_free / prev_free fields are only valid if the block is free. +*/ +typedef struct block_header_t +{ + /* Points to the previous physical block. 
*/ + struct block_header_t* prev_phys_block; + + /* The size of this block, excluding the block header. */ + size_t size; + + /* Next and previous free blocks. */ + struct block_header_t* next_free; + struct block_header_t* prev_free; +} block_header_t; + +/* +** Since block sizes are always at least a multiple of 4, the two least +** significant bits of the size field are used to store the block status: +** - bit 0: whether block is busy or free +** - bit 1: whether previous block is busy or free +*/ +static const size_t block_header_free_bit = 1 << 0; +static const size_t block_header_prev_free_bit = 1 << 1; + +/* +** The size of the block header exposed to used blocks is the size field. +** The prev_phys_block field is stored *inside* the previous free block. +*/ +static const size_t block_header_overhead = sizeof(size_t); + +/* User data starts directly after the size field in a used block. */ +static const size_t block_start_offset = + offsetof(block_header_t, size) + sizeof(size_t); + +/* +** A free block must be large enough to store its header minus the size of +** the prev_phys_block field, and no larger than the number of addressable +** bits for FL_INDEX. +*/ +static const size_t block_size_min = + sizeof(block_header_t) - sizeof(block_header_t*); +static const size_t block_size_max = tlsf_cast(size_t, 1) << FL_INDEX_MAX; + + +/* The TLSF control structure. */ +typedef struct control_t +{ + /* Empty lists point at this block to indicate they are free. */ + block_header_t block_null; + + /* Bitmaps for free lists. */ + unsigned int fl_bitmap; + unsigned int sl_bitmap[FL_INDEX_COUNT]; + + /* Head of free lists. */ + block_header_t* blocks[FL_INDEX_COUNT][SL_INDEX_COUNT]; +} control_t; + +/* A type used for casting when doing pointer arithmetic. */ +typedef ptrdiff_t tlsfptr_t; + +/* +** block_header_t member functions. +*/ + +static size_t block_size(const block_header_t* block) +{ + return block->size & ~(block_header_free_bit | block_header_prev_free_bit); +} + +static void block_set_size(block_header_t* block, size_t size) +{ + const size_t oldsize = block->size; + block->size = size | (oldsize & (block_header_free_bit | block_header_prev_free_bit)); +} + +static int block_is_last(const block_header_t* block) +{ + return block_size(block) == 0; +} + +static int block_is_free(const block_header_t* block) +{ + return tlsf_cast(int, block->size & block_header_free_bit); +} + +static void block_set_free(block_header_t* block) +{ + block->size |= block_header_free_bit; +} + +static void block_set_used(block_header_t* block) +{ + block->size &= ~block_header_free_bit; +} + +static int block_is_prev_free(const block_header_t* block) +{ + return tlsf_cast(int, block->size & block_header_prev_free_bit); +} + +static void block_set_prev_free(block_header_t* block) +{ + block->size |= block_header_prev_free_bit; +} + +static void block_set_prev_used(block_header_t* block) +{ + block->size &= ~block_header_prev_free_bit; +} + +static block_header_t* block_from_ptr(const void* ptr) +{ + return tlsf_cast(block_header_t*, + tlsf_cast(unsigned char*, ptr) - block_start_offset); +} + +static void* block_to_ptr(const block_header_t* block) +{ + return tlsf_cast(void*, + tlsf_cast(unsigned char*, block) + block_start_offset); +} + +/* Return location of next block after block of given size. */ +static block_header_t* offset_to_block(const void* ptr, size_t size) +{ + return tlsf_cast(block_header_t*, tlsf_cast(tlsfptr_t, ptr) + size); +} + +/* Return location of previous block. 
*/ +static block_header_t* block_prev(const block_header_t* block) +{ + tlsf_assert(block_is_prev_free(block) && "previous block must be free"); + return block->prev_phys_block; +} + +/* Return location of next existing block. */ +static block_header_t* block_next(const block_header_t* block) +{ + block_header_t* next = offset_to_block(block_to_ptr(block), + block_size(block) - block_header_overhead); + tlsf_assert(!block_is_last(block)); + return next; +} + +/* Link a new block with its physical neighbor, return the neighbor. */ +static block_header_t* block_link_next(block_header_t* block) +{ + block_header_t* next = block_next(block); + next->prev_phys_block = block; + return next; +} + +static void block_mark_as_free(block_header_t* block) +{ + /* Link the block to the next block, first. */ + block_header_t* next = block_link_next(block); + block_set_prev_free(next); + block_set_free(block); +} + +static void block_mark_as_used(block_header_t* block) +{ + block_header_t* next = block_next(block); + block_set_prev_used(next); + block_set_used(block); +} + +static size_t align_up(size_t x, size_t align) +{ + tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two"); + return (x + (align - 1)) & ~(align - 1); +} + +static size_t align_down(size_t x, size_t align) +{ + tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two"); + return x - (x & (align - 1)); +} + +static void* align_ptr(const void* ptr, size_t align) +{ + const tlsfptr_t aligned = + (tlsf_cast(tlsfptr_t, ptr) + (align - 1)) & ~(align - 1); + tlsf_assert(0 == (align & (align - 1)) && "must align to a power of two"); + return tlsf_cast(void*, aligned); +} + +/* +** Adjust an allocation size to be aligned to word size, and no smaller +** than internal minimum. +*/ +static size_t adjust_request_size(size_t size, size_t align) +{ + size_t adjust = 0; + if (size) + { + const size_t aligned = align_up(size, align); + + /* aligned size must not exceed block_size_max or we'll go out of bounds on sl_bitmap */ + if (aligned < block_size_max) + { + adjust = tlsf_max(aligned, block_size_min); + } + } + return adjust; +} + +/* +** TLSF utility functions. In most cases, these are direct translations of +** the documentation found in the white paper. +*/ + +static void mapping_insert(size_t size, int* fli, int* sli) +{ + int fl, sl; + if (size < SMALL_BLOCK_SIZE) + { + /* Store small blocks in first list. */ + fl = 0; + sl = tlsf_cast(int, size) / (SMALL_BLOCK_SIZE / SL_INDEX_COUNT); + } + else + { + fl = tlsf_fls_sizet(size); + sl = tlsf_cast(int, size >> (fl - SL_INDEX_COUNT_LOG2)) ^ (1 << SL_INDEX_COUNT_LOG2); + fl -= (FL_INDEX_SHIFT - 1); + } + *fli = fl; + *sli = sl; +} + +/* This version rounds up to the next block size (for allocations) */ +static void mapping_search(size_t size, int* fli, int* sli) +{ + if (size >= SMALL_BLOCK_SIZE) + { + const size_t round = (1 << (tlsf_fls_sizet(size) - SL_INDEX_COUNT_LOG2)) - 1; + size += round; + } + mapping_insert(size, fli, sli); +} + +static block_header_t* search_suitable_block(control_t* control, int* fli, int* sli) +{ + int fl = *fli; + int sl = *sli; + + /* + ** First, search for a block in the list associated with the given + ** fl/sl index. + */ + unsigned int sl_map = control->sl_bitmap[fl] & (~0U << sl); + if (!sl_map) + { + /* No block exists. Search in the next largest first-level list. */ + const unsigned int fl_map = control->fl_bitmap & (~0U << (fl + 1)); + if (!fl_map) + { + /* No free blocks available, memory has been exhausted.
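To see the two-level mapping above in action, take a 460-byte block on a 32-bit build (FL_INDEX_SHIFT == 7, SL_INDEX_COUNT_LOG2 == 5); the worked example below is illustrative only:

#include <assert.h>

static void mapping_insert_example(void)
{
	int fl, sl;

	mapping_insert(460, &fl, &sl);

	/* tlsf_fls(460) == 8, so 460 falls in the first level [256, 512);
	 * sl = (460 >> 3) ^ (1 << 5) = 57 ^ 32 = 25, i.e. slice 25 of the
	 * 32 slices of 8 bytes each; fl = 8 - (7 - 1) = 2.
	 * The block therefore lives in control->blocks[2][25]. */
	assert(fl == 2 && sl == 25);
}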
*/ + return 0; + } + + fl = tlsf_ffs(fl_map); + *fli = fl; + sl_map = control->sl_bitmap[fl]; + } + tlsf_assert(sl_map && "internal error - second level bitmap is null"); + sl = tlsf_ffs(sl_map); + *sli = sl; + + /* Return the first block in the free list. */ + return control->blocks[fl][sl]; +} + +/* Remove a free block from the free list.*/ +static void remove_free_block(control_t* control, block_header_t* block, int fl, int sl) +{ + block_header_t* prev = block->prev_free; + block_header_t* next = block->next_free; + tlsf_assert(prev && "prev_free field can not be null"); + tlsf_assert(next && "next_free field can not be null"); + next->prev_free = prev; + prev->next_free = next; + + /* If this block is the head of the free list, set new head. */ + if (control->blocks[fl][sl] == block) + { + control->blocks[fl][sl] = next; + + /* If the new head is null, clear the bitmap. */ + if (next == &control->block_null) + { + control->sl_bitmap[fl] &= ~(1U << sl); + + /* If the second bitmap is now empty, clear the fl bitmap. */ + if (!control->sl_bitmap[fl]) + { + control->fl_bitmap &= ~(1U << fl); + } + } + } +} + +/* Insert a free block into the free block list. */ +static void insert_free_block(control_t* control, block_header_t* block, int fl, int sl) +{ + block_header_t* current = control->blocks[fl][sl]; + tlsf_assert(current && "free list cannot have a null entry"); + tlsf_assert(block && "cannot insert a null entry into the free list"); + block->next_free = current; + block->prev_free = &control->block_null; + current->prev_free = block; + + tlsf_assert(block_to_ptr(block) == align_ptr(block_to_ptr(block), ALIGN_SIZE) + && "block not aligned properly"); + /* + ** Insert the new block at the head of the list, and mark the first- + ** and second-level bitmaps appropriately. + */ + control->blocks[fl][sl] = block; + control->fl_bitmap |= (1U << fl); + control->sl_bitmap[fl] |= (1U << sl); +} + +/* Remove a given block from the free list. */ +static void block_remove(control_t* control, block_header_t* block) +{ + int fl, sl; + mapping_insert(block_size(block), &fl, &sl); + remove_free_block(control, block, fl, sl); +} + +/* Insert a given block into the free list. */ +static void block_insert(control_t* control, block_header_t* block) +{ + int fl, sl; + mapping_insert(block_size(block), &fl, &sl); + insert_free_block(control, block, fl, sl); +} + +static int block_can_split(block_header_t* block, size_t size) +{ + return block_size(block) >= sizeof(block_header_t) + size; +} + +/* Split a block into two, the second of which is free. */ +static block_header_t* block_split(block_header_t* block, size_t size) +{ + /* Calculate the amount of space left in the remaining block. */ + block_header_t* remaining = + offset_to_block(block_to_ptr(block), size - block_header_overhead); + + const size_t remain_size = block_size(block) - (size + block_header_overhead); + + tlsf_assert(block_to_ptr(remaining) == align_ptr(block_to_ptr(remaining), ALIGN_SIZE) + && "remaining block not aligned properly"); + + tlsf_assert(block_size(block) == remain_size + size + block_header_overhead); + block_set_size(remaining, remain_size); + tlsf_assert(block_size(remaining) >= block_size_min && "block split with invalid size"); + + block_set_size(block, size); + block_mark_as_free(remaining); + + return remaining; +} + +/* Absorb a free block's storage into an adjacent previous free block. 
*/ +static block_header_t* block_absorb(block_header_t* prev, block_header_t* block) +{ + tlsf_assert(!block_is_last(prev) && "previous block can't be last"); + /* Note: Leaves flags untouched. */ + prev->size += block_size(block) + block_header_overhead; + block_link_next(prev); + return prev; +} + +/* Merge a just-freed block with an adjacent previous free block. */ +static block_header_t* block_merge_prev(control_t* control, block_header_t* block) +{ + if (block_is_prev_free(block)) + { + block_header_t* prev = block_prev(block); + tlsf_assert(prev && "prev physical block can't be null"); + tlsf_assert(block_is_free(prev) && "prev block is not free though marked as such"); + block_remove(control, prev); + block = block_absorb(prev, block); + } + + return block; +} + +/* Merge a just-freed block with an adjacent free block. */ +static block_header_t* block_merge_next(control_t* control, block_header_t* block) +{ + block_header_t* next = block_next(block); + tlsf_assert(next && "next physical block can't be null"); + + if (block_is_free(next)) + { + tlsf_assert(!block_is_last(block) && "previous block can't be last"); + block_remove(control, next); + block = block_absorb(block, next); + } + + return block; +} + +/* Trim any trailing block space off the end of a block, return to pool. */ +static void block_trim_free(control_t* control, block_header_t* block, size_t size) +{ + tlsf_assert(block_is_free(block) && "block must be free"); + if (block_can_split(block, size)) + { + block_header_t* remaining_block = block_split(block, size); + block_link_next(block); + block_set_prev_free(remaining_block); + block_insert(control, remaining_block); + } +} + +/* Trim any trailing block space off the end of a used block, return to pool. */ +static void block_trim_used(control_t* control, block_header_t* block, size_t size) +{ + tlsf_assert(!block_is_free(block) && "block must be used"); + if (block_can_split(block, size)) + { + /* If the next block is free, we must coalesce. */ + block_header_t* remaining_block = block_split(block, size); + block_set_prev_used(remaining_block); + + remaining_block = block_merge_next(control, remaining_block); + block_insert(control, remaining_block); + } +} + +static block_header_t* block_trim_free_leading(control_t* control, block_header_t* block, size_t size) +{ + block_header_t* remaining_block = block; + if (block_can_split(block, size)) + { + /* We want the 2nd block. */ + remaining_block = block_split(block, size - block_header_overhead); + block_set_prev_free(remaining_block); + + block_link_next(block); + block_insert(control, block); + } + + return remaining_block; +} + +static block_header_t* block_locate_free(control_t* control, size_t size) +{ + int fl = 0, sl = 0; + block_header_t* block = 0; + + if (size) + { + mapping_search(size, &fl, &sl); + + /* + ** mapping_search can futz with the size, so for excessively large sizes it can sometimes wind up + ** with indices that are off the end of the block array. + ** So, we protect against that here, since this is the only callsite of mapping_search. + ** Note that we don't need to check sl, since it comes from a modulo operation that guarantees it's always in range. 
+ */ + if (fl < FL_INDEX_COUNT) + { + block = search_suitable_block(control, &fl, &sl); + } + } + + if (block) + { + tlsf_assert(block_size(block) >= size); + remove_free_block(control, block, fl, sl); + } + + return block; +} + +static void* block_prepare_used(control_t* control, block_header_t* block, size_t size) +{ + void* p = 0; + if (block) + { + tlsf_assert(size && "size must be non-zero"); + block_trim_free(control, block, size); + block_mark_as_used(block); + p = block_to_ptr(block); + } + return p; +} + +/* Clear structure and point all empty lists at the null block. */ +static void control_construct(control_t* control) +{ + int i, j; + + control->block_null.next_free = &control->block_null; + control->block_null.prev_free = &control->block_null; + + control->fl_bitmap = 0; + for (i = 0; i < FL_INDEX_COUNT; ++i) + { + control->sl_bitmap[i] = 0; + for (j = 0; j < SL_INDEX_COUNT; ++j) + { + control->blocks[i][j] = &control->block_null; + } + } +} + +/* +** Debugging utilities. +*/ + +typedef struct integrity_t +{ + int prev_status; + int status; +} integrity_t; + +#define tlsf_insist(x) { tlsf_assert(x); if (!(x)) { status--; } } + +static void integrity_walker(void* ptr, size_t size, int used, void* user) +{ + block_header_t* block = block_from_ptr(ptr); + integrity_t* integ = tlsf_cast(integrity_t*, user); + const int this_prev_status = block_is_prev_free(block) ? 1 : 0; + const int this_status = block_is_free(block) ? 1 : 0; + const size_t this_block_size = block_size(block); + + int status = 0; + (void)used; + tlsf_insist(integ->prev_status == this_prev_status && "prev status incorrect"); + tlsf_insist(size == this_block_size && "block size incorrect"); + + integ->prev_status = this_status; + integ->status += status; +} + +int tlsf_check(tlsf_t tlsf) +{ + int i, j; + + control_t* control = tlsf_cast(control_t*, tlsf); + int status = 0; + + /* Check that the free lists and bitmaps are accurate. */ + for (i = 0; i < FL_INDEX_COUNT; ++i) + { + for (j = 0; j < SL_INDEX_COUNT; ++j) + { + const int fl_map = control->fl_bitmap & (1U << i); + const int sl_list = control->sl_bitmap[i]; + const int sl_map = sl_list & (1U << j); + const block_header_t* block = control->blocks[i][j]; + + /* Check that first- and second-level lists agree. */ + if (!fl_map) + { + tlsf_insist(!sl_map && "second-level map must be null"); + } + + if (!sl_map) + { + tlsf_insist(block == &control->block_null && "block list must be null"); + continue; + } + + /* Check that there is at least one free block. */ + tlsf_insist(sl_list && "no free blocks in second-level map"); + tlsf_insist(block != &control->block_null && "block should not be null"); + + while (block != &control->block_null) + { + int fli, sli; + tlsf_insist(block_is_free(block) && "block should be free"); + tlsf_insist(!block_is_prev_free(block) && "blocks should have coalesced"); + tlsf_insist(!block_is_free(block_next(block)) && "blocks should have coalesced"); + tlsf_insist(block_is_prev_free(block_next(block)) && "block should be free"); + tlsf_insist(block_size(block) >= block_size_min && "block not minimum size"); + + mapping_insert(block_size(block), &fli, &sli); + tlsf_insist(fli == i && sli == j && "block size indexed in wrong list"); + block = block->next_free; + } + } + } + + return status; +} + +#undef tlsf_insist + +static void default_walker(void* ptr, size_t size, int used, void* user) +{ + (void)user; + printf("\t%p %s size: %x (%p)\n", ptr, used ? 
"used" : "free", (unsigned int)size, block_from_ptr(ptr)); +} + +void tlsf_walk_pool(pool_t pool, tlsf_walker walker, void* user) +{ + tlsf_walker pool_walker = walker ? walker : default_walker; + block_header_t* block = + offset_to_block(pool, -(int)block_header_overhead); + + while (block && !block_is_last(block)) + { + pool_walker( + block_to_ptr(block), + block_size(block), + !block_is_free(block), + user); + block = block_next(block); + } +} + +size_t tlsf_block_size(void* ptr) +{ + size_t size = 0; + if (ptr) + { + const block_header_t* block = block_from_ptr(ptr); + size = block_size(block); + } + return size; +} + +int tlsf_check_pool(pool_t pool) +{ + /* Check that the blocks are physically correct. */ + integrity_t integ = { 0, 0 }; + tlsf_walk_pool(pool, integrity_walker, &integ); + + return integ.status; +} + +/* +** Size of the TLSF structures in a given memory block passed to +** tlsf_create, equal to the size of a control_t +*/ +size_t tlsf_size(void) +{ + return sizeof(control_t); +} + +size_t tlsf_align_size(void) +{ + return ALIGN_SIZE; +} + +size_t tlsf_block_size_min(void) +{ + return block_size_min; +} + +size_t tlsf_block_size_max(void) +{ + return block_size_max; +} + +/* +** Overhead of the TLSF structures in a given memory block passed to +** tlsf_add_pool, equal to the overhead of a free block and the +** sentinel block. +*/ +size_t tlsf_pool_overhead(void) +{ + return 2 * block_header_overhead; +} + +size_t tlsf_alloc_overhead(void) +{ + return block_header_overhead; +} + +pool_t tlsf_add_pool(tlsf_t tlsf, void* mem, size_t bytes) +{ + block_header_t* block; + block_header_t* next; + + const size_t pool_overhead = tlsf_pool_overhead(); + const size_t pool_bytes = align_down(bytes - pool_overhead, ALIGN_SIZE); + + if (((ptrdiff_t)mem % ALIGN_SIZE) != 0) + { + printf("tlsf_add_pool: Memory must be aligned by %u bytes.\n", + (unsigned int)ALIGN_SIZE); + return 0; + } + + if (pool_bytes < block_size_min || pool_bytes > block_size_max) + { +#if defined (TLSF_64BIT) + printf("tlsf_add_pool: Memory size must be between 0x%x and 0x%x00 bytes.\n", + (unsigned int)(pool_overhead + block_size_min), + (unsigned int)((pool_overhead + block_size_max) / 256)); +#else + printf("tlsf_add_pool: Memory size must be between %u and %u bytes.\n", + (unsigned int)(pool_overhead + block_size_min), + (unsigned int)(pool_overhead + block_size_max)); +#endif + return 0; + } + + /* + ** Create the main free block. Offset the start of the block slightly + ** so that the prev_phys_block field falls outside of the pool - + ** it will never be used. + */ + block = offset_to_block(mem, -(tlsfptr_t)block_header_overhead); + block_set_size(block, pool_bytes); + block_set_free(block); + block_set_prev_used(block); + block_insert(tlsf_cast(control_t*, tlsf), block); + + /* Split the block to create a zero-size sentinel block. 
*/ + next = block_link_next(block); + block_set_size(next, 0); + block_set_used(next); + block_set_prev_free(next); + + return mem; +} + +void tlsf_remove_pool(tlsf_t tlsf, pool_t pool) +{ + control_t* control = tlsf_cast(control_t*, tlsf); + block_header_t* block = offset_to_block(pool, -(int)block_header_overhead); + + int fl = 0, sl = 0; + + tlsf_assert(block_is_free(block) && "block should be free"); + tlsf_assert(!block_is_free(block_next(block)) && "next block should not be free"); + tlsf_assert(block_size(block_next(block)) == 0 && "next block size should be zero"); + + mapping_insert(block_size(block), &fl, &sl); + remove_free_block(control, block, fl, sl); +} + +/* +** TLSF main interface. +*/ + +#if _DEBUG +int test_ffs_fls() +{ + /* Verify ffs/fls work properly. */ + int rv = 0; + rv += (tlsf_ffs(0) == -1) ? 0 : 0x1; + rv += (tlsf_fls(0) == -1) ? 0 : 0x2; + rv += (tlsf_ffs(1) == 0) ? 0 : 0x4; + rv += (tlsf_fls(1) == 0) ? 0 : 0x8; + rv += (tlsf_ffs(0x80000000) == 31) ? 0 : 0x10; + rv += (tlsf_ffs(0x80008000) == 15) ? 0 : 0x20; + rv += (tlsf_fls(0x80000008) == 31) ? 0 : 0x40; + rv += (tlsf_fls(0x7FFFFFFF) == 30) ? 0 : 0x80; + +#if defined (TLSF_64BIT) + rv += (tlsf_fls_sizet(0x80000000) == 31) ? 0 : 0x100; + rv += (tlsf_fls_sizet(0x100000000) == 32) ? 0 : 0x200; + rv += (tlsf_fls_sizet(0xffffffffffffffff) == 63) ? 0 : 0x400; +#endif + + if (rv) + { + printf("test_ffs_fls: %x ffs/fls tests failed.\n", rv); + } + return rv; +} +#endif + +tlsf_t tlsf_create(void* mem) +{ +#if _DEBUG + if (test_ffs_fls()) + { + return 0; + } +#endif + + if (((tlsfptr_t)mem % ALIGN_SIZE) != 0) + { + printf("tlsf_create: Memory must be aligned to %u bytes.\n", + (unsigned int)ALIGN_SIZE); + return 0; + } + + control_construct(tlsf_cast(control_t*, mem)); + + return tlsf_cast(tlsf_t, mem); +} + +tlsf_t tlsf_create_with_pool(void* mem, size_t bytes) +{ + tlsf_t tlsf = tlsf_create(mem); + tlsf_add_pool(tlsf, (char*)mem + tlsf_size(), bytes - tlsf_size()); + return tlsf; +} + +void tlsf_destroy(tlsf_t tlsf) +{ + /* Nothing to do. */ + (void)tlsf; +} + +pool_t tlsf_get_pool(tlsf_t tlsf) +{ + return tlsf_cast(pool_t, (char*)tlsf + tlsf_size()); +} + +void* tlsf_malloc(tlsf_t tlsf, size_t size) +{ + control_t* control = tlsf_cast(control_t*, tlsf); + const size_t adjust = adjust_request_size(size, ALIGN_SIZE); + block_header_t* block = block_locate_free(control, adjust); + return block_prepare_used(control, block, adjust); +} + +void* tlsf_memalign(tlsf_t tlsf, size_t align, size_t size) +{ + control_t* control = tlsf_cast(control_t*, tlsf); + const size_t adjust = adjust_request_size(size, ALIGN_SIZE); + + /* + ** We must allocate an additional minimum block size bytes so that if + ** our free block will leave an alignment gap which is smaller, we can + ** trim a leading free block and release it back to the pool. We must + ** do this because the previous physical block is in use, therefore + ** the prev_phys_block field is not valid, and we can't simply adjust + ** the size of that block. + */ + const size_t gap_minimum = sizeof(block_header_t); + const size_t size_with_gap = adjust_request_size(adjust + align + gap_minimum, align); + + /* + ** If alignment is less than or equals base alignment, we're done. + ** If we requested 0 bytes, return null, as tlsf_malloc(0) does. + */ + const size_t aligned_size = (adjust && align > ALIGN_SIZE) ? size_with_gap : adjust; + + block_header_t* block = block_locate_free(control, aligned_size); + + /* This can't be a static assert. 
*/ + tlsf_assert(sizeof(block_header_t) == block_size_min + block_header_overhead); + + if (block) + { + void* ptr = block_to_ptr(block); + void* aligned = align_ptr(ptr, align); + size_t gap = tlsf_cast(size_t, + tlsf_cast(tlsfptr_t, aligned) - tlsf_cast(tlsfptr_t, ptr)); + + /* If gap size is too small, offset to next aligned boundary. */ + if (gap && gap < gap_minimum) + { + const size_t gap_remain = gap_minimum - gap; + const size_t offset = tlsf_max(gap_remain, align); + const void* next_aligned = tlsf_cast(void*, + tlsf_cast(tlsfptr_t, aligned) + offset); + + aligned = align_ptr(next_aligned, align); + gap = tlsf_cast(size_t, + tlsf_cast(tlsfptr_t, aligned) - tlsf_cast(tlsfptr_t, ptr)); + } + + if (gap) + { + tlsf_assert(gap >= gap_minimum && "gap size too small"); + block = block_trim_free_leading(control, block, gap); + } + } + + return block_prepare_used(control, block, adjust); +} + +void tlsf_free(tlsf_t tlsf, void* ptr) +{ + /* Don't attempt to free a NULL pointer. */ + if (ptr) + { + control_t* control = tlsf_cast(control_t*, tlsf); + block_header_t* block = block_from_ptr(ptr); + tlsf_assert(!block_is_free(block) && "block already marked as free"); + block_mark_as_free(block); + block = block_merge_prev(control, block); + block = block_merge_next(control, block); + block_insert(control, block); + } +} + +/* +** The TLSF block information provides us with enough information to +** provide a reasonably intelligent implementation of realloc, growing or +** shrinking the currently allocated block as required. +** +** This routine handles the somewhat esoteric edge cases of realloc: +** - a non-zero size with a null pointer will behave like malloc +** - a zero size with a non-null pointer will behave like free +** - a request that cannot be satisfied will leave the original buffer +** untouched +** - an extended buffer size will leave the newly-allocated area with +** contents undefined +*/ +void* tlsf_realloc(tlsf_t tlsf, void* ptr, size_t size) +{ + control_t* control = tlsf_cast(control_t*, tlsf); + void* p = 0; + + /* Zero-size requests are treated as free. */ + if (ptr && size == 0) + { + tlsf_free(tlsf, ptr); + } + /* Requests with NULL pointers are treated as malloc. */ + else if (!ptr) + { + p = tlsf_malloc(tlsf, size); + } + else + { + block_header_t* block = block_from_ptr(ptr); + block_header_t* next = block_next(block); + + const size_t cursize = block_size(block); + const size_t combined = cursize + block_size(next) + block_header_overhead; + const size_t adjust = adjust_request_size(size, ALIGN_SIZE); + + tlsf_assert(!block_is_free(block) && "block already marked as free"); + + /* + ** If the next block is used, or when combined with the current + ** block, does not offer enough space, we must reallocate and copy. + */ + if (adjust > cursize && (!block_is_free(next) || adjust > combined)) + { + p = tlsf_malloc(tlsf, size); + if (p) + { + const size_t minsize = tlsf_min(cursize, size); + memcpy(p, ptr, minsize); + tlsf_free(tlsf, ptr); + } + } + else + { + /* Do we need to expand to the next block? */ + if (adjust > cursize) + { + block_merge_next(control, block); + block_mark_as_used(block); + } + + /* Trim the resulting block and return the original pointer. 
*/ + block_trim_used(control, block, adjust); + p = ptr; + } + } + + return p; +} diff --git a/deps/lightrec/tlsf/tlsf.h b/deps/lightrec/tlsf/tlsf.h new file mode 100644 index 000000000..e9b5a91c0 --- /dev/null +++ b/deps/lightrec/tlsf/tlsf.h @@ -0,0 +1,90 @@ +#ifndef INCLUDED_tlsf +#define INCLUDED_tlsf + +/* +** Two Level Segregated Fit memory allocator, version 3.1. +** Written by Matthew Conte +** http://tlsf.baisoku.org +** +** Based on the original documentation by Miguel Masmano: +** http://www.gii.upv.es/tlsf/main/docs +** +** This implementation was written to the specification +** of the document, therefore no GPL restrictions apply. +** +** Copyright (c) 2006-2016, Matthew Conte +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions are met: +** * Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** * Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** * Neither the name of the copyright holder nor the +** names of its contributors may be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +** ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +** DISCLAIMED. IN NO EVENT SHALL MATTHEW CONTE BE LIABLE FOR ANY +** DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +** ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include + +#if defined(__cplusplus) +extern "C" { +#endif + +/* tlsf_t: a TLSF structure. Can contain 1 to N pools. */ +/* pool_t: a block of memory that TLSF can manage. */ +typedef void* tlsf_t; +typedef void* pool_t; + +/* Create/destroy a memory pool. */ +tlsf_t tlsf_create(void* mem); +tlsf_t tlsf_create_with_pool(void* mem, size_t bytes); +void tlsf_destroy(tlsf_t tlsf); +pool_t tlsf_get_pool(tlsf_t tlsf); + +/* Add/remove memory pools. */ +pool_t tlsf_add_pool(tlsf_t tlsf, void* mem, size_t bytes); +void tlsf_remove_pool(tlsf_t tlsf, pool_t pool); + +/* malloc/memalign/realloc/free replacements. */ +void* tlsf_malloc(tlsf_t tlsf, size_t bytes); +void* tlsf_memalign(tlsf_t tlsf, size_t align, size_t bytes); +void* tlsf_realloc(tlsf_t tlsf, void* ptr, size_t size); +void tlsf_free(tlsf_t tlsf, void* ptr); + +/* Returns internal block size, not original request size */ +size_t tlsf_block_size(void* ptr); + +/* Overheads/limits of internal structures. */ +size_t tlsf_size(void); +size_t tlsf_align_size(void); +size_t tlsf_block_size_min(void); +size_t tlsf_block_size_max(void); +size_t tlsf_pool_overhead(void); +size_t tlsf_alloc_overhead(void); + +/* Debugging. 
*/ +typedef void (*tlsf_walker)(void* ptr, size_t size, int used, void* user); +void tlsf_walk_pool(pool_t pool, tlsf_walker walker, void* user); +/* Returns nonzero if any internal consistency check fails. */ +int tlsf_check(tlsf_t tlsf); +int tlsf_check_pool(pool_t pool); + +#if defined(__cplusplus) +}; +#endif + +#endif
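
Reviewer's note, not part of the patch: the header above is the allocator's entire public surface, so a minimal sketch of how a client could drive it may help orientation. The 256 KiB pool size and the 16-byte buffer alignment below are arbitrary illustrative choices; every function call is one declared in tlsf.h.

/*
 * Usage sketch (illustration only). tlsf_create_with_pool() places the
 * control structure at the start of the buffer and hands the remainder
 * to tlsf_add_pool(), so the usable pool is roughly
 * sizeof(buffer) - tlsf_size() - tlsf_pool_overhead() bytes.
 */
#include <stdio.h>
#include "tlsf.h"

/* Backing storage; must satisfy the allocator's alignment requirement
 * (ALIGN_SIZE), which 16 bytes comfortably does on 32- and 64-bit. */
static _Alignas(16) unsigned char buffer[256 * 1024];

int main(void)
{
	tlsf_t tlsf = tlsf_create_with_pool(buffer, sizeof(buffer));

	void *p = tlsf_malloc(tlsf, 1024);
	void *q = tlsf_memalign(tlsf, 64, 4096); /* 64-byte-aligned block */

	if (!p || !q)
		return 1;

	/* tlsf_block_size() reports the internal block size, which may be
	 * larger than what was requested. */
	printf("p: %zu bytes, q: %zu bytes\n",
	       tlsf_block_size(p), tlsf_block_size(q));

	/* Grows in place when the next physical block is free and large
	 * enough; otherwise falls back to malloc + copy + free. */
	p = tlsf_realloc(tlsf, p, 2048);

	tlsf_free(tlsf, q);
	tlsf_free(tlsf, p);

	/* Both checks return 0 when the allocator state is consistent. */
	return tlsf_check(tlsf) || tlsf_check_pool(tlsf_get_pool(tlsf));
}
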