From b2381ad0aca930e4cc67c69ba4d5f181dbb8e6c1 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Tue, 15 Aug 2023 23:12:21 +1000 Subject: [PATCH] System: Refactor main loop Reduces JIT exits. Improves runahead performance. --- CMakeLists.txt | 2 + dep/vixl/CMakeLists.txt | 5 +- src/common/CMakeLists.txt | 10 + src/common/common.vcxproj | 20 + src/common/common.vcxproj.filters | 8 + src/common/fastjmp.cpp | 166 ++++++ src/common/fastjmp.h | 33 ++ src/common/fastjmp_arm.asm | 47 ++ src/common/fastjmp_x86.asm | 119 ++++ src/common/platform.h | 2 + src/common/types.h | 7 + src/core/bus.cpp | 31 +- src/core/core.vcxproj | 2 + src/core/cpu_code_cache.cpp | 153 +++-- src/core/cpu_code_cache.h | 7 +- src/core/cpu_core.cpp | 242 ++++---- src/core/cpu_core.h | 25 +- src/core/cpu_core_private.h | 28 +- src/core/cpu_recompiler_code_generator.cpp | 40 +- .../cpu_recompiler_code_generator_aarch32.cpp | 84 +-- .../cpu_recompiler_code_generator_aarch64.cpp | 75 +-- .../cpu_recompiler_code_generator_x64.cpp | 67 +-- src/core/cpu_recompiler_types.h | 7 + src/core/cpu_types.h | 11 +- src/core/gdb_protocol.cpp | 2 +- src/core/gpu.cpp | 3 +- src/core/gte.cpp | 146 ++--- src/core/pgxp.cpp | 95 ++- src/core/pgxp.h | 26 +- src/core/save_state_version.h | 2 +- src/core/system.cpp | 554 ++++++++++-------- src/core/system.h | 14 +- src/core/timing_event.cpp | 110 ++-- src/core/timing_event.h | 2 + .../win32_nogui_platform.cpp | 8 +- src/duckstation-nogui/win32_nogui_platform.h | 2 +- src/duckstation-nogui/x11_nogui_platform.cpp | 43 +- src/duckstation-qt/debuggerwindow.cpp | 4 +- src/duckstation-regtest/regtest_host.cpp | 18 +- src/frontend-common/fullscreen_ui.cpp | 2 +- src/util/jit_code_buffer.cpp | 4 +- src/util/jit_code_buffer.h | 9 + src/util/page_fault_handler.cpp | 11 + 43 files changed, 1333 insertions(+), 913 deletions(-) create mode 100644 src/common/fastjmp.cpp create mode 100644 src/common/fastjmp.h create mode 100644 src/common/fastjmp_arm.asm create mode 100644 
src/common/fastjmp_x86.asm diff --git a/CMakeLists.txt b/CMakeLists.txt index a7b709f734..d8e9893d88 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -239,6 +239,8 @@ elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm" OR "${CMAKE_SYSTEM_PROCESSOR}" set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -marm -march=armv7-a") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -marm -march=armv7-a") endif() +elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "riscv64") + set(CPU_ARCH "riscv64") else() message(FATAL_ERROR "Unknown system processor: ${CMAKE_SYSTEM_PROCESSOR}") endif() diff --git a/dep/vixl/CMakeLists.txt b/dep/vixl/CMakeLists.txt index 88aa65e9fb..d7e4655aaa 100644 --- a/dep/vixl/CMakeLists.txt +++ b/dep/vixl/CMakeLists.txt @@ -88,4 +88,7 @@ if(${CPU_ARCH} STREQUAL "aarch64") ) endif() - +if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") + message("Enabling vixl debug assertions") + target_compile_definitions(vixl PUBLIC VIXL_DEBUG) +endif() diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 370a898491..ba996adeea 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -12,6 +12,8 @@ add_library(common dimensional_array.h error.cpp error.h + fastjmp.cpp + fastjmp.h fifo_queue.h file_system.cpp file_system.h @@ -97,6 +99,14 @@ if(WIN32) windows_headers.h ) target_link_libraries(common PRIVATE d3dcompiler.lib) + + if(${CPU_ARCH} STREQUAL "x64") + enable_language(ASM_MASM) + target_sources(common PRIVATE fastjmp_x86.asm) + elseif(${CPU_ARCH} STREQUAL "aarch32" OR ${CPU_ARCH} STREQUAL "aarch64") + enable_language(ASM_MARMASM) + target_sources(common PRIVATE fastjmp_arm.asm) + endif() endif() if(NOT WIN32 AND NOT ANDROID) diff --git a/src/common/common.vcxproj b/src/common/common.vcxproj index 31f576e302..251d8e3c5a 100644 --- a/src/common/common.vcxproj +++ b/src/common/common.vcxproj @@ -23,6 +23,7 @@ + @@ -123,6 +124,7 @@ + true @@ -192,6 +194,16 @@ + + Document + true + + + Document + true + _M_X86_32;%(PreprocessorDefinitions) + 
_M_X86_64;%(PreprocessorDefinitions) + true @@ -219,9 +231,17 @@ {73ee0c55-6ffe-44e7-9c12-baa52434a797} + + + + {EE054E08-3799-4A59-A422-18259C105FFD} + + + + diff --git a/src/common/common.vcxproj.filters b/src/common/common.vcxproj.filters index a89b94d0ae..35ea858b76 100644 --- a/src/common/common.vcxproj.filters +++ b/src/common/common.vcxproj.filters @@ -129,6 +129,7 @@ + @@ -234,6 +235,7 @@ + @@ -260,4 +262,10 @@ vulkan + + + + + + \ No newline at end of file diff --git a/src/common/fastjmp.cpp b/src/common/fastjmp.cpp new file mode 100644 index 0000000000..e7da0a6f04 --- /dev/null +++ b/src/common/fastjmp.cpp @@ -0,0 +1,166 @@ +// SPDX-FileCopyrightText: 2021 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#ifndef _WIN32 + +#include "fastjmp.h" + +#if defined(__APPLE__) +#define PREFIX "_" +#else +#define PREFIX "" +#endif + +#if defined(__x86_64__) + +asm("\t.global " PREFIX "fastjmp_set\n" + "\t.global " PREFIX "fastjmp_jmp\n" + "\t.text\n" + "\t" PREFIX "fastjmp_set:" + R"( + movq 0(%rsp), %rax + movq %rsp, %rdx # fixup stack pointer, so it doesn't include the call to fastjmp_set + addq $8, %rdx + movq %rax, 0(%rdi) # actually rip + movq %rbx, 8(%rdi) + movq %rdx, 16(%rdi) # actually rsp + movq %rbp, 24(%rdi) + movq %r12, 32(%rdi) + movq %r13, 40(%rdi) + movq %r14, 48(%rdi) + movq %r15, 56(%rdi) + xorl %eax, %eax + ret +)" + "\t" PREFIX "fastjmp_jmp:" + R"( + movl %esi, %eax + movq 0(%rdi), %rdx # actually rip + movq 8(%rdi), %rbx + movq 16(%rdi), %rsp # actually rsp + movq 24(%rdi), %rbp + movq 32(%rdi), %r12 + movq 40(%rdi), %r13 + movq 48(%rdi), %r14 + movq 56(%rdi), %r15 + jmp *%rdx +)"); + +#elif defined(__aarch64__) + +asm( + "\t.global " PREFIX "fastjmp_set\n" + "\t.global " PREFIX "fastjmp_jmp\n" + "\t.text\n" + "\t.align 16\n" + "\t" PREFIX "fastjmp_set:" R"( + mov x16, sp + stp x16, x30, [x0] + stp x19, x20, [x0, #16] + stp x21, x22, [x0, #32] + stp x23, x24, [x0, #48] + stp x25, x26, [x0, #64] + stp x27, x28, [x0, 
#80] + str x29, [x0, #96] + stp d8, d9, [x0, #112] + stp d10, d11, [x0, #128] + stp d12, d13, [x0, #144] + stp d14, d15, [x0, #160] + mov w0, wzr + br x30 +)" +".align 16\n" +"\t" PREFIX "fastjmp_jmp:" R"( + ldp x16, x30, [x0] + mov sp, x16 + ldp x19, x20, [x0, #16] + ldp x21, x22, [x0, #32] + ldp x23, x24, [x0, #48] + ldp x25, x26, [x0, #64] + ldp x27, x28, [x0, #80] + ldr x29, [x0, #96] + ldp d8, d9, [x0, #112] + ldp d10, d11, [x0, #128] + ldp d12, d13, [x0, #144] + ldp d14, d15, [x0, #160] + mov w0, w1 + br x30 +)"); + +#elif defined(__riscv) && __riscv_xlen == 64 + +asm( + "\t.global " PREFIX "fastjmp_set\n" + "\t.global " PREFIX "fastjmp_jmp\n" + "\t.text\n" + "\t.align 16\n" + "\t" PREFIX "fastjmp_set:" R"( + sd sp, 0(a0) + sd s0, 8(a0) + sd s1, 16(a0) + sd s2, 24(a0) + sd s3, 32(a0) + sd s4, 40(a0) + sd s5, 48(a0) + sd s6, 56(a0) + sd s7, 64(a0) + sd s8, 72(a0) + sd s9, 80(a0) + sd s10, 88(a0) + sd s11, 96(a0) + fsd fs0, 104(a0) + fsd fs1, 112(a0) + fsd fs2, 120(a0) + fsd fs3, 128(a0) + fsd fs4, 136(a0) + fsd fs5, 144(a0) + fsd fs6, 152(a0) + fsd fs7, 160(a0) + fsd fs8, 168(a0) + fsd fs9, 176(a0) + fsd fs10, 184(a0) + fsd fs11, 192(a0) + sd ra, 208(a0) + li a0, 0 + jr ra +)" +".align 16\n" +"\t" PREFIX "fastjmp_jmp:" R"( + ld ra, 208(a0) + fld fs11, 192(a0) + fld fs10, 184(a0) + fld fs9, 176(a0) + fld fs8, 168(a0) + fld fs7, 160(a0) + fld fs6, 152(a0) + fld fs5, 144(a0) + fld fs4, 136(a0) + fld fs3, 128(a0) + fld fs2, 120(a0) + fld fs1, 112(a0) + fld fs0, 104(a0) + ld s11, 96(a0) + ld s10, 88(a0) + ld s9, 80(a0) + ld s8, 72(a0) + ld s7, 64(a0) + ld s6, 56(a0) + ld s5, 48(a0) + ld s4, 40(a0) + ld s3, 32(a0) + ld s2, 24(a0) + ld s1, 16(a0) + ld s0, 8(a0) + ld sp, 0(a0) + mv a0, a1 + jr ra +)"); + + +#else + +#error Unknown platform. 
+ +#endif + +#endif // _WIN32 diff --git a/src/common/fastjmp.h b/src/common/fastjmp.h new file mode 100644 index 0000000000..b0bab768c9 --- /dev/null +++ b/src/common/fastjmp.h @@ -0,0 +1,33 @@ +// SPDX-FileCopyrightText: 2021 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#include "types.h" + +#include +#include + +struct fastjmp_buf +{ +#if defined(_WIN32) && defined(_M_AMD64) + static constexpr std::size_t BUF_SIZE = 240; +#elif defined(_M_ARM64) || defined(__aarch64__) + static constexpr std::size_t BUF_SIZE = 176; // d14/d15 pair is stored at offsets 160..175 +#elif defined(__x86_64__) + static constexpr std::size_t BUF_SIZE = 64; +#elif defined(_M_IX86) || defined(__i386__) + static constexpr std::size_t BUF_SIZE = 24; +#elif defined(__riscv) && __riscv_xlen == 64 + static constexpr std::size_t BUF_SIZE = 216; // ra is stored at offsets 208..215 +#else +#error Unknown architecture. +#endif + + alignas(16) std::uint8_t buf[BUF_SIZE]; +}; + +extern "C" { +int fastjmp_set(fastjmp_buf* buf); +[[noreturn]] void fastjmp_jmp(const fastjmp_buf* buf, int ret); +} diff --git a/src/common/fastjmp_arm.asm b/src/common/fastjmp_arm.asm new file mode 100644 index 0000000000..ee664611f9 --- /dev/null +++ b/src/common/fastjmp_arm.asm @@ -0,0 +1,47 @@ +; SPDX-FileCopyrightText: 2021 Connor McLaughlin +; SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "ksarm64.h" + + EXPORT fastjmp_set + EXPORT fastjmp_jmp + + TEXTAREA + + ; int fastjmp_set(fastjmp_buf*) + LEAF_ENTRY fastjmp_set + mov x16, sp + stp x16, x30, [x0] + stp x19, x20, [x0, #16] + stp x21, x22, [x0, #32] + stp x23, x24, [x0, #48] + stp x25, x26, [x0, #64] + stp x27, x28, [x0, #80] + str x29, [x0, #96] + stp d8, d9, [x0, #112] + stp d10, d11, [x0, #128] + stp d12, d13, [x0, #144] + stp d14, d15, [x0, #160] + mov w0, wzr + br x30 + LEAF_END + + ; void fastjmp_jmp(fastjmp_buf*, int) + LEAF_ENTRY fastjmp_jmp + ldp x16, x30, [x0] + mov sp, x16 + ldp x19, x20, [x0, #16] + ldp x21, x22, [x0, #32] + ldp x23, x24, [x0, #48] + ldp x25, x26, 
[x0, #64] + ldp x27, x28, [x0, #80] + ldr x29, [x0, #96] + ldp d8, d9, [x0, #112] + ldp d10, d11, [x0, #128] + ldp d12, d13, [x0, #144] + ldp d14, d15, [x0, #160] + mov w0, w1 + br x30 + LEAF_END + + END diff --git a/src/common/fastjmp_x86.asm b/src/common/fastjmp_x86.asm new file mode 100644 index 0000000000..cf681d486f --- /dev/null +++ b/src/common/fastjmp_x86.asm @@ -0,0 +1,119 @@ +; SPDX-FileCopyrightText: 2021 Connor McLaughlin +; SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +IFDEF _M_X86_32 + +; ----------------------------------------- +; 32-bit X86 +; ----------------------------------------- + .386 + .model flat + +_TEXT SEGMENT + +PUBLIC @fastjmp_set@4 +PUBLIC @fastjmp_jmp@8 + +; void fastjmp_set(fastjmp_buf*) +@fastjmp_set@4 PROC + mov eax, dword ptr [esp] + mov edx, esp ; fixup stack pointer, so it doesn't include the call to fastjmp_set + add edx, 4 + mov dword ptr [ecx], eax ; actually eip + mov dword ptr [ecx + 4], ebx + mov dword ptr [ecx + 8], edx ; actually esp + mov dword ptr [ecx + 12], ebp + mov dword ptr [ecx + 16], esi + mov dword ptr [ecx + 20], edi + xor eax, eax + ret +@fastjmp_set@4 ENDP + +; void __fastcall fastjmp_jmp(fastjmp_buf*, int) +@fastjmp_jmp@8 PROC + mov eax, edx ; return code + mov edx, dword ptr [ecx + 0] + mov ebx, dword ptr [ecx + 4] + mov esp, dword ptr [ecx + 8] + mov ebp, dword ptr [ecx + 12] + mov esi, dword ptr [ecx + 16] + mov edi, dword ptr [ecx + 20] + jmp edx +@fastjmp_jmp@8 ENDP + +_TEXT ENDS + +ENDIF ; _M_X86_32 + +IFDEF _M_X86_64 + +; ----------------------------------------- +; 64-bit X86 +; ----------------------------------------- +_TEXT SEGMENT + +PUBLIC fastjmp_set +PUBLIC fastjmp_jmp + +; void fastjmp_set(fastjmp_buf*) +fastjmp_set PROC + mov rax, qword ptr [rsp] + mov rdx, rsp ; fixup stack pointer, so it doesn't include the call to fastjmp_set + add rdx, 8 + mov qword ptr [rcx], rax ; actually rip + mov qword ptr [rcx + 8], rbx + mov qword ptr [rcx + 16], rdx ; actually rsp + mov qword ptr 
[rcx + 24], rbp + mov qword ptr [rcx + 32], rsi + mov qword ptr [rcx + 40], rdi + mov qword ptr [rcx + 48], r12 + mov qword ptr [rcx + 56], r13 + mov qword ptr [rcx + 64], r14 + mov qword ptr [rcx + 72], r15 + movaps xmmword ptr [rcx + 80], xmm6 + movaps xmmword ptr [rcx + 96], xmm7 + movaps xmmword ptr [rcx + 112], xmm8 + add rcx, 112 ; split to two batches to fit displacement in a single byte + movaps xmmword ptr [rcx + 16], xmm9 + movaps xmmword ptr [rcx + 32], xmm10 + movaps xmmword ptr [rcx + 48], xmm11 + movaps xmmword ptr [rcx + 64], xmm12 + movaps xmmword ptr [rcx + 80], xmm13 + movaps xmmword ptr [rcx + 96], xmm14 + movaps xmmword ptr [rcx + 112], xmm15 + xor eax, eax + ret +fastjmp_set ENDP + +; void fastjmp_jmp(fastjmp_buf*, int) +fastjmp_jmp PROC + mov eax, edx ; return code + mov rdx, qword ptr [rcx + 0] ; actually rip + mov rbx, qword ptr [rcx + 8] + mov rsp, qword ptr [rcx + 16] + mov rbp, qword ptr [rcx + 24] + mov rsi, qword ptr [rcx + 32] + mov rdi, qword ptr [rcx + 40] + mov r12, qword ptr [rcx + 48] + mov r13, qword ptr [rcx + 56] + mov r14, qword ptr [rcx + 64] + mov r15, qword ptr [rcx + 72] + movaps xmm6, xmmword ptr [rcx + 80] + movaps xmm7, xmmword ptr [rcx + 96] + movaps xmm8, xmmword ptr [rcx + 112] + add rcx, 112 ; split to two batches to fit displacement in a single byte + movaps xmm9, xmmword ptr [rcx + 16] + movaps xmm10, xmmword ptr [rcx + 32] + movaps xmm11, xmmword ptr [rcx + 48] + movaps xmm12, xmmword ptr [rcx + 64] + movaps xmm13, xmmword ptr [rcx + 80] + movaps xmm14, xmmword ptr [rcx + 96] + movaps xmm15, xmmword ptr [rcx + 112] + jmp rdx +fastjmp_jmp ENDP + +_TEXT ENDS + +ENDIF ; _M_X86_64 + +END \ No newline at end of file diff --git a/src/common/platform.h b/src/common/platform.h index 40e2d0a348..8bb39c8415 100644 --- a/src/common/platform.h +++ b/src/common/platform.h @@ -27,6 +27,8 @@ #define CPU_AARCH64 1 #elif defined(__arm__) #define CPU_AARCH32 1 +#elif defined(__riscv) && __riscv_xlen == 64 +#define CPU_RISCV64 1 
#else #error Unknown architecture. #endif diff --git a/src/common/types.h b/src/common/types.h index 7b182afa1e..5f78668a6a 100644 --- a/src/common/types.h +++ b/src/common/types.h @@ -65,6 +65,13 @@ char (&__countof_ArraySizeHelper(T (&array)[N]))[N]; #define UNLIKELY(x) __builtin_expect(!!(x), 0) #endif +// [[noreturn]] which can be used on function pointers. +#ifdef _MSC_VER +// __declspec(noreturn) produces error C3829. +#define NORETURN_FUNCTION_POINTER +#else +#define NORETURN_FUNCTION_POINTER __attribute__((noreturn)) +#endif // disable warnings that show up at warning level 4 // TODO: Move to build system instead diff --git a/src/core/bus.cpp b/src/core/bus.cpp index 5460a5379d..a02bedc943 100644 --- a/src/core/bus.cpp +++ b/src/core/bus.cpp @@ -21,6 +21,7 @@ #include "sio.h" #include "spu.h" #include "timers.h" +#include "timing_event.h" #include "util/state_wrapper.h" #include #include @@ -1418,7 +1419,7 @@ TickCount GetICacheFillTicks(VirtualMemoryAddress address) void CheckAndUpdateICacheTags(u32 line_count, TickCount uncached_ticks) { - VirtualMemoryAddress current_pc = g_state.regs.pc & ICACHE_TAG_ADDRESS_MASK; + VirtualMemoryAddress current_pc = g_state.pc & ICACHE_TAG_ADDRESS_MASK; if (IsCachedAddress(current_pc)) { TickCount ticks = 0; @@ -1541,10 +1542,20 @@ ALWAYS_INLINE static TickCount DoScratchpadAccess(PhysicalMemoryAddress address, } template -static ALWAYS_INLINE TickCount DoMemoryAccess(VirtualMemoryAddress address, u32& value) +static ALWAYS_INLINE_RELEASE TickCount DoMemoryAccess(VirtualMemoryAddress address, u32& value) { using namespace Bus; +#if 0 + if (type == MemoryAccessType::Write && address == 0x80113028) + { + if ((TimingEvents::GetGlobalTickCounter() + CPU::g_state.pending_ticks) == 5051485) + __debugbreak(); + + Log_WarningPrintf("VAL %08X @ %u", value, (TimingEvents::GetGlobalTickCounter() + CPU::g_state.pending_ticks)); + } +#endif + switch (address >> 29) { case 0x00: // KUSEG 0M-512M @@ -1723,9 +1734,9 @@ static bool 
DoAlignmentCheck(VirtualMemoryAddress address) bool FetchInstruction() { - DebugAssert(Common::IsAlignedPow2(g_state.regs.npc, 4)); + DebugAssert(Common::IsAlignedPow2(g_state.npc, 4)); - const PhysicalMemoryAddress address = g_state.regs.npc; + const PhysicalMemoryAddress address = g_state.npc; switch (address >> 29) { case 0x00: // KUSEG 0M-512M @@ -1764,16 +1775,16 @@ bool FetchInstruction() } } - g_state.regs.pc = g_state.regs.npc; - g_state.regs.npc += sizeof(g_state.next_instruction.bits); + g_state.pc = g_state.npc; + g_state.npc += sizeof(g_state.next_instruction.bits); return true; } bool FetchInstructionForInterpreterFallback() { - DebugAssert(Common::IsAlignedPow2(g_state.regs.npc, 4)); + DebugAssert(Common::IsAlignedPow2(g_state.npc, 4)); - const PhysicalMemoryAddress address = g_state.regs.npc; + const PhysicalMemoryAddress address = g_state.npc; switch (address >> 29) { case 0x00: // KUSEG 0M-512M @@ -1801,8 +1812,8 @@ bool FetchInstructionForInterpreterFallback() } } - g_state.regs.pc = g_state.regs.npc; - g_state.regs.npc += sizeof(g_state.next_instruction.bits); + g_state.pc = g_state.npc; + g_state.npc += sizeof(g_state.next_instruction.bits); return true; } diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj index 5bd94db1f5..c0465735ff 100644 --- a/src/core/core.vcxproj +++ b/src/core/core.vcxproj @@ -196,6 +196,8 @@ + ZYDIS_DISABLE_ENCODER;ZYDIS_DISABLE_AVX512;ZYDIS_DISABLE_KNC;ZYDIS_STATIC_BUILD;ZYCORE_STATIC_BUILD;%(PreprocessorDefinitions) + $(SolutionDir)dep\zydis\include;$(SolutionDir)dep\zydis\dependencies\zycore\include;%(AdditionalIncludeDirectories) $(IntDir)/%(RelativeDir)/ diff --git a/src/core/cpu_code_cache.cpp b/src/core/cpu_code_cache.cpp index a9895eb532..7cf2446947 100644 --- a/src/core/cpu_code_cache.cpp +++ b/src/core/cpu_code_cache.cpp @@ -8,6 +8,7 @@ #include "cpu_core.h" #include "cpu_core_private.h" #include "cpu_disasm.h" +#include "cpu_recompiler_types.h" #include "settings.h" #include "system.h" #include 
"timing_event.h" @@ -17,6 +18,8 @@ Log_SetChannel(CPU::CodeCache); #include "cpu_recompiler_code_generator.h" #endif +#include + namespace CPU::CodeCache { static constexpr bool USE_BLOCK_LINKING = true; @@ -50,6 +53,10 @@ alignas(Recompiler::CODE_STORAGE_ALIGNMENT) static u8 #endif static JitCodeBuffer s_code_buffer; + +#endif + +#ifdef WITH_RECOMPILER static FastMapTable s_fast_map[FAST_MAP_TABLE_COUNT]; static std::unique_ptr s_fast_map_pointers; @@ -253,12 +260,19 @@ void Initialize() { Panic("Failed to initialize code space"); } + } +#endif AllocateFastMap(); + +#ifdef WITH_RECOMPILER + if (g_settings.IsUsingRecompiler()) + { if (g_settings.IsUsingFastmem() && !InitializeFastmem()) Panic("Failed to initialize fastmem"); + AllocateFastMap(); CompileDispatcher(); ResetFastMap(); } @@ -293,22 +307,13 @@ void Shutdown() } template -static void ExecuteImpl() +[[noreturn]] static void ExecuteImpl() { CodeBlockKey next_block_key; - g_using_interpreter = false; - g_state.frame_done = false; - - while (!g_state.frame_done) + for (;;) { - if (HasPendingInterrupt()) - { - SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits); - DispatchInterrupt(); - } - - TimingEvents::UpdateCPUDowncount(); + TimingEvents::RunEvents(); next_block_key = GetNextBlockKey(); while (g_state.pending_ticks < g_state.downcount) @@ -384,27 +389,10 @@ static void ExecuteImpl() } } } - - TimingEvents::RunEvents(); } // in case we switch to interpreter... 
- g_state.regs.npc = g_state.regs.pc; -} - -void Execute() -{ - if (g_settings.gpu_pgxp_enable) - { - if (g_settings.gpu_pgxp_cpu) - ExecuteImpl(); - else - ExecuteImpl(); - } - else - { - ExecuteImpl(); - } + g_state.npc = g_state.pc; } #ifdef WITH_RECOMPILER @@ -430,21 +418,15 @@ FastMapTable* GetFastMapPointer() return s_fast_map; } -void ExecuteRecompiler() +[[noreturn]] static void ExecuteRecompiler() { - g_using_interpreter = false; - g_state.frame_done = false; - #if 0 - while (!g_state.frame_done) + for (;;) { if (HasPendingInterrupt()) - { - SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits); DispatchInterrupt(); - } - TimingEvents::UpdateCPUDowncount(); + TimingEvents::RunEvents(); while (g_state.pending_ticks < g_state.downcount) { @@ -452,18 +434,50 @@ void ExecuteRecompiler() LogCurrentState(); #endif - const u32 pc = g_state.regs.pc; + const u32 pc = g_state.pc; s_single_block_asm_dispatcher(s_fast_map[pc >> 16][pc >> 2]); } - - TimingEvents::RunEvents(); } #else s_asm_dispatcher(); #endif +} - // in case we switch to interpreter... 
- g_state.regs.npc = g_state.regs.pc; +#endif + +[[noreturn]] void Execute() +{ + switch (g_settings.cpu_execution_mode) + { +#ifdef WITH_RECOMPILER + case CPUExecutionMode::Recompiler: + ExecuteRecompiler(); + break; +#endif + + default: + { + if (g_settings.gpu_pgxp_enable) + { + if (g_settings.gpu_pgxp_cpu) + ExecuteImpl(); + else + ExecuteImpl(); + } + else + { + ExecuteImpl(); + } + } + break; + } +} + +#if defined(WITH_RECOMPILER) + +JitCodeBuffer& GetCodeBuffer() +{ + return s_code_buffer; } #endif @@ -473,13 +487,14 @@ void Reinitialize() ClearState(); #ifdef WITH_RECOMPILER - ShutdownFastmem(); +#endif + +#if defined(WITH_RECOMPILER) s_code_buffer.Destroy(); if (g_settings.IsUsingRecompiler()) { - #ifdef USE_STATIC_CODE_BUFFER if (!s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage), RECOMPILER_FAR_CODE_CACHE_SIZE, RECOMPILER_GUARD_SIZE)) @@ -489,7 +504,12 @@ void Reinitialize() { Panic("Failed to initialize code space"); } + } +#endif +#ifdef WITH_RECOMPILER + if (g_settings.IsUsingRecompiler()) + { if (g_settings.IsUsingFastmem() && !InitializeFastmem()) Panic("Failed to initialize fastmem"); @@ -509,25 +529,40 @@ void Flush() #endif } +#ifndef _MSC_VER +void __debugbreak() {} +#endif + void LogCurrentState() { +#if 0 + if ((TimingEvents::GetGlobalTickCounter() + GetPendingTicks()) == 2546728915) + __debugbreak(); +#endif +#if 0 + if ((TimingEvents::GetGlobalTickCounter() + GetPendingTicks()) < 2546729174) + return; +#endif + const auto& regs = g_state.regs; - WriteToExecutionLog("tick=%u pc=%08X zero=%08X at=%08X v0=%08X v1=%08X a0=%08X a1=%08X a2=%08X a3=%08X t0=%08X " - "t1=%08X t2=%08X t3=%08X t4=%08X t5=%08X t6=%08X t7=%08X s0=%08X s1=%08X s2=%08X s3=%08X s4=%08X " - "s5=%08X s6=%08X s7=%08X t8=%08X t9=%08X k0=%08X k1=%08X gp=%08X sp=%08X fp=%08X ra=%08X ldr=%s " - "ldv=%08X\n", - TimingEvents::GetGlobalTickCounter() + GetPendingTicks(), regs.pc, regs.zero, regs.at, regs.v0, - regs.v1, regs.a0, regs.a1, regs.a2, regs.a3, regs.t0, 
regs.t1, regs.t2, regs.t3, regs.t4, regs.t5, - regs.t6, regs.t7, regs.s0, regs.s1, regs.s2, regs.s3, regs.s4, regs.s5, regs.s6, regs.s7, regs.t8, - regs.t9, regs.k0, regs.k1, regs.gp, regs.sp, regs.fp, regs.ra, - (g_state.next_load_delay_reg == Reg::count) ? "NONE" : GetRegName(g_state.next_load_delay_reg), - (g_state.next_load_delay_reg == Reg::count) ? 0 : g_state.next_load_delay_value); + WriteToExecutionLog( + "tick=%u dc=%u/%u pc=%08X at=%08X v0=%08X v1=%08X a0=%08X a1=%08X a2=%08X a3=%08X t0=%08X " + "t1=%08X t2=%08X t3=%08X t4=%08X t5=%08X t6=%08X t7=%08X s0=%08X s1=%08X s2=%08X s3=%08X s4=%08X " + "s5=%08X s6=%08X s7=%08X t8=%08X t9=%08X k0=%08X k1=%08X gp=%08X sp=%08X fp=%08X ra=%08X ldr=%s " + "ldv=%08X cause=%08X sr=%08X gte=%08X\n", + TimingEvents::GetGlobalTickCounter() + GetPendingTicks(), g_state.pending_ticks, g_state.downcount, g_state.pc, + regs.at, regs.v0, regs.v1, regs.a0, regs.a1, regs.a2, regs.a3, regs.t0, regs.t1, regs.t2, regs.t3, regs.t4, regs.t5, + regs.t6, regs.t7, regs.s0, regs.s1, regs.s2, regs.s3, regs.s4, regs.s5, regs.s6, regs.s7, regs.t8, regs.t9, regs.k0, + regs.k1, regs.gp, regs.sp, regs.fp, regs.ra, + (g_state.next_load_delay_reg == Reg::count) ? "NONE" : GetRegName(g_state.next_load_delay_reg), + (g_state.next_load_delay_reg == Reg::count) ? 
0 : g_state.next_load_delay_value, g_state.cop0_regs.cause.bits, + g_state.cop0_regs.sr.bits, static_cast(crc32(0, (const Bytef*)&g_state.gte_regs, sizeof(g_state.gte_regs)))); } CodeBlockKey GetNextBlockKey() { CodeBlockKey key = {}; - key.SetPC(g_state.regs.pc); + key.SetPC(g_state.pc); key.user_mode = InUserMode(); return key; } @@ -836,7 +871,7 @@ void FastCompileBlockFunction() void InvalidCodeFunction() { - Log_ErrorPrintf("Trying to execute invalid code at 0x%08X", g_state.regs.pc); + Log_ErrorPrintf("Trying to execute invalid code at 0x%08X", g_state.pc); if (g_settings.gpu_pgxp_enable) { if (g_settings.gpu_pgxp_cpu) @@ -1249,7 +1284,7 @@ void CPU::Recompiler::Thunks::ResolveBranch(CodeBlock* block, void* host_pc, voi void CPU::Recompiler::Thunks::LogPC(u32 pc) { -#if 0 +#if 1 CPU::CodeCache::LogCurrentState(); #endif #if 0 diff --git a/src/core/cpu_code_cache.h b/src/core/cpu_code_cache.h index 60e5c32325..026deab1c3 100644 --- a/src/core/cpu_code_cache.h +++ b/src/core/cpu_code_cache.h @@ -121,14 +121,17 @@ using FastMapTable = CodeBlock::HostCodePointer*; void Initialize(); void Shutdown(); -void Execute(); +[[noreturn]] void Execute(); #ifdef WITH_RECOMPILER using DispatcherFunction = void (*)(); using SingleBlockDispatcherFunction = void (*)(const CodeBlock::HostCodePointer); FastMapTable* GetFastMapPointer(); -void ExecuteRecompiler(); +#endif + +#if defined(WITH_RECOMPILER) +JitCodeBuffer& GetCodeBuffer(); #endif /// Flushes the code cache, forcing all blocks to be recompiled. 
diff --git a/src/core/cpu_core.cpp b/src/core/cpu_core.cpp index 62296e25ed..6daa0d1800 100644 --- a/src/core/cpu_core.cpp +++ b/src/core/cpu_core.cpp @@ -4,6 +4,7 @@ #include "cpu_core.h" #include "bus.h" #include "common/align.h" +#include "common/fastjmp.h" #include "common/file_system.h" #include "common/log.h" #include "cpu_core_private.h" @@ -29,9 +30,10 @@ static void Branch(u32 target); static void FlushPipeline(); State g_state; -bool g_using_interpreter = false; bool TRACE_EXECUTION = false; +static fastjmp_buf s_jmp_buf; + static std::FILE* s_log_file = nullptr; static bool s_log_file_opened = false; static bool s_trace_to_log = false; @@ -41,6 +43,7 @@ static std::vector s_breakpoints; static u32 s_breakpoint_counter = 1; static u32 s_last_breakpoint_check_pc = INVALID_BREAKPOINT_PC; static bool s_single_step = false; +static bool s_single_step_done = false; bool IsTraceEnabled() { @@ -134,6 +137,7 @@ void Reset() GTE::Reset(); + // TODO: This consumes cycles... SetPC(RESET_VECTOR); } @@ -141,7 +145,9 @@ bool DoState(StateWrapper& sw) { sw.Do(&g_state.pending_ticks); sw.Do(&g_state.downcount); - sw.DoArray(g_state.regs.r, countof(g_state.regs.r)); + sw.DoArray(g_state.regs.r, static_cast(Reg::count)); + sw.Do(&g_state.pc); + sw.Do(&g_state.npc); sw.Do(&g_state.cop0_regs.BPC); sw.Do(&g_state.cop0_regs.BDA); sw.Do(&g_state.cop0_regs.TAR); @@ -161,11 +167,23 @@ bool DoState(StateWrapper& sw) sw.Do(&g_state.next_instruction_is_branch_delay_slot); sw.Do(&g_state.branch_was_taken); sw.Do(&g_state.exception_raised); - sw.Do(&g_state.interrupt_delay); + if (sw.GetVersion() < 59) + { + bool interrupt_delay; + sw.Do(&interrupt_delay); + } sw.Do(&g_state.load_delay_reg); sw.Do(&g_state.load_delay_value); sw.Do(&g_state.next_load_delay_reg); sw.Do(&g_state.next_load_delay_value); + + // Compatibility with old states. 
+ if (sw.GetVersion() < 59) + { + g_state.load_delay_reg = static_cast(std::min(static_cast(g_state.load_delay_reg), static_cast(Reg::count))); + g_state.next_load_delay_reg = static_cast(std::min(static_cast(g_state.next_load_delay_reg), static_cast(Reg::count))); + } + sw.Do(&g_state.cache_control.bits); sw.DoBytes(g_state.dcache.data(), g_state.dcache.size()); @@ -203,7 +221,7 @@ void UpdateFastmemBase() ALWAYS_INLINE_RELEASE void SetPC(u32 new_pc) { DebugAssert(Common::IsAlignedPow2(new_pc, 4)); - g_state.regs.npc = new_pc; + g_state.npc = new_pc; FlushPipeline(); } @@ -217,7 +235,7 @@ ALWAYS_INLINE_RELEASE void Branch(u32 target) return; } - g_state.regs.npc = target; + g_state.npc = target; g_state.branch_was_taken = true; } @@ -257,14 +275,14 @@ ALWAYS_INLINE_RELEASE static void RaiseException(u32 CAUSE_bits, u32 EPC, u32 ve // TAR is set to the address which was being fetched in this instruction, or the next instruction to execute if the // exception hadn't occurred in the delay slot. 
g_state.cop0_regs.EPC -= UINT32_C(4); - g_state.cop0_regs.TAR = g_state.regs.pc; + g_state.cop0_regs.TAR = g_state.pc; } // current -> previous, switch to kernel mode and disable interrupts g_state.cop0_regs.sr.mode_bits <<= 2; // flush the pipeline - we don't want to execute the previously fetched instruction - g_state.regs.npc = vector; + g_state.npc = vector; g_state.exception_raised = true; FlushPipeline(); } @@ -299,7 +317,7 @@ void RaiseBreakException(u32 CAUSE_bits, u32 EPC, u32 instruction_bits) if (PCDrv::HandleSyscall(instruction_bits, g_state.regs)) { // immediately return - g_state.regs.npc = EPC + 4; + g_state.npc = EPC + 4; FlushPipeline(); return; } @@ -311,16 +329,7 @@ void RaiseBreakException(u32 CAUSE_bits, u32 EPC, u32 instruction_bits) void SetExternalInterrupt(u8 bit) { g_state.cop0_regs.cause.Ip |= static_cast(1u << bit); - - if (g_settings.cpu_execution_mode == CPUExecutionMode::Interpreter) - { - g_state.interrupt_delay = 1; - } - else - { - g_state.interrupt_delay = 0; - CheckForPendingInterrupt(); - } + CheckForPendingInterrupt(); } void ClearExternalInterrupt(u8 bit) @@ -331,9 +340,7 @@ void ClearExternalInterrupt(u8 bit) ALWAYS_INLINE_RELEASE static void UpdateLoadDelay() { // the old value is needed in case the delay slot instruction overwrites the same register - if (g_state.load_delay_reg != Reg::count) - g_state.regs.r[static_cast(g_state.load_delay_reg)] = g_state.load_delay_value; - + g_state.regs.r[static_cast(g_state.load_delay_reg)] = g_state.load_delay_value; g_state.load_delay_reg = g_state.next_load_delay_reg; g_state.load_delay_value = g_state.next_load_delay_value; g_state.next_load_delay_reg = Reg::count; @@ -343,16 +350,13 @@ ALWAYS_INLINE_RELEASE static void FlushPipeline() { // loads are flushed g_state.next_load_delay_reg = Reg::count; - if (g_state.load_delay_reg != Reg::count) - { - g_state.regs.r[static_cast(g_state.load_delay_reg)] = g_state.load_delay_value; - g_state.load_delay_reg = Reg::count; - } + 
g_state.regs.r[static_cast(g_state.load_delay_reg)] = g_state.load_delay_value; + g_state.load_delay_reg = Reg::count; // not in a branch delay slot g_state.branch_was_taken = false; g_state.next_instruction_is_branch_delay_slot = false; - g_state.current_instruction_pc = g_state.regs.pc; + g_state.current_instruction_pc = g_state.pc; // prefetch the next instruction FetchInstruction(); @@ -649,8 +653,8 @@ const std::array {"ra", &CPU::g_state.regs.ra}, {"hi", &CPU::g_state.regs.hi}, {"lo", &CPU::g_state.regs.lo}, - {"pc", &CPU::g_state.regs.pc}, - {"npc", &CPU::g_state.regs.npc}, + {"pc", &CPU::g_state.pc}, + {"npc", &CPU::g_state.npc}, {"COP0_SR", &CPU::g_state.cop0_regs.sr.bits}, {"COP0_CAUSE", &CPU::g_state.cop0_regs.cause.bits}, @@ -1111,7 +1115,7 @@ ALWAYS_INLINE_RELEASE static void ExecuteInstruction() { g_state.next_instruction_is_branch_delay_slot = true; const u32 target = ReadReg(inst.r.rs); - WriteReg(inst.r.rd, g_state.regs.npc); + WriteReg(inst.r.rd, g_state.npc); Branch(target); } break; @@ -1267,7 +1271,7 @@ ALWAYS_INLINE_RELEASE static void ExecuteInstruction() WriteRegDelayed(inst.i.rt, sxvalue); if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_LBx(inst.bits, sxvalue, addr); + PGXP::CPU_LBx(inst.bits, addr, sxvalue); } break; @@ -1285,7 +1289,7 @@ ALWAYS_INLINE_RELEASE static void ExecuteInstruction() WriteRegDelayed(inst.i.rt, sxvalue); if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_LHx(inst.bits, sxvalue, addr); + PGXP::CPU_LHx(inst.bits, addr, sxvalue); } break; @@ -1302,7 +1306,7 @@ ALWAYS_INLINE_RELEASE static void ExecuteInstruction() WriteRegDelayed(inst.i.rt, value); if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_LW(inst.bits, value, addr); + PGXP::CPU_LW(inst.bits, addr, value); } break; @@ -1320,7 +1324,7 @@ ALWAYS_INLINE_RELEASE static void ExecuteInstruction() WriteRegDelayed(inst.i.rt, zxvalue); if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_LBx(inst.bits, zxvalue, addr); + 
PGXP::CPU_LBx(inst.bits, addr, zxvalue); } break; @@ -1338,7 +1342,7 @@ ALWAYS_INLINE_RELEASE static void ExecuteInstruction() WriteRegDelayed(inst.i.rt, zxvalue); if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_LHx(inst.bits, zxvalue, addr); + PGXP::CPU_LHx(inst.bits, addr, zxvalue); } break; @@ -1372,7 +1376,7 @@ ALWAYS_INLINE_RELEASE static void ExecuteInstruction() WriteRegDelayed(inst.i.rt, new_value); if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_LW(inst.bits, new_value, addr); + PGXP::CPU_LW(inst.bits, addr, new_value); } break; @@ -1386,7 +1390,7 @@ ALWAYS_INLINE_RELEASE static void ExecuteInstruction() WriteMemoryByte(addr, value); if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_SB(inst.bits, Truncate8(value), addr); + PGXP::CPU_SB(inst.bits, addr, value); } break; @@ -1400,7 +1404,7 @@ ALWAYS_INLINE_RELEASE static void ExecuteInstruction() WriteMemoryHalfWord(addr, value); if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_SH(inst.bits, Truncate16(value), addr); + PGXP::CPU_SH(inst.bits, addr, value); } break; @@ -1414,7 +1418,7 @@ ALWAYS_INLINE_RELEASE static void ExecuteInstruction() WriteMemoryWord(addr, value); if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_SW(inst.bits, value, addr); + PGXP::CPU_SW(inst.bits, addr, value); } break; @@ -1447,22 +1451,22 @@ ALWAYS_INLINE_RELEASE static void ExecuteInstruction() WriteMemoryWord(aligned_addr, new_value); if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_SW(inst.bits, new_value, addr); + PGXP::CPU_SW(inst.bits, aligned_addr, new_value); } break; case InstructionOp::j: { g_state.next_instruction_is_branch_delay_slot = true; - Branch((g_state.regs.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2)); + Branch((g_state.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2)); } break; case InstructionOp::jal: { - WriteReg(Reg::ra, g_state.regs.npc); + WriteReg(Reg::ra, g_state.npc); g_state.next_instruction_is_branch_delay_slot = true; - Branch((g_state.regs.pc & 
UINT32_C(0xF0000000)) | (inst.j.target << 2)); + Branch((g_state.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2)); } break; @@ -1472,7 +1476,7 @@ ALWAYS_INLINE_RELEASE static void ExecuteInstruction() g_state.next_instruction_is_branch_delay_slot = true; const bool branch = (ReadReg(inst.i.rs) == ReadReg(inst.i.rt)); if (branch) - Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2)); + Branch(g_state.pc + (inst.i.imm_sext32() << 2)); } break; @@ -1481,7 +1485,7 @@ ALWAYS_INLINE_RELEASE static void ExecuteInstruction() g_state.next_instruction_is_branch_delay_slot = true; const bool branch = (ReadReg(inst.i.rs) != ReadReg(inst.i.rt)); if (branch) - Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2)); + Branch(g_state.pc + (inst.i.imm_sext32() << 2)); } break; @@ -1490,7 +1494,7 @@ ALWAYS_INLINE_RELEASE static void ExecuteInstruction() g_state.next_instruction_is_branch_delay_slot = true; const bool branch = (static_cast(ReadReg(inst.i.rs)) > 0); if (branch) - Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2)); + Branch(g_state.pc + (inst.i.imm_sext32() << 2)); } break; @@ -1499,7 +1503,7 @@ ALWAYS_INLINE_RELEASE static void ExecuteInstruction() g_state.next_instruction_is_branch_delay_slot = true; const bool branch = (static_cast(ReadReg(inst.i.rs)) <= 0); if (branch) - Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2)); + Branch(g_state.pc + (inst.i.imm_sext32() << 2)); } break; @@ -1515,10 +1519,10 @@ ALWAYS_INLINE_RELEASE static void ExecuteInstruction() // register is still linked even if the branch isn't taken const bool link = (rt & u8(0x1E)) == u8(0x10); if (link) - WriteReg(Reg::ra, g_state.regs.npc); + WriteReg(Reg::ra, g_state.npc); if (branch) - Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2)); + Branch(g_state.pc + (inst.i.imm_sext32() << 2)); } break; @@ -1610,7 +1614,7 @@ ALWAYS_INLINE_RELEASE static void ExecuteInstruction() WriteRegDelayed(inst.r.rt, value); if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_CFC2(inst.bits, value, 
value); + PGXP::CPU_MFC2(inst.bits, value); } break; @@ -1620,7 +1624,7 @@ ALWAYS_INLINE_RELEASE static void ExecuteInstruction() GTE::WriteRegister(static_cast(inst.r.rd.GetValue()) + 32, value); if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_CTC2(inst.bits, value, value); + PGXP::CPU_MTC2(inst.bits, value); } break; @@ -1630,7 +1634,7 @@ ALWAYS_INLINE_RELEASE static void ExecuteInstruction() WriteRegDelayed(inst.r.rt, value); if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_MFC2(inst.bits, value, value); + PGXP::CPU_MFC2(inst.bits, value); } break; @@ -1640,7 +1644,7 @@ ALWAYS_INLINE_RELEASE static void ExecuteInstruction() GTE::WriteRegister(static_cast(inst.r.rd.GetValue()), value); if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_MTC2(inst.bits, value, value); + PGXP::CPU_MTC2(inst.bits, value); } break; @@ -1674,7 +1678,7 @@ ALWAYS_INLINE_RELEASE static void ExecuteInstruction() GTE::WriteRegister(ZeroExtend32(static_cast(inst.i.rt.GetValue())), value); if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_LWC2(inst.bits, value, addr); + PGXP::CPU_LWC2(inst.bits, addr, value); } break; @@ -1694,7 +1698,7 @@ ALWAYS_INLINE_RELEASE static void ExecuteInstruction() WriteMemoryWord(addr, value); if constexpr (pgxp_mode >= PGXPMode::Memory) - PGXP::CPU_SWC2(inst.bits, value, addr); + PGXP::CPU_SWC2(inst.bits, addr, value); } break; @@ -1734,7 +1738,7 @@ void DispatchInterrupt() { // If the instruction we're about to execute is a GTE instruction, delay dispatching the interrupt until the next // instruction. For some reason, if we don't do this, we end up with incorrectly sorted polygons and flickering.. 
- SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits); + SafeReadInstruction(g_state.pc, &g_state.next_instruction.bits); if (g_state.next_instruction.op == InstructionOp::cop2 && !g_state.next_instruction.cop.IsCommonInstruction()) { StallUntilGTEComplete(); @@ -1745,7 +1749,10 @@ void DispatchInterrupt() RaiseException( Cop0Registers::CAUSE::MakeValueForException(Exception::INT, g_state.next_instruction_is_branch_delay_slot, g_state.branch_was_taken, g_state.next_instruction.cop.cop_n), - g_state.regs.pc); + g_state.pc); + + // Fix up downcount, the pending IRQ set it to zero. + TimingEvents::UpdateCPUDowncount(); } void UpdateDebugDispatcherFlag() @@ -1763,14 +1770,16 @@ void UpdateDebugDispatcherFlag() Log_DevPrintf("%s debug dispatcher", use_debug_dispatcher ? "Now using" : "No longer using"); g_state.use_debug_dispatcher = use_debug_dispatcher; - ForceDispatcherExit(); + ExitExecution(); } -void ForceDispatcherExit() +void ExitExecution() { - // zero the downcount so we break out and switch - g_state.downcount = 0; - g_state.frame_done = true; + // can't exit while running events without messing things up + if (TimingEvents::IsRunningEvents()) + TimingEvents::SetFrameDone(); + else + fastjmp_jmp(&s_jmp_buf, 1); } bool HasAnyBreakpoints() @@ -1869,7 +1878,7 @@ void ClearBreakpoints() bool AddStepOverBreakpoint() { - u32 bp_pc = g_state.regs.pc; + u32 bp_pc = g_state.pc; Instruction inst; if (!SafeReadInstruction(bp_pc, &inst.bits)) @@ -1880,7 +1889,7 @@ bool AddStepOverBreakpoint() if (!IsCallInstruction(inst)) { Host::ReportFormattedDebuggerMessage(Host::TranslateString("DebuggerMessage", "0x%08X is not a call instruction."), - g_state.regs.pc); + g_state.pc); return false; } @@ -1890,7 +1899,7 @@ bool AddStepOverBreakpoint() if (IsBranchInstruction(inst)) { Host::ReportFormattedDebuggerMessage( - Host::TranslateString("DebuggerMessage", "Can't step over double branch at 0x%08X"), g_state.regs.pc); + Host::TranslateString("DebuggerMessage", 
"Can't step over double branch at 0x%08X"), g_state.pc); return false; } @@ -1905,7 +1914,7 @@ bool AddStepOverBreakpoint() bool AddStepOutBreakpoint(u32 max_instructions_to_search) { // find the branch-to-ra instruction. - u32 ret_pc = g_state.regs.pc; + u32 ret_pc = g_state.pc; for (u32 i = 0; i < max_instructions_to_search; i++) { ret_pc += sizeof(Instruction); @@ -1929,21 +1938,24 @@ bool AddStepOutBreakpoint(u32 max_instructions_to_search) Host::ReportFormattedDebuggerMessage( Host::TranslateString("DebuggerMessage", "No return instruction found after %u instructions for step-out at %08X."), - max_instructions_to_search, g_state.regs.pc); + max_instructions_to_search, g_state.pc); return false; } ALWAYS_INLINE_RELEASE static bool BreakpointCheck() { - const u32 pc = g_state.regs.pc; + const u32 pc = g_state.pc; // single step - we want to break out after this instruction, so set a pending exit // the bp check happens just before execution, so this is fine if (s_single_step) { - ForceDispatcherExit(); - s_single_step = false; + if (s_single_step_done) + ExitExecution(); + else + s_single_step_done = true; + s_last_breakpoint_check_pc = pc; return false; } @@ -2004,19 +2016,14 @@ ALWAYS_INLINE_RELEASE static bool BreakpointCheck() } template -static void ExecuteImpl() +[[noreturn]] static void ExecuteImpl() { - g_using_interpreter = true; - g_state.frame_done = false; - while (!g_state.frame_done) + for (;;) { - TimingEvents::UpdateCPUDowncount(); + TimingEvents::RunEvents(); while (g_state.pending_ticks < g_state.downcount) { - if (HasPendingInterrupt() && !g_state.interrupt_delay) - DispatchInterrupt(); - if constexpr (debug) { Cop0ExecutionBreakpointCheck(); @@ -2028,12 +2035,11 @@ static void ExecuteImpl() } } - g_state.interrupt_delay = false; g_state.pending_ticks++; // now executing the instruction we previously fetched g_state.current_instruction.bits = g_state.next_instruction.bits; - g_state.current_instruction_pc = g_state.regs.pc; + 
g_state.current_instruction_pc = g_state.pc; g_state.current_instruction_in_branch_delay_slot = g_state.next_instruction_is_branch_delay_slot; g_state.current_instruction_was_branch_taken = g_state.branch_was_taken; g_state.next_instruction_is_branch_delay_slot = false; @@ -2065,46 +2071,74 @@ static void ExecuteImpl() // next load delay UpdateLoadDelay(); } - - TimingEvents::RunEvents(); } } -void Execute() +static void ExecuteDebug() { if (g_settings.gpu_pgxp_enable) { if (g_settings.gpu_pgxp_cpu) - ExecuteImpl(); + ExecuteImpl(); else - ExecuteImpl(); + ExecuteImpl(); } else { - ExecuteImpl(); + ExecuteImpl(); } } -void ExecuteDebug() +void Execute() { - if (g_settings.gpu_pgxp_enable) + const CPUExecutionMode exec_mode = g_settings.cpu_execution_mode; + const bool use_debug_dispatcher = g_state.use_debug_dispatcher; + if (fastjmp_set(&s_jmp_buf) != 0) { - if (g_settings.gpu_pgxp_cpu) - ExecuteImpl(); - else - ExecuteImpl(); + // Before we return, set npc to pc so that we can switch from recs to int. 
+ if (exec_mode != CPUExecutionMode::Interpreter && !use_debug_dispatcher) + g_state.npc = g_state.pc; + + return; } - else + + if (use_debug_dispatcher) { - ExecuteImpl(); + ExecuteDebug(); + return; + } + + switch (exec_mode) + { + case CPUExecutionMode::Recompiler: + case CPUExecutionMode::CachedInterpreter: + CodeCache::Execute(); + break; + + case CPUExecutionMode::Interpreter: + default: + { + if (g_settings.gpu_pgxp_enable) + { + if (g_settings.gpu_pgxp_cpu) + ExecuteImpl(); + else + ExecuteImpl(); + } + else + { + ExecuteImpl(); + } + } + break; } } void SingleStep() { - s_single_step = true; - ExecuteDebug(); - Host::ReportFormattedDebuggerMessage("Stepped to 0x%08X.", g_state.regs.pc); + if (fastjmp_set(&s_jmp_buf) == 0) + ExecuteDebug(); + Host::ReportFormattedDebuggerMessage("Stepped to 0x%08X.", g_state.pc); } namespace CodeCache { @@ -2113,8 +2147,8 @@ template void InterpretCachedBlock(const CodeBlock& block) { // set up the state so we've already fetched the instruction - DebugAssert(g_state.regs.pc == block.GetPC()); - g_state.regs.npc = block.GetPC() + 4; + DebugAssert(g_state.pc == block.GetPC()); + g_state.npc = block.GetPC() + 4; for (const CodeBlockInstruction& cbi : block.instructions) { @@ -2129,8 +2163,8 @@ void InterpretCachedBlock(const CodeBlock& block) g_state.exception_raised = false; // update pc - g_state.regs.pc = g_state.regs.npc; - g_state.regs.npc += 4; + g_state.pc = g_state.npc; + g_state.npc += 4; // execute the instruction we previously fetched ExecuteInstruction(); @@ -2153,7 +2187,7 @@ template void InterpretCachedBlock(const CodeBlock& block); template void InterpretUncachedBlock() { - g_state.regs.npc = g_state.regs.pc; + g_state.npc = g_state.pc; if (!FetchInstructionForInterpreterFallback()) return; @@ -2166,7 +2200,7 @@ void InterpretUncachedBlock() // now executing the instruction we previously fetched g_state.current_instruction.bits = g_state.next_instruction.bits; - g_state.current_instruction_pc = g_state.regs.pc; 
+ g_state.current_instruction_pc = g_state.pc; g_state.current_instruction_in_branch_delay_slot = g_state.next_instruction_is_branch_delay_slot; g_state.current_instruction_was_branch_taken = g_state.branch_was_taken; g_state.next_instruction_is_branch_delay_slot = false; @@ -2182,7 +2216,7 @@ void InterpretUncachedBlock() } else { - g_state.regs.pc = g_state.regs.npc; + g_state.pc = g_state.npc; } // execute the instruction we previously fetched diff --git a/src/core/cpu_core.h b/src/core/cpu_core.h index 4e749eb72b..cba2f05f64 100644 --- a/src/core/cpu_core.h +++ b/src/core/cpu_core.h @@ -56,7 +56,9 @@ struct State Registers regs = {}; Cop0Registers cop0_regs = {}; - Instruction next_instruction = {}; + + u32 pc; // at execution time: the address of the next instruction to execute (already fetched) + u32 npc; // at execution time: the address of the next instruction to fetch // address of the instruction currently being executed Instruction current_instruction = {}; @@ -66,15 +68,14 @@ struct State bool next_instruction_is_branch_delay_slot = false; bool branch_was_taken = false; bool exception_raised = false; - bool interrupt_delay = false; - bool frame_done = false; // load delays Reg load_delay_reg = Reg::count; - u32 load_delay_value = 0; Reg next_load_delay_reg = Reg::count; + u32 load_delay_value = 0; u32 next_load_delay_value = 0; + Instruction next_instruction = {}; CacheControl cache_control{0}; // GTE registers are stored here so we can access them on ARM with a single instruction @@ -95,7 +96,6 @@ struct State }; extern State g_state; -extern bool g_using_interpreter; void Initialize(); void Shutdown(); @@ -106,38 +106,37 @@ void UpdateFastmemBase(); /// Executes interpreter loop. void Execute(); -void ExecuteDebug(); void SingleStep(); // Forces an early exit from the CPU dispatcher. 
-void ForceDispatcherExit(); +void ExitExecution(); -ALWAYS_INLINE Registers& GetRegs() +ALWAYS_INLINE static Registers& GetRegs() { return g_state.regs; } -ALWAYS_INLINE TickCount GetPendingTicks() +ALWAYS_INLINE static TickCount GetPendingTicks() { return g_state.pending_ticks; } -ALWAYS_INLINE void ResetPendingTicks() +ALWAYS_INLINE static void ResetPendingTicks() { g_state.gte_completion_tick = (g_state.pending_ticks < g_state.gte_completion_tick) ? (g_state.gte_completion_tick - g_state.pending_ticks) : 0; g_state.pending_ticks = 0; } -ALWAYS_INLINE void AddPendingTicks(TickCount ticks) +ALWAYS_INLINE static void AddPendingTicks(TickCount ticks) { g_state.pending_ticks += ticks; } // state helpers -ALWAYS_INLINE bool InUserMode() +ALWAYS_INLINE static bool InUserMode() { return g_state.cop0_regs.sr.KUc; } -ALWAYS_INLINE bool InKernelMode() +ALWAYS_INLINE static bool InKernelMode() { return !g_state.cop0_regs.sr.KUc; } diff --git a/src/core/cpu_core_private.h b/src/core/cpu_core_private.h index 8a5caae393..6b44713eb9 100644 --- a/src/core/cpu_core_private.h +++ b/src/core/cpu_core_private.h @@ -12,13 +12,13 @@ void RaiseException(Exception excode); void RaiseException(u32 CAUSE_bits, u32 EPC); void RaiseBreakException(u32 CAUSE_bits, u32 EPC, u32 instruction_bits); -ALWAYS_INLINE bool HasPendingInterrupt() +ALWAYS_INLINE static bool HasPendingInterrupt() { return g_state.cop0_regs.sr.IEc && (((g_state.cop0_regs.cause.bits & g_state.cop0_regs.sr.bits) & (UINT32_C(0xFF) << 8)) != 0); } -ALWAYS_INLINE void CheckForPendingInterrupt() +ALWAYS_INLINE static void CheckForPendingInterrupt() { if (HasPendingInterrupt()) g_state.downcount = 0; @@ -28,36 +28,36 @@ void DispatchInterrupt(); void UpdateDebugDispatcherFlag(); // icache stuff -ALWAYS_INLINE bool IsCachedAddress(VirtualMemoryAddress address) +ALWAYS_INLINE static bool IsCachedAddress(VirtualMemoryAddress address) { // KUSEG, KSEG0 return (address >> 29) <= 4; } -ALWAYS_INLINE u32 
GetICacheLine(VirtualMemoryAddress address) +ALWAYS_INLINE static u32 GetICacheLine(VirtualMemoryAddress address) { return ((address >> 4) & 0xFFu); } -ALWAYS_INLINE u32 GetICacheLineOffset(VirtualMemoryAddress address) +ALWAYS_INLINE static u32 GetICacheLineOffset(VirtualMemoryAddress address) { return (address & (ICACHE_LINE_SIZE - 1)); } -ALWAYS_INLINE u32 GetICacheTagForAddress(VirtualMemoryAddress address) +ALWAYS_INLINE static u32 GetICacheTagForAddress(VirtualMemoryAddress address) { return (address & ICACHE_TAG_ADDRESS_MASK); } -ALWAYS_INLINE u32 GetICacheFillTagForAddress(VirtualMemoryAddress address) +ALWAYS_INLINE static u32 GetICacheFillTagForAddress(VirtualMemoryAddress address) { static const u32 invalid_bits[4] = {0, 1, 3, 7}; return GetICacheTagForAddress(address) | invalid_bits[(address >> 2) & 0x03u]; } -ALWAYS_INLINE u32 GetICacheTagMaskForAddress(VirtualMemoryAddress address) +ALWAYS_INLINE static u32 GetICacheTagMaskForAddress(VirtualMemoryAddress address) { static const u32 mask[4] = {ICACHE_TAG_ADDRESS_MASK | 1, ICACHE_TAG_ADDRESS_MASK | 2, ICACHE_TAG_ADDRESS_MASK | 4, ICACHE_TAG_ADDRESS_MASK | 8}; return mask[(address >> 2) & 0x03u]; } -ALWAYS_INLINE bool CompareICacheTag(VirtualMemoryAddress address) +ALWAYS_INLINE static bool CompareICacheTag(VirtualMemoryAddress address) { const u32 line = GetICacheLine(address); return ((g_state.icache_tags[line] & GetICacheTagMaskForAddress(address)) == GetICacheTagForAddress(address)); @@ -68,7 +68,7 @@ TickCount GetICacheFillTicks(VirtualMemoryAddress address); u32 FillICache(VirtualMemoryAddress address); void CheckAndUpdateICacheTags(u32 line_count, TickCount uncached_ticks); -ALWAYS_INLINE Segment GetSegmentForAddress(VirtualMemoryAddress address) +ALWAYS_INLINE static Segment GetSegmentForAddress(VirtualMemoryAddress address) { switch ((address >> 29)) { @@ -91,12 +91,12 @@ ALWAYS_INLINE Segment GetSegmentForAddress(VirtualMemoryAddress address) } } -ALWAYS_INLINE PhysicalMemoryAddress 
VirtualAddressToPhysical(VirtualMemoryAddress address) +ALWAYS_INLINE static constexpr PhysicalMemoryAddress VirtualAddressToPhysical(VirtualMemoryAddress address) { return (address & PHYSICAL_MEMORY_ADDRESS_MASK); } -ALWAYS_INLINE VirtualMemoryAddress PhysicalAddressToVirtual(PhysicalMemoryAddress address, Segment segment) +ALWAYS_INLINE static VirtualMemoryAddress PhysicalAddressToVirtual(PhysicalMemoryAddress address, Segment segment) { static constexpr std::array bases = {{0x00000000, 0x80000000, 0xA0000000, 0xE0000000}}; return bases[static_cast(segment)] | address; @@ -115,12 +115,12 @@ bool WriteMemoryWord(VirtualMemoryAddress addr, u32 value); void* GetDirectReadMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size, TickCount* read_ticks); void* GetDirectWriteMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size); -ALWAYS_INLINE void AddGTETicks(TickCount ticks) +ALWAYS_INLINE static void AddGTETicks(TickCount ticks) { g_state.gte_completion_tick = g_state.pending_ticks + ticks + 1; } -ALWAYS_INLINE void StallUntilGTEComplete() +ALWAYS_INLINE static void StallUntilGTEComplete() { g_state.pending_ticks = (g_state.gte_completion_tick > g_state.pending_ticks) ? g_state.gte_completion_tick : g_state.pending_ticks; diff --git a/src/core/cpu_recompiler_code_generator.cpp b/src/core/cpu_recompiler_code_generator.cpp index 02965cc2e3..20f275df44 100644 --- a/src/core/cpu_recompiler_code_generator.cpp +++ b/src/core/cpu_recompiler_code_generator.cpp @@ -1156,7 +1156,7 @@ Value CodeGenerator::GetCurrentInstructionPC(u32 offset /* = 0 */) void CodeGenerator::WriteNewPC(const Value& value, bool commit) { // TODO: This _could_ be moved into the register cache, but would it gain anything? 
- EmitStoreGuestRegister(Reg::pc, value); + EmitStoreCPUStructField(offsetof(CPU::State, pc), value); if (commit) { m_pc_valid = value.IsConstant(); @@ -1450,7 +1450,7 @@ bool CodeGenerator::Compile_Load(const CodeBlockInstruction& cbi) result = EmitLoadGuestMemory(cbi, address, address_spec, RegSize_8); ConvertValueSizeInPlace(&result, RegSize_32, (cbi.instruction.op == InstructionOp::lb)); if (g_settings.gpu_pgxp_enable) - EmitFunctionCall(nullptr, PGXP::CPU_LBx, Value::FromConstantU32(cbi.instruction.bits), result, address); + EmitFunctionCall(nullptr, PGXP::CPU_LBx, Value::FromConstantU32(cbi.instruction.bits), address, result); if (address_spec) { @@ -1468,7 +1468,7 @@ bool CodeGenerator::Compile_Load(const CodeBlockInstruction& cbi) ConvertValueSizeInPlace(&result, RegSize_32, (cbi.instruction.op == InstructionOp::lh)); if (g_settings.gpu_pgxp_enable) - EmitFunctionCall(nullptr, PGXP::CPU_LHx, Value::FromConstantU32(cbi.instruction.bits), result, address); + EmitFunctionCall(nullptr, PGXP::CPU_LHx, Value::FromConstantU32(cbi.instruction.bits), address, result); if (address_spec) { @@ -1483,7 +1483,7 @@ bool CodeGenerator::Compile_Load(const CodeBlockInstruction& cbi) { result = EmitLoadGuestMemory(cbi, address, address_spec, RegSize_32); if (g_settings.gpu_pgxp_enable) - EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(cbi.instruction.bits), result, address); + EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(cbi.instruction.bits), address, result); if (address_spec) value_spec = SpeculativeReadMemory(*address_spec); @@ -1522,10 +1522,7 @@ bool CodeGenerator::Compile_Store(const CodeBlockInstruction& cbi) case InstructionOp::sb: { if (g_settings.gpu_pgxp_enable) - { - EmitFunctionCall(nullptr, PGXP::CPU_SB, Value::FromConstantU32(cbi.instruction.bits), - value.ViewAsSize(RegSize_8), address); - } + EmitFunctionCall(nullptr, PGXP::CPU_SB, Value::FromConstantU32(cbi.instruction.bits), address, value); EmitStoreGuestMemory(cbi, 
address, address_spec, RegSize_8, value); @@ -1553,10 +1550,7 @@ bool CodeGenerator::Compile_Store(const CodeBlockInstruction& cbi) case InstructionOp::sh: { if (g_settings.gpu_pgxp_enable) - { - EmitFunctionCall(nullptr, PGXP::CPU_SH, Value::FromConstantU32(cbi.instruction.bits), - value.ViewAsSize(RegSize_16), address); - } + EmitFunctionCall(nullptr, PGXP::CPU_SH, Value::FromConstantU32(cbi.instruction.bits), address, value); EmitStoreGuestMemory(cbi, address, address_spec, RegSize_16, value); @@ -1584,7 +1578,7 @@ bool CodeGenerator::Compile_Store(const CodeBlockInstruction& cbi) case InstructionOp::sw: { if (g_settings.gpu_pgxp_enable) - EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(cbi.instruction.bits), value, address); + EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(cbi.instruction.bits), address, value); EmitStoreGuestMemory(cbi, address, address_spec, RegSize_32, value); @@ -1688,7 +1682,7 @@ bool CodeGenerator::Compile_LoadLeftRight(const CodeBlockInstruction& cbi) shift.ReleaseAndClear(); if (g_settings.gpu_pgxp_enable) - EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(cbi.instruction.bits), mem, address); + EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(cbi.instruction.bits), address, mem); m_register_cache.WriteGuestRegisterDelayed(cbi.instruction.i.rt, std::move(mem)); @@ -1751,7 +1745,7 @@ bool CodeGenerator::Compile_StoreLeftRight(const CodeBlockInstruction& cbi) EmitStoreGuestMemory(cbi, address, address_spec, RegSize_32, mem); if (g_settings.gpu_pgxp_enable) - EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(cbi.instruction.bits), mem, address); + EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(cbi.instruction.bits), address, mem); InstructionEpilogue(cbi); return true; @@ -2950,7 +2944,7 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi) DoGTERegisterWrite(reg, value); if (g_settings.gpu_pgxp_enable) - EmitFunctionCall(nullptr, 
PGXP::CPU_LWC2, Value::FromConstantU32(cbi.instruction.bits), value, address); + EmitFunctionCall(nullptr, PGXP::CPU_LWC2, Value::FromConstantU32(cbi.instruction.bits), address, value); } else { @@ -2958,7 +2952,7 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi) EmitStoreGuestMemory(cbi, address, spec_address, RegSize_32, value); if (g_settings.gpu_pgxp_enable) - EmitFunctionCall(nullptr, PGXP::CPU_SWC2, Value::FromConstantU32(cbi.instruction.bits), value, address); + EmitFunctionCall(nullptr, PGXP::CPU_SWC2, Value::FromConstantU32(cbi.instruction.bits), address, value); SpeculativeValue spec_base = SpeculativeReadReg(cbi.instruction.i.rs); if (spec_base) @@ -2988,11 +2982,7 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi) // PGXP done first here before ownership is transferred. if (g_settings.gpu_pgxp_enable) - { - EmitFunctionCall( - nullptr, (cbi.instruction.cop.CommonOp() == CopCommonInstruction::cfcn) ? PGXP::CPU_CFC2 : PGXP::CPU_MFC2, - Value::FromConstantU32(cbi.instruction.bits), value, value); - } + EmitFunctionCall(nullptr, PGXP::CPU_MFC2, Value::FromConstantU32(cbi.instruction.bits), value); m_register_cache.WriteGuestRegisterDelayed(cbi.instruction.r.rt, std::move(value)); SpeculativeWriteReg(cbi.instruction.r.rt, std::nullopt); @@ -3014,11 +3004,7 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi) DoGTERegisterWrite(reg, value); if (g_settings.gpu_pgxp_enable) - { - EmitFunctionCall( - nullptr, (cbi.instruction.cop.CommonOp() == CopCommonInstruction::ctcn) ? 
PGXP::CPU_CTC2 : PGXP::CPU_MTC2, - Value::FromConstantU32(cbi.instruction.bits), value, value); - } + EmitFunctionCall(nullptr, PGXP::CPU_MTC2, Value::FromConstantU32(cbi.instruction.bits), value); InstructionEpilogue(cbi); return true; diff --git a/src/core/cpu_recompiler_code_generator_aarch32.cpp b/src/core/cpu_recompiler_code_generator_aarch32.cpp index 9cfbb4f079..4b82622c05 100644 --- a/src/core/cpu_recompiler_code_generator_aarch32.cpp +++ b/src/core/cpu_recompiler_code_generator_aarch32.cpp @@ -32,9 +32,6 @@ constexpr u32 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224 constexpr u32 FUNCTION_STACK_SIZE = FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE + FUNCTION_CALL_SHADOW_SPACE; -// PC we return to after the end of the block -static void* s_dispatcher_return_address; - static s32 GetPCDisplacement(const void* current, const void* target) { Assert(Common::IsAlignedPow2(reinterpret_cast(current), 4)); @@ -201,10 +198,7 @@ void CodeGenerator::EmitEndBlock(bool free_registers /* = true */, bool emit_ret m_emit->add(a32::sp, a32::sp, FUNCTION_STACK_SIZE); if (emit_return) - { - // m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address)); m_emit->bx(a32::lr); - } } void CodeGenerator::EmitExceptionExit() @@ -219,7 +213,6 @@ void CodeGenerator::EmitExceptionExit() m_register_cache.PopCalleeSavedRegisters(false); m_emit->add(a32::sp, a32::sp, FUNCTION_STACK_SIZE); - // m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address)); m_emit->bx(a32::lr); } @@ -2072,64 +2065,16 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher() EmitLoadGlobalAddress(RCPUPTR, &g_state); - a32::Label frame_done_loop; - a32::Label exit_dispatcher; - m_emit->Bind(&frame_done_loop); - - // if frame_done goto exit_dispatcher - m_emit->ldrb(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, frame_done))); - m_emit->tst(a32::r0, 1); - m_emit->b(a32::ne, &exit_dispatcher); 
- - // r0 <- sr - a32::Label no_interrupt; - m_emit->ldr(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, cop0_regs.sr.bits))); - - // if Iec == 0 then goto no_interrupt - m_emit->tst(a32::r0, 1); - m_emit->b(a32::eq, &no_interrupt); - - // r1 <- cause - // r0 (sr) & cause - m_emit->ldr(a32::r1, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, cop0_regs.cause.bits))); - m_emit->and_(a32::r0, a32::r0, a32::r1); - - // ((sr & cause) & 0xff00) == 0 goto no_interrupt - m_emit->tst(a32::r0, 0xFF00); - m_emit->b(a32::eq, &no_interrupt); - - // we have an interrupt - EmitCall(reinterpret_cast(&DispatchInterrupt)); - - // no interrupt or we just serviced it - m_emit->Bind(&no_interrupt); - - // TimingEvents::UpdateCPUDowncount: - // r0 <- head event->downcount - // downcount <- r0 - EmitLoadGlobalAddress(0, TimingEvents::GetHeadEventPtr()); - m_emit->ldr(a32::r0, a32::MemOperand(a32::r0)); - m_emit->ldr(a32::r0, a32::MemOperand(a32::r0, offsetof(TimingEvent, m_downcount))); - m_emit->str(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, downcount))); + a32::Label event_test; + m_emit->b(&event_test); // main dispatch loop a32::Label main_loop; m_emit->Bind(&main_loop); - s_dispatcher_return_address = GetCurrentCodePointer(); - - // r0 <- pending_ticks - // r1 <- downcount - m_emit->ldr(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, pending_ticks))); - m_emit->ldr(a32::r1, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, downcount))); - - // while downcount < pending_ticks - a32::Label downcount_hit; - m_emit->cmp(a32::r0, a32::r1); - m_emit->b(a32::ge, &downcount_hit); // time to lookup the block // r0 <- pc - m_emit->ldr(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, regs.pc))); + m_emit->ldr(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, pc))); // r1 <- s_fast_map[pc >> 16] EmitLoadGlobalAddress(2, CodeCache::GetFastMapPointer()); @@ -2140,21 +2085,20 @@ 
CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher() m_emit->ldr(a32::r0, a32::MemOperand(a32::r1, a32::r0)); m_emit->blx(a32::r0); - // end while - m_emit->Bind(&downcount_hit); - - // check events then for frame done + // r0 <- pending_ticks + // r1 <- downcount m_emit->ldr(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, pending_ticks))); - EmitLoadGlobalAddress(1, TimingEvents::GetHeadEventPtr()); - m_emit->ldr(a32::r1, a32::MemOperand(a32::r1)); - m_emit->ldr(a32::r1, a32::MemOperand(a32::r1, offsetof(TimingEvent, m_downcount))); + m_emit->ldr(a32::r1, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, downcount))); + + // while downcount < pending_ticks + a32::Label downcount_hit; m_emit->cmp(a32::r0, a32::r1); - m_emit->b(a32::lt, &frame_done_loop); - EmitCall(reinterpret_cast(&TimingEvents::RunEvents)); - m_emit->b(&frame_done_loop); + m_emit->b(a32::lt, &main_loop); - // all done - m_emit->Bind(&exit_dispatcher); + // end while + m_emit->Bind(&event_test); + EmitCall(reinterpret_cast(&TimingEvents::RunEvents)); + m_emit->b(&main_loop); RestoreStackAfterCall(stack_adjust); m_register_cache.PopCalleeSavedRegisters(true); diff --git a/src/core/cpu_recompiler_code_generator_aarch64.cpp b/src/core/cpu_recompiler_code_generator_aarch64.cpp index 3b1beaeb33..eea642484d 100644 --- a/src/core/cpu_recompiler_code_generator_aarch64.cpp +++ b/src/core/cpu_recompiler_code_generator_aarch64.cpp @@ -30,9 +30,6 @@ constexpr u64 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224 constexpr u64 FUNCTION_STACK_SIZE = FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE + FUNCTION_CALL_SHADOW_SPACE; -// PC we return to after the end of the block -static void* s_dispatcher_return_address; - static s64 GetPCDisplacement(const void* current, const void* target) { Assert(Common::IsAlignedPow2(reinterpret_cast(current), 4)); @@ -256,7 +253,6 @@ void CodeGenerator::EmitExceptionExit() 
m_register_cache.PopCalleeSavedRegisters(false); m_emit->Add(a64::sp, a64::sp, FUNCTION_STACK_SIZE); - // m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address)); m_emit->Ret(); } @@ -2278,62 +2274,16 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher() EmitLoadGlobalAddress(RCPUPTR, &g_state); - a64::Label frame_done_loop; - a64::Label exit_dispatcher; - m_emit->Bind(&frame_done_loop); - - // if frame_done goto exit_dispatcher - m_emit->ldrb(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, frame_done))); - m_emit->tbnz(a64::w8, 0, &exit_dispatcher); - - // x8 <- sr - a64::Label no_interrupt; - m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, cop0_regs.sr.bits))); - - // if Iec == 0 then goto no_interrupt - m_emit->tbz(a64::w8, 0, &no_interrupt); - - // x9 <- cause - // x8 (sr) & cause - m_emit->ldr(a64::w9, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, cop0_regs.cause.bits))); - m_emit->and_(a64::w8, a64::w8, a64::w9); - - // ((sr & cause) & 0xff00) == 0 goto no_interrupt - m_emit->tst(a64::w8, 0xFF00); - m_emit->b(&no_interrupt, a64::eq); - - // we have an interrupt - EmitCall(reinterpret_cast(&DispatchInterrupt)); - - // no interrupt or we just serviced it - m_emit->Bind(&no_interrupt); - - // TimingEvents::UpdateCPUDowncount: - // x8 <- head event->downcount - // downcount <- x8 - EmitLoadGlobalAddress(8, TimingEvents::GetHeadEventPtr()); - m_emit->ldr(a64::x8, a64::MemOperand(a64::x8)); - m_emit->ldr(a64::w8, a64::MemOperand(a64::x8, offsetof(TimingEvent, m_downcount))); - m_emit->str(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, downcount))); + a64::Label event_test; + m_emit->b(&event_test); // main dispatch loop a64::Label main_loop; m_emit->Bind(&main_loop); - s_dispatcher_return_address = GetCurrentCodePointer(); - - // w8 <- pending_ticks - // w9 <- downcount - m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, 
pending_ticks))); - m_emit->ldr(a64::w9, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, downcount))); - - // while downcount < pending_ticks - a64::Label downcount_hit; - m_emit->cmp(a64::w8, a64::w9); - m_emit->b(&downcount_hit, a64::ge); // time to lookup the block // w8 <- pc - m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, regs.pc))); + m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, pc))); // x9 <- s_fast_map[pc >> 16] EmitLoadGlobalAddress(10, CodeCache::GetFastMapPointer()); @@ -2345,21 +2295,20 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher() m_emit->ldr(a64::x8, a64::MemOperand(a64::x9, a64::x8, a64::LSL, 3)); m_emit->blr(a64::x8); - // end while - m_emit->Bind(&downcount_hit); - - // check events then for frame done + // w8 <- pending_ticks + // w9 <- downcount m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, pending_ticks))); - EmitLoadGlobalAddress(9, TimingEvents::GetHeadEventPtr()); - m_emit->ldr(a64::x9, a64::MemOperand(a64::x9)); - m_emit->ldr(a64::w9, a64::MemOperand(a64::x9, offsetof(TimingEvent, m_downcount))); + m_emit->ldr(a64::w9, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, downcount))); + + // while downcount < pending_ticks m_emit->cmp(a64::w8, a64::w9); - m_emit->b(&frame_done_loop, a64::lt); + m_emit->b(&main_loop, a64::lt); + + m_emit->Bind(&event_test); EmitCall(reinterpret_cast(&TimingEvents::RunEvents)); - m_emit->b(&frame_done_loop); + m_emit->b(&main_loop); // all done - m_emit->Bind(&exit_dispatcher); RestoreStackAfterCall(stack_adjust); m_register_cache.PopCalleeSavedRegisters(true); m_emit->add(a64::sp, a64::sp, FUNCTION_STACK_SIZE); diff --git a/src/core/cpu_recompiler_code_generator_x64.cpp b/src/core/cpu_recompiler_code_generator_x64.cpp index 439d754a12..24374d8d7b 100644 --- a/src/core/cpu_recompiler_code_generator_x64.cpp +++ b/src/core/cpu_recompiler_code_generator_x64.cpp @@ -3024,59 +3024,17 @@ 
CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher() EmitLoadGlobalAddress(Xbyak::Operand::RBP, &g_state); - Xbyak::Label frame_done_loop; - Xbyak::Label exit_dispatcher; - m_emit->L(frame_done_loop); - - // if frame_done goto exit_dispatcher - m_emit->test(m_emit->byte[m_emit->rbp + offsetof(State, frame_done)], 1); - m_emit->jnz(exit_dispatcher, Xbyak::CodeGenerator::T_NEAR); - - // eax <- sr - Xbyak::Label no_interrupt; - m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, cop0_regs.sr.bits)]); - - // if Iec == 0 then goto no_interrupt - m_emit->test(m_emit->eax, 1); - m_emit->jz(no_interrupt); - - // sr & cause - m_emit->and_(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, cop0_regs.cause.bits)]); - - // ((sr & cause) & 0xff00) == 0 goto no_interrupt - m_emit->test(m_emit->eax, 0xFF00); - m_emit->jz(no_interrupt); - - // we have an interrupt - EmitCall(reinterpret_cast(&DispatchInterrupt)); - - // no interrupt or we just serviced it - m_emit->L(no_interrupt); - - // TimingEvents::UpdateCPUDowncount: - // eax <- head event->downcount - // downcount <- eax - EmitLoadGlobalAddress(Xbyak::Operand::RAX, TimingEvents::GetHeadEventPtr()); - m_emit->mov(m_emit->rax, m_emit->qword[m_emit->rax]); - m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rax + offsetof(TimingEvent, m_downcount)]); - m_emit->mov(m_emit->dword[m_emit->rbp + offsetof(State, downcount)], m_emit->eax); + Xbyak::Label event_test; + m_emit->jmp(event_test); // main dispatch loop Xbyak::Label main_loop; m_emit->align(16); m_emit->L(main_loop); - // eax <- pending_ticks - m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, pending_ticks)]); - - // while eax < downcount - Xbyak::Label downcount_hit; - m_emit->cmp(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, downcount)]); - m_emit->jge(downcount_hit); - // time to lookup the block // eax <- pc - m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, regs.pc)]); + 
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, pc)]); // rcx <- s_fast_map[pc >> 16] EmitLoadGlobalAddress(Xbyak::Operand::RBX, CodeCache::GetFastMapPointer()); @@ -3087,22 +3045,19 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher() // call(rcx[pc * 2]) (fast_map[pc >> 2]) m_emit->call(m_emit->qword[m_emit->rcx + m_emit->rax * 2]); - m_emit->jmp(main_loop); + // eax <- pending_ticks + m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, pending_ticks)]); - // end while - m_emit->L(downcount_hit); + // while eax < downcount + Xbyak::Label downcount_hit; + m_emit->cmp(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, downcount)]); + m_emit->jl(main_loop); - // check events then for frame done - EmitLoadGlobalAddress(Xbyak::Operand::RAX, TimingEvents::GetHeadEventPtr()); - m_emit->mov(m_emit->rax, m_emit->qword[m_emit->rax]); - m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rax + offsetof(TimingEvent, m_downcount)]); - m_emit->cmp(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, pending_ticks)]); - m_emit->jg(frame_done_loop); + m_emit->L(event_test); EmitCall(reinterpret_cast(&TimingEvents::RunEvents)); - m_emit->jmp(frame_done_loop); + m_emit->jmp(main_loop); // all done - m_emit->L(exit_dispatcher); RestoreStackAfterCall(stack_adjust); m_register_cache.PopCalleeSavedRegisters(true); m_emit->ret(); diff --git a/src/core/cpu_recompiler_types.h b/src/core/cpu_recompiler_types.h index a6766aa1b2..eaeaeae811 100644 --- a/src/core/cpu_recompiler_types.h +++ b/src/core/cpu_recompiler_types.h @@ -130,6 +130,13 @@ constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128; // Alignment of code stoarge. constexpr u32 CODE_STORAGE_ALIGNMENT = 4096; +#elif defined(CPU_RISCV64) + +using HostReg = unsigned; + +// Alignment of code stoarge. 
+constexpr u32 CODE_STORAGE_ALIGNMENT = 4096; + #else using HostReg = int; diff --git a/src/core/cpu_types.h b/src/core/cpu_types.h index 1de504a153..fb52978543 100644 --- a/src/core/cpu_types.h +++ b/src/core/cpu_types.h @@ -60,12 +60,8 @@ enum class Reg : u8 sp, fp, ra, - - // not accessible to instructions hi, lo, - pc, - npc, count }; @@ -213,6 +209,7 @@ union Instruction } ALWAYS_INLINE Cop0Instruction Cop0Op() const { return static_cast(bits & UINT32_C(0x3F)); } + ALWAYS_INLINE u32 Cop2Index() const { return ((bits >> 11) & 0x1F) | ((bits >> 17) & 0x20); } } cop; bool IsCop2Instruction() const @@ -240,7 +237,7 @@ struct Registers { union { - u32 r[static_cast(Reg::count)]; + u32 r[static_cast(Reg::count) + 1]; // +1 for the dummy load delay write slot struct { @@ -276,12 +273,8 @@ struct Registers u32 sp; // r29 u32 fp; // r30 u32 ra; // r31 - - // not accessible to instructions u32 hi; u32 lo; - u32 pc; // at execution time: the address of the next instruction to execute (already fetched) - u32 npc; // at execution time: the address of the next instruction to fetch }; }; }; diff --git a/src/core/gdb_protocol.cpp b/src/core/gdb_protocol.cpp index ebb6afd0d8..d97b484e76 100644 --- a/src/core/gdb_protocol.cpp +++ b/src/core/gdb_protocol.cpp @@ -107,7 +107,7 @@ static const std::array REGISTERS { &CPU::g_state.regs.hi, &CPU::g_state.cop0_regs.BadVaddr, &CPU::g_state.cop0_regs.cause.bits, - &CPU::g_state.regs.pc, + &CPU::g_state.pc, }; /// Number of registers in GDB remote protocol for MIPS III. diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index d099a2be4b..c61e7d5af0 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -901,9 +901,10 @@ void GPU::CRTCTickEvent(TickCount ticks) InterruptController::InterruptRequest(InterruptController::IRQ::VBLANK); // flush any pending draws and "scan out" the image + // TODO: move present in here I guess FlushRender(); UpdateDisplay(); - System::FrameDone(); + TimingEvents::SetFrameDone(); // switch fields early. 
this is needed so we draw to the correct one. if (m_GPUSTAT.InInterleaved480iMode()) diff --git a/src/core/gte.cpp b/src/core/gte.cpp index 25feb02eb3..54ee5dbbf9 100644 --- a/src/core/gte.cpp +++ b/src/core/gte.cpp @@ -4,13 +4,13 @@ #include "gte.h" #include "common/assert.h" #include "common/bitutils.h" -#include "util/state_wrapper.h" #include "cpu_core.h" #include "cpu_core_private.h" #include "host_display.h" #include "pgxp.h" #include "settings.h" #include "timing_event.h" +#include "util/state_wrapper.h" #include #include #include @@ -471,11 +471,12 @@ ALWAYS_INLINE static u32 UNRDivide(u32 lhs, u32 rhs) return std::min(0x1FFFF, result); } -static void MulMatVec(const s16 M[3][3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm) +static void MulMatVec(const s16* M_, const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm) { +#define M(i, j) M_[((i)*3) + (j)] #define dot3(i) \ - TruncateAndSetMACAndIR(SignExtendMACResult((s64(M[i][0]) * s64(Vx)) + (s64(M[i][1]) * s64(Vy))) + \ - (s64(M[i][2]) * s64(Vz)), \ + TruncateAndSetMACAndIR(SignExtendMACResult((s64(M(i, 0)) * s64(Vx)) + (s64(M(i, 1)) * s64(Vy))) + \ + (s64(M(i, 2)) * s64(Vz)), \ shift, lm) dot3(0); @@ -483,15 +484,17 @@ static void MulMatVec(const s16 M[3][3], const s16 Vx, const s16 Vy, const s16 V dot3(2); #undef dot3 +#undef M } -static void MulMatVec(const s16 M[3][3], const s32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm) +static void MulMatVec(const s16* M_, const s32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm) { +#define M(i, j) M_[((i)*3) + (j)] #define dot3(i) \ TruncateAndSetMACAndIR( \ - SignExtendMACResult(SignExtendMACResult((s64(T[i]) << 12) + (s64(M[i][0]) * s64(Vx))) + \ - (s64(M[i][1]) * s64(Vy))) + \ - (s64(M[i][2]) * s64(Vz)), \ + SignExtendMACResult(SignExtendMACResult((s64(T[i]) << 12) + (s64(M(i, 0)) * s64(Vx))) + \ + (s64(M(i, 1)) * s64(Vy))) + \ + (s64(M(i, 2)) * s64(Vz)), \ shift, lm) dot3(0); @@ -499,19 +502,20 @@ 
static void MulMatVec(const s16 M[3][3], const s32 T[3], const s16 Vx, const s16 dot3(2); #undef dot3 +#undef M } -static void MulMatVecBuggy(const s16 M[3][3], const s32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, - bool lm) +static void MulMatVecBuggy(const s16* M_, const s32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm) { +#define M(i, j) M_[((i)*3) + (j)] #define dot3(i) \ do \ { \ TruncateAndSetIR(static_cast(SignExtendMACResult(SignExtendMACResult( \ - (s64(T[i]) << 12) + (s64(M[i][0]) * s64(Vx)))) >> \ + (s64(T[i]) << 12) + (s64(M(i, 0)) * s64(Vx)))) >> \ shift), \ false); \ - TruncateAndSetMACAndIR(SignExtendMACResult((s64(M[i][1]) * s64(Vy))) + (s64(M[i][2]) * s64(Vz)), \ + TruncateAndSetMACAndIR(SignExtendMACResult((s64(M(i, 1)) * s64(Vy))) + (s64(M(i, 2)) * s64(Vz)), \ shift, lm); \ } while (0) @@ -520,82 +524,50 @@ static void MulMatVecBuggy(const s16 M[3][3], const s32 T[3], const s16 Vx, cons dot3(2); #undef dot3 +#undef M } static void Execute_MVMVA(Instruction inst) { REGS.FLAG.Clear(); - // TODO: Remove memcpy.. 
- s16 M[3][3]; - switch (inst.mvmva_multiply_matrix) + static constexpr const s16* M_lookup[4] = {&REGS.RT[0][0], &REGS.LLM[0][0], &REGS.LCM[0][0], nullptr}; + static constexpr const s16* V_lookup[4][3] = { + {&REGS.V0[0], &REGS.V0[1], &REGS.V0[2]}, + {&REGS.V1[0], &REGS.V1[1], &REGS.V1[2]}, + {&REGS.V2[0], &REGS.V2[1], &REGS.V2[2]}, + {&REGS.IR1, &REGS.IR2, &REGS.IR3}, + }; + static constexpr const s32 zero_T[3] = {}; + static constexpr const s32* T_lookup[4] = {REGS.TR, REGS.BK, REGS.FC, zero_T}; + + const s16* M = M_lookup[inst.mvmva_multiply_matrix]; + const s16* const* const V = V_lookup[inst.mvmva_multiply_vector]; + const s32* const T = T_lookup[inst.mvmva_translation_vector]; + s16 buggy_M[3][3]; + + if (!M) { - case 0: - std::memcpy(M, REGS.RT, sizeof(s16) * 3 * 3); - break; - case 1: - std::memcpy(M, REGS.LLM, sizeof(s16) * 3 * 3); - break; - case 2: - std::memcpy(M, REGS.LCM, sizeof(s16) * 3 * 3); - break; - default: - { - // buggy - M[0][0] = -static_cast(ZeroExtend16(REGS.RGBC[0]) << 4); - M[0][1] = static_cast(ZeroExtend16(REGS.RGBC[0]) << 4); - M[0][2] = REGS.IR0; - M[1][0] = REGS.RT[0][2]; - M[1][1] = REGS.RT[0][2]; - M[1][2] = REGS.RT[0][2]; - M[2][0] = REGS.RT[1][1]; - M[2][1] = REGS.RT[1][1]; - M[2][2] = REGS.RT[1][1]; - } - break; + // buggy + buggy_M[0][0] = -static_cast(ZeroExtend16(REGS.RGBC[0]) << 4); + buggy_M[0][1] = static_cast(ZeroExtend16(REGS.RGBC[0]) << 4); + buggy_M[0][2] = REGS.IR0; + buggy_M[1][0] = REGS.RT[0][2]; + buggy_M[1][1] = REGS.RT[0][2]; + buggy_M[1][2] = REGS.RT[0][2]; + buggy_M[2][0] = REGS.RT[1][1]; + buggy_M[2][1] = REGS.RT[1][1]; + buggy_M[2][2] = REGS.RT[1][1]; + M = &buggy_M[0][0]; } - s16 Vx, Vy, Vz; - switch (inst.mvmva_multiply_vector) - { - case 0: - Vx = REGS.V0[0]; - Vy = REGS.V0[1]; - Vz = REGS.V0[2]; - break; - case 1: - Vx = REGS.V1[0]; - Vy = REGS.V1[1]; - Vz = REGS.V1[2]; - break; - case 2: - Vx = REGS.V2[0]; - Vy = REGS.V2[1]; - Vz = REGS.V2[2]; - break; - default: - Vx = REGS.IR1; - Vy = REGS.IR2; - Vz = REGS.IR3; - break; - } - - static const s32
zero_T[3] = {}; - switch (inst.mvmva_translation_vector) - { - case 0: - MulMatVec(M, REGS.TR, Vx, Vy, Vz, inst.GetShift(), inst.lm); - break; - case 1: - MulMatVec(M, REGS.BK, Vx, Vy, Vz, inst.GetShift(), inst.lm); - break; - case 2: - MulMatVecBuggy(M, REGS.FC, Vx, Vy, Vz, inst.GetShift(), inst.lm); - break; - default: - MulMatVec(M, zero_T, Vx, Vy, Vz, inst.GetShift(), inst.lm); - break; - } + const s16 Vx = *V[0]; + const s16 Vy = *V[1]; + const s16 Vz = *V[2]; + if (inst.mvmva_translation_vector != 2) + MulMatVec(M, T, Vx, Vy, Vz, inst.GetShift(), inst.lm); + else + MulMatVecBuggy(M, T, Vx, Vy, Vz, inst.GetShift(), inst.lm); REGS.FLAG.UpdateError(); } @@ -874,10 +846,10 @@ static ALWAYS_INLINE void InterpolateColor(s64 in_MAC1, s64 in_MAC2, s64 in_MAC3 static void NCS(const s16 V[3], u8 shift, bool lm) { // [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (LLM*V0) SAR (sf*12) - MulMatVec(REGS.LLM, V[0], V[1], V[2], shift, lm); + MulMatVec(&REGS.LLM[0][0], V[0], V[1], V[2], shift, lm); // [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12) - MulMatVec(REGS.LCM, REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm); + MulMatVec(&REGS.LCM[0][0], REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm); // Color FIFO = [MAC1/16,MAC2/16,MAC3/16,CODE], [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] PushRGBFromMAC(); @@ -909,10 +881,10 @@ static void Execute_NCT(Instruction inst) static void NCCS(const s16 V[3], u8 shift, bool lm) { // [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (LLM*V0) SAR (sf*12) - MulMatVec(REGS.LLM, V[0], V[1], V[2], shift, lm); + MulMatVec(&REGS.LLM[0][0], V[0], V[1], V[2], shift, lm); // [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12) - MulMatVec(REGS.LCM, REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm); + MulMatVec(&REGS.LCM[0][0], REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm); // [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4 ;<--- for NCDx/NCCx // [MAC1,MAC2,MAC3] = [MAC1,MAC2,MAC3] SAR (sf*12) ;<--- for NCDx/NCCx @@ -950,10 +922,10 @@ static void
Execute_NCCT(Instruction inst) static void NCDS(const s16 V[3], u8 shift, bool lm) { // [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (LLM*V0) SAR (sf*12) - MulMatVec(REGS.LLM, V[0], V[1], V[2], shift, lm); + MulMatVec(&REGS.LLM[0][0], V[0], V[1], V[2], shift, lm); // [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12) - MulMatVec(REGS.LCM, REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm); + MulMatVec(&REGS.LCM[0][0], REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm); // No need to assign these to MAC[1-3], as it'll never overflow. // [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4 ;<--- for NCDx/NCCx @@ -999,7 +971,7 @@ static void Execute_CC(Instruction inst) const bool lm = inst.lm; // [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12) - MulMatVec(REGS.LCM, REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm); + MulMatVec(&REGS.LCM[0][0], REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm); // [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4 // [MAC1,MAC2,MAC3] = [MAC1,MAC2,MAC3] SAR (sf*12) @@ -1021,7 +993,7 @@ static void Execute_CDP(Instruction inst) const bool lm = inst.lm; // [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12) - MulMatVec(REGS.LCM, REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm); + MulMatVec(&REGS.LCM[0][0], REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm); // No need to assign these to MAC[1-3], as it'll never overflow.
// [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4 diff --git a/src/core/pgxp.cpp b/src/core/pgxp.cpp index 41f8543353..06935bd952 100644 --- a/src/core/pgxp.cpp +++ b/src/core/pgxp.cpp @@ -108,8 +108,7 @@ static PGXP_value CP0_reg[32]; #define CPU_Lo CPU_reg[33] // GTE registers -static PGXP_value GTE_data_reg[32]; -static PGXP_value GTE_ctrl_reg[32]; +static PGXP_value GTE_regs[64]; static PGXP_value* Mem = nullptr; static PGXP_value* vertexCache = nullptr; @@ -274,8 +273,7 @@ void Initialize() std::memset(CPU_reg, 0, sizeof(CPU_reg)); std::memset(CP0_reg, 0, sizeof(CP0_reg)); - std::memset(GTE_data_reg, 0, sizeof(GTE_data_reg)); - std::memset(GTE_ctrl_reg, 0, sizeof(GTE_ctrl_reg)); + std::memset(GTE_regs, 0, sizeof(GTE_regs)); if (!Mem) { @@ -306,8 +304,7 @@ void Reset() std::memset(CPU_reg, 0, sizeof(CPU_reg)); std::memset(CP0_reg, 0, sizeof(CP0_reg)); - std::memset(GTE_data_reg, 0, sizeof(GTE_data_reg)); - std::memset(GTE_ctrl_reg, 0, sizeof(GTE_ctrl_reg)); + std::memset(GTE_regs, 0, sizeof(GTE_regs)); if (Mem) std::memset(Mem, 0, sizeof(PGXP_value) * PGXP_MEM_SIZE); @@ -329,8 +326,7 @@ void Shutdown() Mem = nullptr; } - std::memset(GTE_data_reg, 0, sizeof(GTE_data_reg)); - std::memset(GTE_ctrl_reg, 0, sizeof(GTE_ctrl_reg)); + std::memset(GTE_regs, 0, sizeof(GTE_regs)); std::memset(CPU_reg, 0, sizeof(CPU_reg)); std::memset(CP0_reg, 0, sizeof(CP0_reg)); @@ -344,18 +340,19 @@ void Shutdown() #define rt(_instr) ((_instr >> 16) & 0x1F) // The rt part of the instruction register #define rs(_instr) ((_instr >> 21) & 0x1F) // The rs part of the instruction register #define imm(_instr) (_instr & 0xFFFF) // The immediate part of the instruction register +#define cop2idx(_instr) (((_instr >> 11) & 0x1F) | ((_instr >> 17) & 0x20)) -#define SX0 (GTE_data_reg[12].x) -#define SY0 (GTE_data_reg[12].y) -#define SX1 (GTE_data_reg[13].x) -#define SY1 (GTE_data_reg[13].y) -#define SX2 (GTE_data_reg[14].x) -#define SY2 (GTE_data_reg[14].y) +#define SX0 (GTE_regs[12].x) +#define SY0 
(GTE_regs[12].y) +#define SX1 (GTE_regs[13].x) +#define SY1 (GTE_regs[13].y) +#define SX2 (GTE_regs[14].x) +#define SY2 (GTE_regs[14].y) -#define SXY0 (GTE_data_reg[12]) -#define SXY1 (GTE_data_reg[13]) -#define SXY2 (GTE_data_reg[14]) -#define SXYP (GTE_data_reg[15]) +#define SXY0 (GTE_regs[12]) +#define SXY1 (GTE_regs[13]) +#define SXY2 (GTE_regs[14]) +#define SXYP (GTE_regs[15]) void GTE_PushSXYZ2f(float x, float y, float z, u32 v) { @@ -428,49 +425,35 @@ static void PGXP_MTC2_int(PGXP_value value, u32 reg) return; } - GTE_data_reg[reg] = value; + GTE_regs[reg] = value; } //////////////////////////////////// // Data transfer tracking //////////////////////////////////// -void CPU_MFC2(u32 instr, u32 rtVal, u32 rdVal) +void CPU_MFC2(u32 instr, u32 rdVal) { // CPU[Rt] = GTE_D[Rd] - Validate(&GTE_data_reg[rd(instr)], rdVal); - CPU_reg[rt(instr)] = GTE_data_reg[rd(instr)]; - CPU_reg[rt(instr)].value = rtVal; + const u32 idx = cop2idx(instr); + Validate(&GTE_regs[idx], rdVal); + CPU_reg[rt(instr)] = GTE_regs[idx]; + CPU_reg[rt(instr)].value = rdVal; } -void CPU_MTC2(u32 instr, u32 rdVal, u32 rtVal) +void CPU_MTC2(u32 instr, u32 rtVal) { // GTE_D[Rd] = CPU[Rt] + const u32 idx = cop2idx(instr); Validate(&CPU_reg[rt(instr)], rtVal); - PGXP_MTC2_int(CPU_reg[rt(instr)], rd(instr)); - GTE_data_reg[rd(instr)].value = rdVal; -} - -void CPU_CFC2(u32 instr, u32 rtVal, u32 rdVal) -{ - // CPU[Rt] = GTE_C[Rd] - Validate(&GTE_ctrl_reg[rd(instr)], rdVal); - CPU_reg[rt(instr)] = GTE_ctrl_reg[rd(instr)]; - CPU_reg[rt(instr)].value = rtVal; -} - -void CPU_CTC2(u32 instr, u32 rdVal, u32 rtVal) -{ - // GTE_C[Rd] = CPU[Rt] - Validate(&CPU_reg[rt(instr)], rtVal); - GTE_ctrl_reg[rd(instr)] = CPU_reg[rt(instr)]; - GTE_ctrl_reg[rd(instr)].value = rdVal; + PGXP_MTC2_int(CPU_reg[rt(instr)], idx); + GTE_regs[idx].value = rtVal; } //////////////////////////////////// // Memory Access //////////////////////////////////// -void CPU_LWC2(u32 instr, u32 rtVal, u32 addr) +void CPU_LWC2(u32 instr, u32 addr,
u32 rtVal) { // GTE_D[Rt] = Mem[addr] PGXP_value val; @@ -478,11 +461,11 @@ void CPU_LWC2(u32 instr, u32 rtVal, u32 addr) PGXP_MTC2_int(val, rt(instr)); } -void CPU_SWC2(u32 instr, u32 rtVal, u32 addr) +void CPU_SWC2(u32 instr, u32 addr, u32 rtVal) { // Mem[addr] = GTE_D[Rt] - Validate(&GTE_data_reg[rt(instr)], rtVal); - WriteMem(&GTE_data_reg[rt(instr)], addr); + Validate(&GTE_regs[rt(instr)], rtVal); + WriteMem(&GTE_regs[rt(instr)], addr); } ALWAYS_INLINE_RELEASE void PGXP_CacheVertex(s16 sx, s16 sy, const PGXP_value& vertex) @@ -575,29 +558,29 @@ bool GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, int yOffs, f #define imm_sext(_instr) \ static_cast(static_cast(_instr & 0xFFFF)) // The immediate part of the instruction register -void CPU_LW(u32 instr, u32 rtVal, u32 addr) +void CPU_LW(u32 instr, u32 addr, u32 rtVal) { // Rt = Mem[Rs + Im] ValidateAndCopyMem(&CPU_reg[rt(instr)], addr, rtVal); } -void CPU_LBx(u32 instr, u32 rtVal, u32 addr) +void CPU_LBx(u32 instr, u32 addr, u32 rtVal) { CPU_reg[rt(instr)] = PGXP_value_invalid; } -void CPU_LHx(u32 instr, u32 rtVal, u32 addr) +void CPU_LHx(u32 instr, u32 addr, u32 rtVal) { // Rt = Mem[Rs + Im] (sign/zero extended) ValidateAndCopyMem16(&CPU_reg[rt(instr)], addr, rtVal, 1); } -void CPU_SB(u32 instr, u8 rtVal, u32 addr) +void CPU_SB(u32 instr, u32 addr, u32 rtVal) { WriteMem(&PGXP_value_invalid, addr); } -void CPU_SH(u32 instr, u16 rtVal, u32 addr) +void CPU_SH(u32 instr, u32 addr, u32 rtVal) { PGXP_value* val = &CPU_reg[rt(instr)]; @@ -606,7 +589,7 @@ void CPU_SH(u32 instr, u16 rtVal, u32 addr) WriteMem16(val, addr); } -void CPU_SW(u32 instr, u32 rtVal, u32 addr) +void CPU_SW(u32 instr, u32 addr, u32 rtVal) { // Mem[Rs + Im] = Rt PGXP_value* val = &CPU_reg[rt(instr)]; @@ -1587,10 +1570,10 @@ void CPU_MFHI(u32 instr, u32 hiVal) CPU_reg[rd(instr)] = CPU_Hi; } -void CPU_MTHI(u32 instr, u32 rdVal) +void CPU_MTHI(u32 instr, u32 rsVal) { // Hi = Rd - Validate(&CPU_reg[rd(instr)], rdVal); +
Validate(&CPU_reg[rs(instr)], rsVal); CPU_Hi = CPU_reg[rd(instr)]; } @@ -1603,10 +1586,10 @@ void CPU_MFLO(u32 instr, u32 loVal) CPU_reg[rd(instr)] = CPU_Lo; } -void CPU_MTLO(u32 instr, u32 rdVal) +void CPU_MTLO(u32 instr, u32 rsVal) { // Lo = Rd - Validate(&CPU_reg[rd(instr)], rdVal); + Validate(&CPU_reg[rs(instr)], rsVal); CPU_Lo = CPU_reg[rd(instr)]; } diff --git a/src/core/pgxp.h b/src/core/pgxp.h index 316f4d9dd7..fb7037c8dd 100644 --- a/src/core/pgxp.h +++ b/src/core/pgxp.h @@ -34,24 +34,22 @@ int GTE_NCLIP_valid(u32 sxy0, u32 sxy1, u32 sxy2); float GTE_NCLIP(); // Data transfer tracking -void CPU_MFC2(u32 instr, u32 rtVal, u32 rdVal); // copy GTE data reg to GPR reg (MFC2) -void CPU_MTC2(u32 instr, u32 rdVal, u32 rtVal); // copy GPR reg to GTE data reg (MTC2) -void CPU_CFC2(u32 instr, u32 rtVal, u32 rdVal); // copy GTE ctrl reg to GPR reg (CFC2) -void CPU_CTC2(u32 instr, u32 rdVal, u32 rtVal); // copy GPR reg to GTE ctrl reg (CTC2) +void CPU_MFC2(u32 instr, u32 rdVal); // copy GTE data reg to GPR reg (MFC2) +void CPU_MTC2(u32 instr, u32 rtVal); // copy GPR reg to GTE data reg (MTC2) // Memory Access -void CPU_LWC2(u32 instr, u32 rtVal, u32 addr); // copy memory to GTE reg -void CPU_SWC2(u32 instr, u32 rtVal, u32 addr); // copy GTE reg to memory +void CPU_LWC2(u32 instr, u32 addr, u32 rtVal); // copy memory to GTE reg +void CPU_SWC2(u32 instr, u32 addr, u32 rtVal); // copy GTE reg to memory bool GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, int yOffs, float* out_x, float* out_y, float* out_w); // -- CPU functions -void CPU_LW(u32 instr, u32 rtVal, u32 addr); -void CPU_LHx(u32 instr, u32 rtVal, u32 addr); -void CPU_LBx(u32 instr, u32 rtVal, u32 addr); -void CPU_SB(u32 instr, u8 rtVal, u32 addr); -void CPU_SH(u32 instr, u16 rtVal, u32 addr); -void CPU_SW(u32 instr, u32 rtVal, u32 addr); +void CPU_LW(u32 instr, u32 addr, u32 rtVal); +void CPU_LHx(u32 instr, u32 addr, u32 rtVal); +void CPU_LBx(u32 instr, u32 addr, u32 rtVal); +void CPU_SB(u32 
instr, u32 addr, u32 rtVal); +void CPU_SH(u32 instr, u32 addr, u32 rtVal); +void CPU_SW(u32 instr, u32 addr, u32 rtVal); void CPU_MOVE(u32 rd_and_rs, u32 rsVal); // Arithmetic with immediate value @@ -93,9 +91,9 @@ void CPU_SRAV(u32 instr, u32 rtVal, u32 rsVal); // Move registers void CPU_MFHI(u32 instr, u32 hiVal); -void CPU_MTHI(u32 instr, u32 rdVal); +void CPU_MTHI(u32 instr, u32 rsVal); void CPU_MFLO(u32 instr, u32 loVal); -void CPU_MTLO(u32 instr, u32 rdVal); +void CPU_MTLO(u32 instr, u32 rsVal); // CP0 Data transfer tracking void CPU_MFC0(u32 instr, u32 rdVal); diff --git a/src/core/save_state_version.h b/src/core/save_state_version.h index 29bbdc7b0e..86d23007ea 100644 --- a/src/core/save_state_version.h +++ b/src/core/save_state_version.h @@ -5,7 +5,7 @@ #include "types.h" static constexpr u32 SAVE_STATE_MAGIC = 0x43435544; -static constexpr u32 SAVE_STATE_VERSION = 58; +static constexpr u32 SAVE_STATE_VERSION = 59; static constexpr u32 SAVE_STATE_MINIMUM_VERSION = 42; static_assert(SAVE_STATE_VERSION >= SAVE_STATE_MINIMUM_VERSION); diff --git a/src/core/system.cpp b/src/core/system.cpp index 1676835144..bebcf373f2 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -103,20 +103,21 @@ static void DestroySystem(); static std::string GetMediaPathFromSaveState(const char* path); static bool DoLoadState(ByteStream* stream, bool force_software_renderer, bool update_display); static bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display, bool is_memory_state); -static void DoRunFrame(); static bool CreateGPU(GPURenderer renderer); static bool SaveUndoLoadState(); +/// Throttles the system, i.e. sleeps until it's time to execute the next frame. 
+static void Throttle(); + static void SetRewinding(bool enabled); static bool SaveRewindState(); static void DoRewind(); static void SaveRunaheadState(); -static void DoRunahead(); - -static void DoMemorySaveStates(); +static bool DoRunahead(); static bool Initialize(bool force_software_renderer); +static bool FastForwardToFirstFrame(); static bool UpdateGameSettingsLayer(); static void UpdateRunningGame(const char* path, CDImage* image, bool booting); @@ -149,12 +150,16 @@ static std::string s_running_game_serial; static std::string s_running_game_title; static System::GameHash s_running_game_hash; static bool s_running_unknown_game; +static bool s_was_fast_booted; static float s_throttle_frequency = 60.0f; static float s_target_speed = 1.0f; static Common::Timer::Value s_frame_period = 0; static Common::Timer::Value s_next_frame_time = 0; +static bool s_last_frame_skipped = false; +static bool s_system_executing = false; +static bool s_system_interrupted = false; static bool s_frame_step_request = false; static bool s_fast_forward_enabled = false; static bool s_turbo_enabled = false; @@ -208,6 +213,7 @@ static bool s_rewinding_first_save = false; static std::deque s_runahead_states; static bool s_runahead_replay_pending = false; static u32 s_runahead_frames = 0; +static u32 s_runahead_replay_frames = 0; static TinyString GetTimestampStringForFileName() { @@ -227,9 +233,6 @@ void System::SetState(State new_state) Assert(s_state == State::Paused || s_state == State::Running); Assert(new_state == State::Paused || new_state == State::Running); s_state = new_state; - - if (new_state == State::Paused) - CPU::ForceDispatcherExit(); } bool System::IsRunning() @@ -237,6 +240,11 @@ bool System::IsRunning() return s_state == State::Running; } +bool System::IsExecutionInterrupted() +{ + return s_state != State::Running || s_system_interrupted; +} + bool System::IsPaused() { return s_state == State::Paused; @@ -304,18 +312,6 @@ u32 System::GetInternalFrameNumber() return 
s_internal_frame_number; } -void System::FrameDone() -{ - s_frame_number++; - CPU::g_state.frame_done = true; - CPU::g_state.downcount = 0; -} - -void System::IncrementInternalFrameNumber() -{ - s_internal_frame_number++; -} - const std::string& System::GetDiscPath() { return s_running_game_path; @@ -340,6 +336,11 @@ bool System::IsRunningUnknownGame() return s_running_unknown_game; } +bool System::WasFastBooted() +{ + return s_was_fast_booted; +} + const BIOS::ImageInfo* System::GetBIOSImageInfo() { return s_bios_image_info; @@ -529,7 +530,7 @@ bool System::GetGameDetailsFromImage(CDImage* cdi, std::string* out_id, GameHash pos++; } } - + if (out_id) { if (id.empty()) @@ -644,7 +645,7 @@ std::string System::GetExecutableNameForImage(CDImage* cdi, bool strip_subdirect } bool System::ReadExecutableFromImage(CDImage* cdi, std::string* out_executable_name, - std::vector* out_executable_data) + std::vector* out_executable_data) { ISOReader iso; if (!iso.Open(cdi, 1)) @@ -653,7 +654,8 @@ bool System::ReadExecutableFromImage(CDImage* cdi, std::string* out_executable_n return ReadExecutableFromImage(iso, out_executable_name, out_executable_data); } -bool System::ReadExecutableFromImage(ISOReader& iso, std::string* out_executable_name, std::vector* out_executable_data) +bool System::ReadExecutableFromImage(ISOReader& iso, std::string* out_executable_name, + std::vector* out_executable_data) { const std::string executable_path = GetExecutableNameForImage(iso, false); Log_DevPrintf("Executable path: '%s'", executable_path.c_str()); @@ -886,7 +888,11 @@ void System::ApplySettings(bool display_osd_messages) Host::CheckForSettingsChanges(old_config); if (IsValid()) + { ResetPerformanceCounters(); + if (s_system_executing) + s_system_interrupted = true; + } } bool System::ReloadGameSettings(bool display_osd_messages) @@ -1304,9 +1310,15 @@ bool System::BootSystem(SystemBootParameters parameters) g_settings.bios_patch_fast_boot)) { if (s_bios_image_info && 
s_bios_image_info->patch_compatible) + { + // TODO: Fast boot without patches... BIOS::PatchBIOSFastBoot(Bus::g_bios, Bus::BIOS_SIZE); + s_was_fast_booted = true; + } else + { Log_ErrorPrintf("Not patching fast boot, as BIOS is not patch compatible."); + } } // Good to go. @@ -1346,6 +1358,9 @@ bool System::BootSystem(SystemBootParameters parameters) if (parameters.load_image_to_ram || g_settings.cdrom_load_image_to_ram) CDROM::PrecacheMedia(); + if (parameters.fast_forward_to_first_frame) + FastForwardToFirstFrame(); + if (g_settings.audio_dump_on_boot) StartDumpingAudio(); @@ -1370,6 +1385,10 @@ bool System::Initialize(bool force_software_renderer) s_turbo_enabled = false; s_fast_forward_enabled = false; + s_rewind_load_frequency = -1; + s_rewind_load_counter = -1; + s_rewinding_first_save = true; + s_average_frame_time_accumulator = 0.0f; s_minimum_frame_time_accumulator = 0.0f; s_maximum_frame_time_accumulator = 0.0f; @@ -1488,6 +1507,7 @@ bool System::Initialize(bool force_software_renderer) void System::DestroySystem() { + DebugAssert(!s_system_executing); if (s_state == State::Shutdown) return; @@ -1528,6 +1548,10 @@ void System::DestroySystem() s_bios_hash = {}; s_bios_image_info = nullptr; + s_was_fast_booted = false; + s_cheat_list.reset(); + + s_state = State::Shutdown; Host::OnSystemDestroyed(); } @@ -1539,8 +1563,6 @@ void System::ClearRunningGame() s_running_game_title.clear(); s_running_game_hash = 0; s_running_unknown_game = false; - s_cheat_list.reset(); - s_state = State::Shutdown; Host::OnGameChanged(s_running_game_path, s_running_game_serial, s_running_game_title); @@ -1549,25 +1571,124 @@ void System::ClearRunningGame() #endif } +bool System::FastForwardToFirstFrame() +{ + // If we're taking more than 60 seconds to load the game, oof.. 
+ static constexpr u32 MAX_FRAMES_TO_SKIP = 30 * 60; + const u32 current_frame_number = s_frame_number; + const u32 current_internal_frame_number = s_internal_frame_number; + + SPU::SetAudioOutputMuted(true); + while (s_internal_frame_number == current_internal_frame_number && + (s_frame_number - current_frame_number) <= MAX_FRAMES_TO_SKIP) + { + Panic("Fixme"); + // System::RunFrame(); + } + SPU::SetAudioOutputMuted(false); + + return (s_internal_frame_number != current_internal_frame_number); +} + void System::Execute() { - while (System::IsRunning()) + for (;;) { - if (s_display_all_frames) - System::RunFrame(); - else - System::RunFrames(); + switch (s_state) + { + case State::Running: + { + s_system_executing = true; - // this can shut us down - Host::PumpMessagesOnCPUThread(); - if (!IsValid()) - return; + // TODO: Purge reset/restore + g_gpu->RestoreGraphicsAPIState(); + + if (s_rewind_load_counter >= 0) + DoRewind(); + else + CPU::Execute(); + + g_gpu->ResetGraphicsAPIState(); + + s_system_executing = false; + continue; + } + + case State::Stopping: + { + DestroySystem(); + return; + } + + case State::Paused: + default: + return; + } + } +} + +void System::FrameDone() +{ + s_frame_number++; + + // Generate any pending samples from the SPU before sleeping, this way we reduce the chances of underruns. + SPU::GeneratePendingSamples(); + + if (s_cheat_list) + s_cheat_list->Apply(); - if (s_frame_step_request) + if (s_frame_step_request) + { + s_frame_step_request = false; + PauseSystem(true); + } + + // Save states for rewind and runahead. + if (s_rewind_save_counter >= 0) + { + if (s_rewind_save_counter == 0) { - s_frame_step_request = false; - PauseSystem(true); + SaveRewindState(); + s_rewind_save_counter = s_rewind_save_frequency; } + else + { + s_rewind_save_counter--; + } + } + else if (s_runahead_frames > 0) + { + // We don't want to poll during replay, because otherwise we'll lose frames. 
+ if (s_runahead_replay_frames == 0) + { + // For runahead, poll input early, that way we can use the remainder of this frame to replay. + // *technically* this means higher input latency (by less than a frame), but runahead itself + // counter-acts that. + Host::PumpMessagesOnCPUThread(); + if (IsExecutionInterrupted()) + { + s_system_interrupted = false; + CPU::ExitExecution(); + return; + } + } + + if (DoRunahead()) + { + // running ahead, get it done as soon as possible + return; + } + + SaveRunaheadState(); + } + + const Common::Timer::Value current_time = Common::Timer::GetCurrentValue(); + if (current_time < s_next_frame_time || s_display_all_frames || s_last_frame_skipped) + { + s_last_frame_skipped = false; + + // TODO: Purge reset/restore + g_gpu->ResetGraphicsAPIState(); const bool skip_present = g_host_display->ShouldSkipDisplayingFrame(); Host::RenderDisplay(skip_present); @@ -1577,14 +1698,109 @@ void System::Execute() s_presents_since_last_update++; } - if (s_throttler_enabled) - System::Throttle(); + g_gpu->RestoreGraphicsAPIState(); + } + else if (current_time >= s_next_frame_time) + { + Log_DebugPrintf("Skipping displaying frame"); + s_last_frame_skipped = true; + } + + if (s_throttler_enabled && !IsExecutionInterrupted()) + Throttle(); + + // Input poll already done above + if (s_runahead_frames == 0) + { + Host::PumpMessagesOnCPUThread(); + + if (IsExecutionInterrupted()) + { + s_system_interrupted = false; + CPU::ExitExecution(); + return; + } + } + + // Update perf counters *after* throttling, we want to measure from start-of-frame + // to start-of-frame, not end-of-frame to end-of-frame (will be noisy due to different + // amounts of computation happening in each frame). 
+ System::UpdatePerformanceCounters(); +} + +void System::SetThrottleFrequency(float frequency) +{ + if (s_throttle_frequency == frequency) + return; + + s_throttle_frequency = frequency; + UpdateThrottlePeriod(); +} + +void System::UpdateThrottlePeriod() +{ + if (s_target_speed > std::numeric_limits::epsilon()) + { + const double target_speed = std::max(static_cast(s_target_speed), std::numeric_limits::epsilon()); + s_frame_period = + Common::Timer::ConvertSecondsToValue(1.0 / (static_cast(s_throttle_frequency) * target_speed)); + } + else + { + s_frame_period = 1; + } + + ResetThrottler(); +} + +void System::ResetThrottler() +{ + s_next_frame_time = Common::Timer::GetCurrentValue() + s_frame_period; +} - // Update perf counters *after* throttling, we want to measure from start-of-frame - // to start-of-frame, not end-of-frame to end-of-frame (will be noisy due to different - // amounts of computation happening in each frame). - System::UpdatePerformanceCounters(); +void System::Throttle() +{ + // If we're running too slow, advance the next frame time based on the time we lost. Effectively skips + // running those frames at the intended time, because otherwise if we pause in the debugger, we'll run + // hundreds of frames when we resume. + Common::Timer::Value current_time = Common::Timer::GetCurrentValue(); + if (current_time > s_next_frame_time) + { + const Common::Timer::Value diff = static_cast(current_time) - static_cast(s_next_frame_time); + s_next_frame_time += (diff / s_frame_period) * s_frame_period + s_frame_period; + return; } + + // Use a spinwait if we undersleep for all platforms except android.. don't want to burn battery. + // Linux also seems to do a much better job of waking up at the requested time. 
+#if !defined(__linux__) && !defined(__ANDROID__) + Common::Timer::SleepUntil(s_next_frame_time, g_settings.display_all_frames); +#else + Common::Timer::SleepUntil(s_next_frame_time, false); +#endif + + s_next_frame_time += s_frame_period; +} + +void System::SingleStepCPU() +{ + s_frame_timer.Reset(); + s_system_executing = true; + + g_gpu->RestoreGraphicsAPIState(); + + CPU::SingleStep(); + + SPU::GeneratePendingSamples(); + + g_gpu->ResetGraphicsAPIState(); + + s_system_executing = false; +} + +void System::IncrementInternalFrameNumber() +{ + s_internal_frame_number++; } void System::RecreateSystem() @@ -2163,159 +2379,11 @@ bool System::InternalSaveState(ByteStream* state, u32 screenshot_size /* = 256 * return true; } -void System::SingleStepCPU() -{ - const u32 old_frame_number = s_frame_number; - - s_frame_timer.Reset(); - - g_gpu->RestoreGraphicsAPIState(); - - CPU::SingleStep(); - - SPU::GeneratePendingSamples(); - - if (s_frame_number != old_frame_number && s_cheat_list) - s_cheat_list->Apply(); - - g_gpu->ResetGraphicsAPIState(); -} - -void System::DoRunFrame() -{ - g_gpu->RestoreGraphicsAPIState(); - - if (CPU::g_state.use_debug_dispatcher) - { - CPU::ExecuteDebug(); - } - else - { - switch (g_settings.cpu_execution_mode) - { - case CPUExecutionMode::Recompiler: -#ifdef WITH_RECOMPILER - CPU::CodeCache::ExecuteRecompiler(); -#else - CPU::CodeCache::Execute(); -#endif - break; - - case CPUExecutionMode::CachedInterpreter: - CPU::CodeCache::Execute(); - break; - - case CPUExecutionMode::Interpreter: - default: - CPU::Execute(); - break; - } - } - - // Generate any pending samples from the SPU before sleeping, this way we reduce the chances of underruns. 
- SPU::GeneratePendingSamples(); - - if (s_cheat_list) - s_cheat_list->Apply(); - - g_gpu->ResetGraphicsAPIState(); -} - -void System::RunFrame() -{ - if (s_rewind_load_counter >= 0) - { - DoRewind(); - return; - } - - if (s_runahead_frames > 0) - DoRunahead(); - - DoRunFrame(); - - s_next_frame_time += s_frame_period; - - if (s_memory_saves_enabled) - DoMemorySaveStates(); -} - float System::GetTargetSpeed() { return s_target_speed; } -void System::SetThrottleFrequency(float frequency) -{ - s_throttle_frequency = frequency; - UpdateThrottlePeriod(); -} - -void System::UpdateThrottlePeriod() -{ - if (s_target_speed > std::numeric_limits::epsilon()) - { - const double target_speed = std::max(static_cast(s_target_speed), std::numeric_limits::epsilon()); - s_frame_period = - Common::Timer::ConvertSecondsToValue(1.0 / (static_cast(s_throttle_frequency) * target_speed)); - } - else - { - s_frame_period = 1; - } - - ResetThrottler(); -} - -void System::ResetThrottler() -{ - s_next_frame_time = Common::Timer::GetCurrentValue(); -} - -void System::Throttle() -{ - // If we're running too slow, advance the next frame time based on the time we lost. Effectively skips - // running those frames at the intended time, because otherwise if we pause in the debugger, we'll run - // hundreds of frames when we resume. - Common::Timer::Value current_time = Common::Timer::GetCurrentValue(); - if (current_time > s_next_frame_time) - { - const Common::Timer::Value diff = static_cast(current_time) - static_cast(s_next_frame_time); - s_next_frame_time += (diff / s_frame_period) * s_frame_period; - return; - } - - // Use a spinwait if we undersleep for all platforms except android.. don't want to burn battery. - // Linux also seems to do a much better job of waking up at the requested time. 
-#if !defined(__linux__) && !defined(__ANDROID__) - Common::Timer::SleepUntil(s_next_frame_time, g_settings.display_all_frames); -#else - Common::Timer::SleepUntil(s_next_frame_time, false); -#endif -} - -void System::RunFrames() -{ - // If we're running more than this in a single loop... we're in for a bad time. - const u32 max_frames_to_run = 2; - u32 frames_run = 0; - - Common::Timer::Value value = Common::Timer::GetCurrentValue(); - while (frames_run < max_frames_to_run) - { - if (value < s_next_frame_time) - break; - - RunFrame(); - frames_run++; - - value = Common::Timer::GetCurrentValue(); - } - - if (frames_run != 1) - Log_VerbosePrintf("Ran %u frames in a single host frame", frames_run); -} - void System::UpdatePerformanceCounters() { const float frame_time = static_cast(s_frame_timer.GetTimeMillisecondsAndReset()); @@ -3625,18 +3693,22 @@ void System::SetRewinding(bool enabled) { if (enabled) { + const bool was_enabled = IsRewinding(); + // Try to rewind at the replay speed, or one per second maximum. 
const float load_frequency = std::min(g_settings.rewind_save_frequency, 1.0f); s_rewind_load_frequency = static_cast(std::ceil(load_frequency * s_throttle_frequency)); s_rewind_load_counter = 0; + + if (!was_enabled && s_system_executing) + s_system_interrupted = true; } else { s_rewind_load_frequency = -1; s_rewind_load_counter = -1; + s_rewinding_first_save = true; } - - s_rewinding_first_save = true; } void System::DoRewind() @@ -3655,6 +3727,15 @@ void System::DoRewind() } s_next_frame_time += s_frame_period; + + // TODO: Purge reset/restore + g_gpu->ResetGraphicsAPIState(); + Host::RenderDisplay(false); + g_gpu->RestoreGraphicsAPIState(); + + Host::PumpMessagesOnCPUThread(); + + Throttle(); } void System::SaveRunaheadState() @@ -3676,84 +3757,70 @@ void System::SaveRunaheadState() s_runahead_states.push_back(std::move(mss)); } -void System::DoRunahead() +bool System::DoRunahead() { #ifdef PROFILE_MEMORY_SAVE_STATES - Common::Timer timer; - Log_DevPrintf("runahead starting at frame %u", s_frame_number); + static Common::Timer replay_timer; #endif if (s_runahead_replay_pending) { +#ifdef PROFILE_MEMORY_SAVE_STATES + Log_DevPrintf("runahead starting at frame %u", s_frame_number); + replay_timer.Reset(); +#endif + // we need to replay and catch up - load the state, s_runahead_replay_pending = false; if (s_runahead_states.empty() || !LoadMemoryState(s_runahead_states.front())) { s_runahead_states.clear(); - return; + return false; } + // figure out how many frames we need to run to catch up + s_runahead_replay_frames = static_cast(s_runahead_states.size()); + // and throw away all the states, forcing us to catch up below - // TODO: can we leave one frame here and run, avoiding the extra save? 
s_runahead_states.clear(); -#ifdef PROFILE_MEMORY_SAVE_STATES - Log_VerbosePrintf("Rewound to frame %u, took %.2f ms", s_frame_number, timer.GetTimeMilliseconds()); -#endif - } - - // run the frames with no audio - s32 frames_to_run = static_cast(s_runahead_frames) - static_cast(s_runahead_states.size()); - if (frames_to_run > 0) - { - Common::Timer timer2; -#ifdef PROFILE_MEMORY_SAVE_STATES - const s32 temp = frames_to_run; -#endif - + // run the frames with no audio SPU::SetAudioOutputMuted(true); - while (frames_to_run > 0) - { - DoRunFrame(); - SaveRunaheadState(); - frames_to_run--; - } - - SPU::SetAudioOutputMuted(false); - #ifdef PROFILE_MEMORY_SAVE_STATES - Log_VerbosePrintf("Running %d frames to catch up took %.2f ms", temp, timer2.GetTimeMilliseconds()); + Log_VerbosePrintf("Rewound to frame %u, took %.2f ms", s_frame_number, replay_timer.GetTimeMilliseconds()); #endif + + // we don't want to save the frame we just loaded. but we are "one frame ahead", because the frame we just tossed + // was never saved, so return but don't decrement the counter + return true; } - else + else if (s_runahead_replay_frames == 0) { - // save this frame + return false; + } + + s_runahead_replay_frames--; + if (s_runahead_replay_frames > 0) + { + // keep running ahead SaveRunaheadState(); + return true; } #ifdef PROFILE_MEMORY_SAVE_STATES - Log_DevPrintf("runahead ending at frame %u, took %.2f ms", s_frame_number, timer.GetTimeMilliseconds()); + Log_VerbosePrintf("Running %d frames to catch up took %.2f ms", s_runahead_frames, + replay_timer.GetTimeMilliseconds()); #endif -} -void System::DoMemorySaveStates() -{ - if (s_rewind_save_counter >= 0) - { - if (s_rewind_save_counter == 0) - { - SaveRewindState(); - s_rewind_save_counter = s_rewind_save_frequency; - } - else - { - s_rewind_save_counter--; - } - } + // we're all caught up. this frame gets saved in DoMemoryStates(). 
+ SPU::SetAudioOutputMuted(false); - if (s_runahead_frames > 0) - SaveRunaheadState(); +#ifdef PROFILE_MEMORY_SAVE_STATES + Log_DevPrintf("runahead ending at frame %u, took %.2f ms", s_frame_number, replay_timer.GetTimeMilliseconds()); +#endif + + return false; } void System::SetRunaheadReplayFlag() @@ -3776,7 +3843,10 @@ void System::ShutdownSystem(bool save_resume_state) if (save_resume_state) SaveResumeState(); - DestroySystem(); + if (s_system_executing) + s_state = State::Stopping; + else + DestroySystem(); } bool System::CanUndoLoadState() diff --git a/src/core/system.h b/src/core/system.h index 8318a48819..e51d94cc6c 100644 --- a/src/core/system.h +++ b/src/core/system.h @@ -42,6 +42,7 @@ struct SystemBootParameters u32 media_playlist_index = 0; bool load_image_to_ram = false; bool force_software_renderer = false; + bool fast_forward_to_first_frame = false; }; struct SaveStateInfo @@ -85,7 +86,8 @@ enum class State Shutdown, Starting, Running, - Paused + Paused, + Stopping, }; using GameHash = u64; @@ -110,7 +112,6 @@ ConsoleRegion GetConsoleRegionForDiscRegion(DiscRegion region); std::string GetExecutableNameForImage(CDImage* cdi, bool strip_subdirectories); bool ReadExecutableFromImage(CDImage* cdi, std::string* out_executable_name, std::vector* out_executable_data); -bool IsValidGameImage(CDImage* cdi); std::string GetGameHashId(GameHash hash); bool GetGameDetailsFromImage(CDImage* cdi, std::string* out_id, GameHash* out_hash); DiscRegion GetRegionForSerial(std::string_view serial); @@ -129,6 +130,7 @@ std::string GetInputProfilePath(const std::string_view& name); State GetState(); void SetState(State new_state); bool IsRunning(); +bool IsExecutionInterrupted(); bool IsPaused(); bool IsShutdown(); bool IsValid(); @@ -176,14 +178,15 @@ bool InjectEXEFromBuffer(const void* buffer, u32 buffer_size, bool patch_loader u32 GetFrameNumber(); u32 GetInternalFrameNumber(); -void FrameDone(); void IncrementInternalFrameNumber(); +void FrameDone(); const 
std::string& GetDiscPath(); const std::string& GetGameSerial(); const std::string& GetGameTitle(); GameHash GetGameHash(); bool IsRunningUnknownGame(); +bool WasFastBooted(); const BIOS::ImageInfo* GetBIOSImageInfo(); const BIOS::Hash& GetBIOSHash(); @@ -237,8 +240,6 @@ void RecreateSystem(); bool RecreateGPU(GPURenderer renderer, bool force_recreate_display = false, bool update_display = true); void SingleStepCPU(); -void RunFrame(); -void RunFrames(); /// Sets target emulation speed. float GetTargetSpeed(); @@ -250,9 +251,6 @@ void SetThrottleFrequency(float frequency); void UpdateThrottlePeriod(); void ResetThrottler(); -/// Throttles the system, i.e. sleeps until it's time to execute the next frame. -void Throttle(); - void UpdatePerformanceCounters(); void ResetPerformanceCounters(); diff --git a/src/core/timing_event.cpp b/src/core/timing_event.cpp index 6ae7af68cc..80204cf670 100644 --- a/src/core/timing_event.cpp +++ b/src/core/timing_event.cpp @@ -17,6 +17,7 @@ static TimingEvent* s_active_events_tail; static TimingEvent* s_current_event = nullptr; static u32 s_active_event_count = 0; static u32 s_global_tick_counter = 0; +static bool s_frame_done = false; u32 GetGlobalTickCounter() { @@ -51,10 +52,7 @@ std::unique_ptr CreateTimingEvent(std::string name, TickCount perio void UpdateCPUDowncount() { - if (!CPU::g_state.frame_done && (!CPU::HasPendingInterrupt() || CPU::g_using_interpreter)) - { - CPU::g_state.downcount = s_active_events_head->GetDowncount(); - } + CPU::g_state.downcount = CPU::HasPendingInterrupt() ? 
0 : s_active_events_head->GetDowncount(); } TimingEvent** GetHeadEventPtr() @@ -260,48 +258,76 @@ static TimingEvent* FindActiveEvent(const char* name) return nullptr; } +bool IsRunningEvents() +{ + return (s_current_event != nullptr); +} + +void SetFrameDone() +{ + s_frame_done = true; + CPU::g_state.downcount = 0; +} + void RunEvents() { DebugAssert(!s_current_event); - TickCount pending_ticks = CPU::GetPendingTicks(); - CPU::ResetPendingTicks(); - while (pending_ticks > 0) + do { - const TickCount time = std::min(pending_ticks, s_active_events_head->GetDowncount()); - s_global_tick_counter += static_cast(time); - pending_ticks -= time; + if (CPU::HasPendingInterrupt()) + CPU::DispatchInterrupt(); - // Apply downcount to all events. - // This will result in a negative downcount for those events which are late. - for (TimingEvent* event = s_active_events_head; event; event = event->next) + TickCount pending_ticks = CPU::GetPendingTicks(); + if (pending_ticks >= s_active_events_head->GetDowncount()) { - event->m_downcount -= time; - event->m_time_since_last_run += time; + CPU::ResetPendingTicks(); + + do + { + const TickCount time = std::min(pending_ticks, s_active_events_head->GetDowncount()); + s_global_tick_counter += static_cast(time); + pending_ticks -= time; + + // Apply downcount to all events. + // This will result in a negative downcount for those events which are late. + for (TimingEvent* event = s_active_events_head; event; event = event->next) + { + event->m_downcount -= time; + event->m_time_since_last_run += time; + } + + // Now we can actually run the callbacks. + while (s_active_events_head->m_downcount <= 0) + { + // move it to the end, since that'll likely be its new position + TimingEvent* event = s_active_events_head; + s_current_event = event; + + // Factor late time into the time for the next invocation. 
+ const TickCount ticks_late = -event->m_downcount; + const TickCount ticks_to_execute = event->m_time_since_last_run; + event->m_downcount += event->m_interval; + event->m_time_since_last_run = 0; + + // The cycles_late is only an indicator, it doesn't modify the cycles to execute. + event->m_callback(event->m_callback_param, ticks_to_execute, ticks_late); + if (event->m_active) + SortEvent(event); + } + } while (pending_ticks > 0); + + s_current_event = nullptr; } - // Now we can actually run the callbacks. - while (s_active_events_head->m_downcount <= 0) + if (s_frame_done) { - // move it to the end, since that'll likely be its new position - TimingEvent* event = s_active_events_head; - s_current_event = event; - - // Factor late time into the time for the next invocation. - const TickCount ticks_late = -event->m_downcount; - const TickCount ticks_to_execute = event->m_time_since_last_run; - event->m_downcount += event->m_interval; - event->m_time_since_last_run = 0; - - // The cycles_late is only an indicator, it doesn't modify the cycles to execute. 
- event->m_callback(event->m_callback_param, ticks_to_execute, ticks_late); - if (event->m_active) - SortEvent(event); + s_frame_done = false; + System::FrameDone(); } - } - s_current_event = nullptr; - UpdateCPUDowncount(); + UpdateCPUDowncount(); + } while (CPU::GetPendingTicks() >= CPU::g_state.downcount); } bool DoState(StateWrapper& sw) @@ -347,7 +373,7 @@ bool DoState(StateWrapper& sw) sw.Do(&last_event_run_time); } - Log_DevPrintf("Loaded %u events from save state.", event_count); + Log_DebugPrintf("Loaded %u events from save state.", event_count); SortEvents(); } else @@ -364,7 +390,7 @@ bool DoState(StateWrapper& sw) sw.Do(&event->m_interval); } - Log_DevPrintf("Wrote %u events to save state.", s_active_event_count); + Log_DebugPrintf("Wrote %u events to save state.", s_active_event_count); } return !sw.HasError(); @@ -407,6 +433,8 @@ void TimingEvent::Delay(TickCount ticks) DebugAssert(TimingEvents::s_current_event != this); TimingEvents::SortEvent(this); + if (TimingEvents::s_active_events_head == this) + TimingEvents::UpdateCPUDowncount(); } void TimingEvent::Schedule(TickCount ticks) @@ -426,7 +454,11 @@ void TimingEvent::Schedule(TickCount ticks) // Event is already active, so we leave the time since last run alone, and just modify the downcount. // If this is a call from an IO handler for example, re-sort the event queue. 
if (TimingEvents::s_current_event != this) + { TimingEvents::SortEvent(this); + if (TimingEvents::s_active_events_head == this) + TimingEvents::UpdateCPUDowncount(); + } } } @@ -451,7 +483,11 @@ void TimingEvent::Reset() m_downcount = m_interval; m_time_since_last_run = 0; if (TimingEvents::s_current_event != this) + { TimingEvents::SortEvent(this); + if (TimingEvents::s_active_events_head == this) + TimingEvents::UpdateCPUDowncount(); + } } void TimingEvent::InvokeEarly(bool force /* = false */) @@ -471,6 +507,8 @@ void TimingEvent::InvokeEarly(bool force /* = false */) // Since we've changed the downcount, we need to re-sort the events. DebugAssert(TimingEvents::s_current_event != this); TimingEvents::SortEvent(this); + if (TimingEvents::s_active_events_head == this) + TimingEvents::UpdateCPUDowncount(); } void TimingEvent::Activate() diff --git a/src/core/timing_event.h b/src/core/timing_event.h index d3a8c3c3dd..08c2698da3 100644 --- a/src/core/timing_event.h +++ b/src/core/timing_event.h @@ -93,6 +93,8 @@ std::unique_ptr CreateTimingEvent(std::string name, TickCount perio /// Serialization. 
bool DoState(StateWrapper& sw); +bool IsRunningEvents(); +void SetFrameDone(); void RunEvents(); void UpdateCPUDowncount(); diff --git a/src/duckstation-nogui/win32_nogui_platform.cpp b/src/duckstation-nogui/win32_nogui_platform.cpp index 9e34935d74..afa14227af 100644 --- a/src/duckstation-nogui/win32_nogui_platform.cpp +++ b/src/duckstation-nogui/win32_nogui_platform.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "win32_nogui_platform.h" @@ -161,7 +161,7 @@ std::optional Win32NoGUIPlatform::GetPlatformWindowInfo() return std::nullopt; RECT rc = {}; - GetWindowRect(m_hwnd, &rc); + GetClientRect(m_hwnd, &rc); WindowInfo wi; wi.surface_width = static_cast(rc.right - rc.left); @@ -338,7 +338,7 @@ LRESULT CALLBACK Win32NoGUIPlatform::WndProc(HWND hwnd, UINT msg, WPARAM wParam, const WCHAR utf16[1] = {static_cast(wParam)}; char utf8[8] = {}; const int utf8_len = WideCharToMultiByte(CP_UTF8, 0, utf16, static_cast(std::size(utf16)), utf8, - static_cast(sizeof(utf8)) - 1, nullptr, nullptr); + static_cast(sizeof(utf8) - 1), nullptr, nullptr); if (utf8_len > 0) { utf8[utf8_len] = 0; @@ -439,4 +439,4 @@ std::unique_ptr NoGUIPlatform::CreateWin32Platform() return {}; return ret; -} \ No newline at end of file +} diff --git a/src/duckstation-nogui/win32_nogui_platform.h b/src/duckstation-nogui/win32_nogui_platform.h index 836cc97d36..d6ad40f20e 100644 --- a/src/duckstation-nogui/win32_nogui_platform.h +++ b/src/duckstation-nogui/win32_nogui_platform.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once diff --git a/src/duckstation-nogui/x11_nogui_platform.cpp b/src/duckstation-nogui/x11_nogui_platform.cpp index 760d0d555b..2dcc270bdc 100644 --- 
a/src/duckstation-nogui/x11_nogui_platform.cpp +++ b/src/duckstation-nogui/x11_nogui_platform.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "x11_nogui_platform.h" @@ -24,7 +24,7 @@ X11NoGUIPlatform::~X11NoGUIPlatform() bool X11NoGUIPlatform::Initialize() { const int res = XInitThreads(); - if (res != 0) + if (res == 0) Log_WarningPrintf("XInitThreads() returned %d, things might not be stable.", res); m_display = XOpenDisplay(nullptr); @@ -34,6 +34,7 @@ bool X11NoGUIPlatform::Initialize() return false; } + InitializeKeyMap(); return true; } @@ -68,7 +69,7 @@ bool X11NoGUIPlatform::CreatePlatformWindow(std::string title) window_height, 0, 0, BlackPixel(m_display, 0)); if (!m_window) { - Log_ErrorPrintf("Failed to create X window"); + Log_ErrorPrint("Failed to create X window"); return false; } @@ -92,7 +93,6 @@ bool X11NoGUIPlatform::CreatePlatformWindow(std::string title) XMapRaised(m_display, m_window); XFlush(m_display); XSync(m_display, True); - InitializeKeyMap(); } ProcessXEvents(); @@ -144,15 +144,15 @@ void X11NoGUIPlatform::InitializeKeyMap() if (keysym == NoSymbol) continue; - KeySym upper_sym; - XConvertCase(keysym, &keysym, &upper_sym); + KeySym upper_keysym; + XConvertCase(keysym, &keysym, &upper_keysym); // Would this fail? 
- const char* keyname = XKeysymToString(keysym); + const char* keyname = XKeysymToString(upper_keysym); if (!keyname) continue; - m_key_map.emplace(static_cast(keysym), keyname); + m_key_map.emplace(static_cast(upper_keysym), keyname); } } @@ -160,7 +160,7 @@ std::optional X11NoGUIPlatform::ConvertHostKeyboardStringToCode(const std:: { for (const auto& it : m_key_map) { - if (StringUtil::Strncasecmp(it.second.c_str(), str.data(), str.length()) == 0) + if (str == it.second) return it.first; } @@ -175,7 +175,7 @@ std::optional X11NoGUIPlatform::ConvertHostKeyboardCodeToString(u32 void X11NoGUIPlatform::ProcessXEvents() { - XLockDisplay(m_display); + XDisplayLocker locker(m_display); for (int num_events = XPending(m_display); num_events > 0; num_events--) { @@ -186,19 +186,30 @@ void X11NoGUIPlatform::ProcessXEvents() case KeyPress: case KeyRelease: { - const KeySym sym = XLookupKeysym(&event.xkey, 0); + KeySym sym = XLookupKeysym(&event.xkey, 0); if (sym != NoSymbol) - NoGUIHost::ProcessPlatformKeyEvent(static_cast(sym), (event.type == KeyPress)); + { + KeySym upper_sym = sym; + XConvertCase(sym, &sym, &upper_sym); + NoGUIHost::ProcessPlatformKeyEvent(static_cast(upper_sym), (event.type == KeyPress)); + } } break; case ButtonPress: case ButtonRelease: { - if (event.xbutton.button >= Button1) + if (event.xbutton.button >= Button4 && event.xbutton.button <= Button5) { - NoGUIHost::ProcessPlatformMouseButtonEvent(static_cast(event.xbutton.button - Button1), - event.type == ButtonPress); + // Button 4/5 are mouse wheel events on X, apparently... + NoGUIHost::ProcessPlatformMouseWheelEvent(0.0f, (event.xbutton.button == Button4) ? 1.0f : -1.0f); + } + else if (event.xbutton.button >= Button1) + { + // Swap middle and right buttons. + const u32 xbutton = event.xbutton.button; + const u32 mapped_button = (xbutton == Button3) ? 1 : (xbutton == Button2 ? 
2 : (xbutton - Button1)); + NoGUIHost::ProcessPlatformMouseButtonEvent(mapped_button, event.type == ButtonPress); } } break; @@ -241,8 +252,6 @@ void X11NoGUIPlatform::ProcessXEvents() break; } } - - XUnlockDisplay(m_display); } void X11NoGUIPlatform::RunMessageLoop() diff --git a/src/duckstation-qt/debuggerwindow.cpp b/src/duckstation-qt/debuggerwindow.cpp index f63ea61964..786146f624 100644 --- a/src/duckstation-qt/debuggerwindow.cpp +++ b/src/duckstation-qt/debuggerwindow.cpp @@ -58,13 +58,13 @@ void DebuggerWindow::refreshAll() m_stack_model->invalidateView(); m_ui.memoryView->repaint(); - m_code_model->setPC(CPU::g_state.regs.pc); + m_code_model->setPC(CPU::g_state.pc); scrollToPC(); } void DebuggerWindow::scrollToPC() { - return scrollToCodeAddress(CPU::g_state.regs.pc); + return scrollToCodeAddress(CPU::g_state.pc); } void DebuggerWindow::scrollToCodeAddress(VirtualMemoryAddress address) diff --git a/src/duckstation-regtest/regtest_host.cpp b/src/duckstation-regtest/regtest_host.cpp index ae2727faab..37584d9ddd 100644 --- a/src/duckstation-regtest/regtest_host.cpp +++ b/src/duckstation-regtest/regtest_host.cpp @@ -95,10 +95,10 @@ bool RegTestHost::InitializeConfig() si.SetStringValue("MemoryCards", "Card2Type", Settings::GetMemoryCardTypeName(MemoryCardType::None)); si.SetStringValue("ControllerPorts", "MultitapMode", Settings::GetMultitapModeName(MultitapMode::Disabled)); si.SetStringValue("Audio", "Backend", Settings::GetAudioBackendName(AudioBackend::Null)); - si.SetStringValue("Logging", "LogLevel", Settings::GetLogLevelName(LOGLEVEL_VERBOSE)); si.SetBoolValue("Logging", "LogToConsole", true); si.SetBoolValue("Main", "ApplyGameSettings", false); // don't want game settings interfering si.SetBoolValue("BIOS", "PatchFastBoot", true); // no point validating the bios intro.. 
+ si.SetFloatValue("Main", "EmulationSpeed", 0.0f); // disable all sources for (u32 i = 0; i < static_cast(InputSourceType::Count); i++) @@ -251,7 +251,9 @@ void Host::OnGameChanged(const std::string& disc_path, const std::string& game_s void Host::PumpMessagesOnCPUThread() { - // + s_frames_to_run--; + if (s_frames_to_run == 0) + System::ShutdownSystem(false); } void Host::RunOnCPUThread(std::function function, bool block /* = false */) @@ -496,6 +498,7 @@ bool RegTestHost::ParseCommandLineParameters(int argc, char* argv[], std::option } Log::SetConsoleOutputParams(true, nullptr, level.value()); + s_base_settings_interface->SetStringValue("Logging", "LogLevel", Settings::GetLogLevelName(level.value())); continue; } else if (CHECK_ARG_PARAM("-renderer")) @@ -577,16 +580,7 @@ int main(int argc, char* argv[]) } Log_InfoPrintf("Running for %d frames...", s_frames_to_run); - - for (u32 frame = 0; frame < s_frames_to_run; frame++) - { - System::RunFrame(); - Host::RenderDisplay(false); - System::UpdatePerformanceCounters(); - } - - Log_InfoPrintf("All done, shutting down system."); - System::ShutdownSystem(false); + System::Execute(); Log_InfoPrintf("Exiting with success."); result = 0; diff --git a/src/frontend-common/fullscreen_ui.cpp b/src/frontend-common/fullscreen_ui.cpp index c90e5c54e0..cfe4e8e204 100644 --- a/src/frontend-common/fullscreen_ui.cpp +++ b/src/frontend-common/fullscreen_ui.cpp @@ -2690,7 +2690,7 @@ void FullscreenUI::DrawInterfaceSettingsPage() DrawToggleSetting(bsi, ICON_FA_SPINNER " Show GPU Usage", "Shows the host's GPU usage in the top-right corner of the display.", "Display", "ShowGPU", false); DrawToggleSetting(bsi, ICON_FA_RULER_HORIZONTAL " Show Frame Times", - "Shows a visual history of frame times in the upper-left corner of the display.", "EmuCore/GS", + "Shows a visual history of frame times in the upper-left corner of the display.", "Display", "ShowFrameTimes", false); DrawToggleSetting(bsi, ICON_FA_RULER_VERTICAL " Show Resolution", 
"Shows the current rendering resolution of the system in the top-right corner of the display.", diff --git a/src/util/jit_code_buffer.cpp b/src/util/jit_code_buffer.cpp index fc13efc032..6a8378824c 100644 --- a/src/util/jit_code_buffer.cpp +++ b/src/util/jit_code_buffer.cpp @@ -209,7 +209,7 @@ void JitCodeBuffer::CommitCode(u32 length) if (length == 0) return; -#if defined(CPU_AARCH32) || defined(CPU_AARCH64) +#if defined(CPU_AARCH32) || defined(CPU_AARCH64) || defined(CPU_RISCV64) // ARM instruction and data caches are not coherent, we need to flush after every block. FlushInstructionCache(m_free_code_ptr, length); #endif @@ -224,7 +224,7 @@ void JitCodeBuffer::CommitFarCode(u32 length) if (length == 0) return; -#if defined(CPU_AARCH32) || defined(CPU_AARCH64) +#if defined(CPU_AARCH32) || defined(CPU_AARCH64) || defined(CPU_RISCV64) // ARM instruction and data caches are not coherent, we need to flush after every block. FlushInstructionCache(m_free_far_code_ptr, length); #endif diff --git a/src/util/jit_code_buffer.h b/src/util/jit_code_buffer.h index 5d09e96f2e..d310c8e71e 100644 --- a/src/util/jit_code_buffer.h +++ b/src/util/jit_code_buffer.h @@ -21,6 +21,15 @@ class JitCodeBuffer ALWAYS_INLINE u8* GetCodePointer() const { return m_code_ptr; } ALWAYS_INLINE u32 GetTotalSize() const { return m_total_size; } + ALWAYS_INLINE float GetUsedPct() const + { + return (static_cast(m_code_used) / static_cast(m_code_size)) * 100.0f; + } + ALWAYS_INLINE float GetFarUsedPct() const + { + return (static_cast(m_far_code_used) / static_cast(m_far_code_size)) * 100.0f; + } + ALWAYS_INLINE u32 GetTotalUsed() const { return m_code_used + m_far_code_used; } ALWAYS_INLINE u8* GetFreeCodePointer() const { return m_free_code_ptr; } ALWAYS_INLINE u32 GetFreeCodeSpace() const { return static_cast(m_code_size - m_code_used); } diff --git a/src/util/page_fault_handler.cpp b/src/util/page_fault_handler.cpp index 8384178df8..f9ba722b38 100644 --- a/src/util/page_fault_handler.cpp +++ 
b/src/util/page_fault_handler.cpp @@ -81,6 +81,14 @@ static bool IsStoreInstruction(const void* ptr) return false; } } +#elif defined(CPU_RISCV64) +static bool IsStoreInstruction(const void* ptr) +{ + u32 bits; + std::memcpy(&bits, ptr, sizeof(bits)); + + return ((bits & 0x7Fu) == 0b0100011u); +} #endif #if defined(_WIN32) && (defined(CPU_X64) || defined(CPU_AARCH64)) @@ -143,6 +151,9 @@ static void SIGSEGVHandler(int sig, siginfo_t* info, void* ctx) #elif defined(CPU_AARCH64) void* const exception_pc = reinterpret_cast(static_cast(ctx)->uc_mcontext.pc); const bool is_write = IsStoreInstruction(exception_pc); +#elif defined(CPU_RISCV64) + void* const exception_pc = reinterpret_cast(static_cast(ctx)->uc_mcontext.__gregs[REG_PC]); + const bool is_write = IsStoreInstruction(exception_pc); #else void* const exception_pc = nullptr; const bool is_write = false;