Skip to content

Commit

Permalink
System: Refactor main loop
Browse files Browse the repository at this point in the history
Reduces JIT exits.
Improves runahead performance.
  • Loading branch information
stenzek committed Aug 15, 2023
1 parent 2732eb8 commit b2381ad
Show file tree
Hide file tree
Showing 43 changed files with 1,333 additions and 913 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,8 @@ elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm" OR "${CMAKE_SYSTEM_PROCESSOR}"
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -marm -march=armv7-a")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -marm -march=armv7-a")
endif()
elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "riscv64")
set(CPU_ARCH "riscv64")
else()
message(FATAL_ERROR "Unknown system processor: ${CMAKE_SYSTEM_PROCESSOR}")
endif()
Expand Down
5 changes: 4 additions & 1 deletion dep/vixl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -88,4 +88,7 @@ if(${CPU_ARCH} STREQUAL "aarch64")
)
endif()


if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
message("Enabling vixl debug assertions")
target_compile_definitions(vixl PUBLIC VIXL_DEBUG)
endif()
10 changes: 10 additions & 0 deletions src/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ add_library(common
dimensional_array.h
error.cpp
error.h
fastjmp.cpp
fastjmp.h
fifo_queue.h
file_system.cpp
file_system.h
Expand Down Expand Up @@ -97,6 +99,14 @@ if(WIN32)
windows_headers.h
)
target_link_libraries(common PRIVATE d3dcompiler.lib)

if(${CPU_ARCH} STREQUAL "x64")
enable_language(ASM_MASM)
target_sources(common PRIVATE fastjmp_x86.asm)
elseif(${CPU_ARCH} STREQUAL "aarch32" OR ${CPU_ARCH} STREQUAL "aarch64")
enable_language(ASM_MARMASM)
target_sources(common PRIVATE fastjmp_arm.asm)
endif()
endif()

if(NOT WIN32 AND NOT ANDROID)
Expand Down
20 changes: 20 additions & 0 deletions src/common/common.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
<ClInclude Include="dimensional_array.h" />
<ClInclude Include="easing.h" />
<ClInclude Include="error.h" />
<ClInclude Include="fastjmp.h" />
<ClInclude Include="fifo_queue.h" />
<ClInclude Include="file_system.h" />
<ClInclude Include="gl\context.h">
Expand Down Expand Up @@ -123,6 +124,7 @@
<ClCompile Include="d3d12\stream_buffer.cpp" />
<ClCompile Include="d3d12\texture.cpp" />
<ClCompile Include="d3d12\util.cpp" />
<ClCompile Include="fastjmp.cpp" />
<ClCompile Include="file_system.cpp" />
<ClCompile Include="gl\context.cpp">
<ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
Expand Down Expand Up @@ -192,6 +194,16 @@
<Natvis Include="bitfield.natvis" />
</ItemGroup>
<ItemGroup>
<MARMASM Include="fastjmp_arm.asm">
<FileType>Document</FileType>
<ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
</MARMASM>
<MASM Include="fastjmp_x86.asm">
<FileType>Document</FileType>
<ExcludedFromBuild Condition="'$(Platform)'!='Win32' And '$(Platform)'!='x64'">true</ExcludedFromBuild>
<PreprocessorDefinitions Condition="'$(Platform)'=='Win32'">_M_X86_32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="'$(Platform)'=='x64'">_M_X86_64;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</MASM>
<None Include="vulkan\entry_points.inl">
<ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
</None>
Expand Down Expand Up @@ -219,9 +231,17 @@
<Project>{73ee0c55-6ffe-44e7-9c12-baa52434a797}</Project>
</ProjectReference>
</ItemGroup>
<ImportGroup Label="ExtensionTargets">
<Import Project="$(VCTargetsPath)\BuildCustomizations\marmasm.targets" />
<Import Project="$(VCTargetsPath)\BuildCustomizations\masm.targets" />
</ImportGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{EE054E08-3799-4A59-A422-18259C105FFD}</ProjectGuid>
</PropertyGroup>
<ImportGroup Label="ExtensionSettings">
<Import Project="$(VCTargetsPath)\BuildCustomizations\marmasm.props" />
<Import Project="$(VCTargetsPath)\BuildCustomizations\masm.props" />
</ImportGroup>
<Import Project="..\..\dep\msvc\vsprops\StaticLibrary.props" />
<Import Project="common.props" />
<ItemDefinitionGroup>
Expand Down
8 changes: 8 additions & 0 deletions src/common/common.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@
<ClInclude Include="build_timestamp.h" />
<ClInclude Include="sha1_digest.h" />
<ClInclude Include="gpu_texture.h" />
<ClInclude Include="fastjmp.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="gl\program.cpp">
Expand Down Expand Up @@ -234,6 +235,7 @@
<ClCompile Include="threading.cpp" />
<ClCompile Include="sha1_digest.cpp" />
<ClCompile Include="gpu_texture.cpp" />
<ClCompile Include="fastjmp.cpp" />
</ItemGroup>
<ItemGroup>
<Natvis Include="bitfield.natvis" />
Expand All @@ -260,4 +262,10 @@
<Filter>vulkan</Filter>
</None>
</ItemGroup>
<ItemGroup>
<MASM Include="fastjmp_x86.asm" />
</ItemGroup>
<ItemGroup>
<MARMASM Include="fastjmp_arm.asm" />
</ItemGroup>
</Project>
166 changes: 166 additions & 0 deletions src/common/fastjmp.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
// SPDX-FileCopyrightText: 2021 Connor McLaughlin <[email protected]>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)

#ifndef _WIN32

#include "fastjmp.h"

#if defined(__APPLE__)
#define PREFIX "_"
#else
#define PREFIX ""
#endif

#if defined(__x86_64__)

asm("\t.global " PREFIX "fastjmp_set\n"
"\t.global " PREFIX "fastjmp_jmp\n"
"\t.text\n"
"\t" PREFIX "fastjmp_set:"
R"(
movq 0(%rsp), %rax
movq %rsp, %rdx # fixup stack pointer, so it doesn't include the call to fastjmp_set
addq $8, %rdx
movq %rax, 0(%rdi) # actually rip
movq %rbx, 8(%rdi)
movq %rdx, 16(%rdi) # actually rsp
movq %rbp, 24(%rdi)
movq %r12, 32(%rdi)
movq %r13, 40(%rdi)
movq %r14, 48(%rdi)
movq %r15, 56(%rdi)
xorl %eax, %eax
ret
)"
"\t" PREFIX "fastjmp_jmp:"
R"(
movl %esi, %eax
movq 0(%rdi), %rdx # actually rip
movq 8(%rdi), %rbx
movq 16(%rdi), %rsp # actually rsp
movq 24(%rdi), %rbp
movq 32(%rdi), %r12
movq 40(%rdi), %r13
movq 48(%rdi), %r14
movq 56(%rdi), %r15
jmp *%rdx
)");

#elif defined(__aarch64__)

asm(
"\t.global " PREFIX "fastjmp_set\n"
"\t.global " PREFIX "fastjmp_jmp\n"
"\t.text\n"
"\t.align 16\n"
"\t" PREFIX "fastjmp_set:" R"(
mov x16, sp
stp x16, x30, [x0]
stp x19, x20, [x0, #16]
stp x21, x22, [x0, #32]
stp x23, x24, [x0, #48]
stp x25, x26, [x0, #64]
stp x27, x28, [x0, #80]
str x29, [x0, #96]
stp d8, d9, [x0, #112]
stp d10, d11, [x0, #128]
stp d12, d13, [x0, #144]
stp d14, d15, [x0, #160]
mov w0, wzr
br x30
)"
".align 16\n"
"\t" PREFIX "fastjmp_jmp:" R"(
ldp x16, x30, [x0]
mov sp, x16
ldp x19, x20, [x0, #16]
ldp x21, x22, [x0, #32]
ldp x23, x24, [x0, #48]
ldp x25, x26, [x0, #64]
ldp x27, x28, [x0, #80]
ldr x29, [x0, #96]
ldp d8, d9, [x0, #112]
ldp d10, d11, [x0, #128]
ldp d12, d13, [x0, #144]
ldp d14, d15, [x0, #160]
mov w0, w1
br x30
)");

#elif defined(__riscv) && __riscv_xlen == 64

asm(
"\t.global " PREFIX "fastjmp_set\n"
"\t.global " PREFIX "fastjmp_jmp\n"
"\t.text\n"
"\t.align 16\n"
"\t" PREFIX "fastjmp_set:" R"(
sd sp, 0(a0)
sd s0, 8(a0)
sd s1, 16(a0)
sd s2, 24(a0)
sd s3, 32(a0)
sd s4, 40(a0)
sd s5, 48(a0)
sd s6, 56(a0)
sd s7, 64(a0)
sd s8, 72(a0)
sd s9, 80(a0)
sd s10, 88(a0)
sd s11, 96(a0)
fsd fs0, 104(a0)
fsd fs1, 112(a0)
fsd fs2, 120(a0)
fsd fs3, 128(a0)
fsd fs4, 136(a0)
fsd fs5, 144(a0)
fsd fs6, 152(a0)
fsd fs7, 160(a0)
fsd fs8, 168(a0)
fsd fs9, 176(a0)
fsd fs10, 184(a0)
fsd fs11, 192(a0)
sd ra, 208(a0)
li a0, 0
jr ra
)"
".align 16\n"
"\t" PREFIX "fastjmp_jmp:" R"(
ld ra, 208(a0)
fld fs11, 192(a0)
fld fs10, 184(a0)
fld fs9, 176(a0)
fld fs8, 168(a0)
fld fs7, 160(a0)
fld fs6, 152(a0)
fld fs5, 144(a0)
fld fs4, 136(a0)
fld fs3, 128(a0)
fld fs2, 120(a0)
fld fs1, 112(a0)
fld fs0, 104(a0)
ld s11, 96(a0)
ld s10, 88(a0)
ld s9, 80(a0)
ld s8, 72(a0)
ld s7, 64(a0)
ld s6, 56(a0)
ld s5, 48(a0)
ld s4, 40(a0)
ld s3, 32(a0)
ld s2, 24(a0)
ld s1, 16(a0)
ld s0, 8(a0)
ld sp, 0(a0)
mv a0, a1
jr ra
)");


#else

#error Unknown platform.

#endif

#endif // __WIN32
33 changes: 33 additions & 0 deletions src/common/fastjmp.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// SPDX-FileCopyrightText: 2021 Connor McLaughlin <[email protected]>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)

#pragma once

#include "types.h"

#include <cstddef>
#include <cstdint>

struct fastjmp_buf
{
#if defined(_WIN32) && defined(_M_AMD64)
static constexpr std::size_t BUF_SIZE = 240;
#elif defined(_M_ARM64) || defined(__aarch64__)
static constexpr std::size_t BUF_SIZE = 168;
#elif defined(__x86_64__)
static constexpr std::size_t BUF_SIZE = 64;
#elif defined(_M_IX86) || defined(__i386__)
static constexpr std::size_t BUF_SIZE = 24;
#elif defined(__riscv) && __riscv_xlen == 64
static constexpr std::size_t BUF_SIZE = 208;
#else
#error Unknown architecture.
#endif

alignas(16) std::uint8_t buf[BUF_SIZE];
};

extern "C" {
int fastjmp_set(fastjmp_buf* buf);
[[noreturn]] void fastjmp_jmp(const fastjmp_buf* buf, int ret);
}
47 changes: 47 additions & 0 deletions src/common/fastjmp_arm.asm
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
; SPDX-FileCopyrightText: 2021 Connor McLaughlin <[email protected]>
; SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)

#include "ksarm64.h"

EXPORT fastjmp_set
EXPORT fastjmp_jmp

TEXTAREA

; void fastjmp_set(fastjmp_buf*)
LEAF_ENTRY fastjmp_set
mov x16, sp
stp x16, x30, [x0]
stp x19, x20, [x0, #16]
stp x21, x22, [x0, #32]
stp x23, x24, [x0, #48]
stp x25, x26, [x0, #64]
stp x27, x28, [x0, #80]
str x29, [x0, #96]
stp d8, d9, [x0, #112]
stp d10, d11, [x0, #128]
stp d12, d13, [x0, #144]
stp d14, d15, [x0, #160]
mov w0, wzr
br x30
LEAF_END

; void fastjmp_jmp(fastjmp_buf*, int)
LEAF_ENTRY fastjmp_jmp
ldp x16, x30, [x0]
mov sp, x16
ldp x19, x20, [x0, #16]
ldp x21, x22, [x0, #32]
ldp x23, x24, [x0, #48]
ldp x25, x26, [x0, #64]
ldp x27, x28, [x0, #80]
ldr x29, [x0, #96]
ldp d8, d9, [x0, #112]
ldp d10, d11, [x0, #128]
ldp d12, d13, [x0, #144]
ldp d14, d15, [x0, #160]
mov w0, w1
br x30
LEAF_END

END
Loading

0 comments on commit b2381ad

Please sign in to comment.