diff --git a/FEXCore/Source/Interface/Config/Config.json.in b/FEXCore/Source/Interface/Config/Config.json.in index a28c9c2f9e..25b7e3f126 100644 --- a/FEXCore/Source/Interface/Config/Config.json.in +++ b/FEXCore/Source/Interface/Config/Config.json.in @@ -363,6 +363,14 @@ "Redirects the telemetry folder that FEX usually writes to.", "By default telemetry data is stored in {$FEX_APP_DATA_LOCATION,{$XDG_DATA_HOME,$HOME}/.fex-emu/Telemetry/}" ] + }, + "ProfileStats": { + "Type": "bool", + "Default": "false", + "Desc": [ + "Enables FEX's low-overhead sampling profile statistics.", + "Requires a supported version of Mangohud to see the results" + ] } }, "Hacks": { diff --git a/FEXCore/Source/Interface/Core/Core.cpp b/FEXCore/Source/Interface/Core/Core.cpp index fcea488510..cc3600c580 100644 --- a/FEXCore/Source/Interface/Core/Core.cpp +++ b/FEXCore/Source/Interface/Core/Core.cpp @@ -831,8 +831,9 @@ ContextImpl::CompileCodeResult ContextImpl::CompileCode(FEXCore::Core::InternalT } uintptr_t ContextImpl::CompileBlock(FEXCore::Core::CpuStateFrame* Frame, uint64_t GuestRIP, uint64_t MaxInst) { - FEXCORE_PROFILE_SCOPED("CompileBlock"); auto Thread = Frame->Thread; + FEXCORE_PROFILE_SCOPED("CompileBlock"); + FEXCORE_PROFILE_ACCUMULATION(Thread, AccumulatedJITTime); // Invalidate might take a unique lock on this, to guarantee that during invalidation no code gets compiled auto lk = GuardSignalDeferringSection(CodeInvalidationMutex, Thread); diff --git a/FEXCore/include/FEXCore/Debug/InternalThreadState.h b/FEXCore/include/FEXCore/Debug/InternalThreadState.h index 81c49932a5..5c8bc424fb 100644 --- a/FEXCore/include/FEXCore/Debug/InternalThreadState.h +++ b/FEXCore/include/FEXCore/Debug/InternalThreadState.h @@ -36,6 +36,10 @@ class OpDispatchBuilder; class PassManager; } // namespace FEXCore::IR +namespace FEXCore::Profiler { +struct ThreadStats; +}; + namespace FEXCore::Core { // Special-purpose replacement for std::unique_ptr to allow InternalThreadState to be standard layout. 
@@ -95,6 +99,9 @@ struct InternalThreadState : public FEXCore::Allocator::FEXAllocOperators { std::shared_mutex ObjectCacheRefCounter {}; + // This pointer is owned by the frontend. + FEXCore::Profiler::ThreadStats* ThreadStats {}; + ///< Data pointer for exclusive use by the frontend void* FrontendPtr; diff --git a/FEXCore/include/FEXCore/Utils/Profiler.h b/FEXCore/include/FEXCore/Utils/Profiler.h index 059d49d064..123d672a76 100644 --- a/FEXCore/include/FEXCore/Utils/Profiler.h +++ b/FEXCore/include/FEXCore/Utils/Profiler.h @@ -1,13 +1,73 @@ // SPDX-License-Identifier: MIT #pragma once +#include #include #include +#ifdef _M_X86_64 +#include +#endif + #include namespace FEXCore::Profiler { +// FEXCore live-stats +constexpr uint8_t STATS_VERSION = 1; +enum class AppType : uint8_t { + LINUX_32, + LINUX_64, + WIN_ARM64EC, + WIN_WOW64, +}; + +struct ThreadStatsHeader { + uint8_t Version; + AppType app_type; + uint8_t _pad[2]; + char fex_version[48]; + std::atomic Head; + std::atomic Size; + uint32_t Pad; +}; + +struct ThreadStats { + std::atomic Next; + std::atomic TID; + + // Accumulated time (In unscaled CPU cycles!) + uint64_t AccumulatedJITTime; + uint64_t AccumulatedSignalTime; + + // Accumulated event counts + uint64_t AccumulatedSIGBUSCount; + uint64_t AccumulatedSMCCount; +}; + #ifdef ENABLE_FEXCORE_PROFILER +#ifdef _M_ARM_64 +/** + * @brief Get the raw cycle counter with synchronizing isb. + * + * `CNTVCTSS_EL0` also does the same thing, but requires the FEAT_ECV feature. 
+ */ +static inline uint64_t GetCycleCounter() { + uint64_t Result {}; + __asm volatile(R"( + isb; + mrs %[Res], CNTVCT_EL0; + )" + : [Res] "=r"(Result)); + return Result; +} +#else +static inline uint64_t GetCycleCounter() { + unsigned dummy; + uint64_t tsc = __rdtscp(&dummy); + return tsc; +} +#endif + FEX_DEFAULT_VISIBILITY void Init(); FEX_DEFAULT_VISIBILITY void Shutdown(); FEX_DEFAULT_VISIBILITY void TraceObject(std::string_view const Format); @@ -34,6 +94,36 @@ class ProfilerBlock final { // Declare a scoped profile block variable with a fixed name. #define FEXCORE_PROFILE_SCOPED(name) FEXCore::Profiler::ProfilerBlock UniqueScopeName(ScopedBlock_, __LINE__)(name) +template +class AccumulationBlock final { +public: + AccumulationBlock(T* Stat) + : Begin {GetCycleCounter()} + , Stat {Stat} {} + + ~AccumulationBlock() { + const auto Duration = GetCycleCounter() - Begin + FlatOffset; + if (Stat) { + auto ref = std::atomic_ref(*Stat); + ref.fetch_add(Duration, std::memory_order_relaxed); + } + } + +private: + uint64_t Begin; + T* Stat; +}; + +#define FEXCORE_PROFILE_ACCUMULATION(ThreadState, Stat) \ + FEXCore::Profiler::AccumulationBlockThreadStats->Stat)> UniqueScopeName(ScopedAccumulation_, __LINE__)( \ + ThreadState->ThreadStats ? &ThreadState->ThreadStats->Stat : nullptr); +#define FEXCORE_PROFILE_INSTANT_INCREMENT(ThreadState, Stat, value) \ + do { \ + if (ThreadState->ThreadStats) { \ + ThreadState->ThreadStats->Stat += value; \ + } \ + } while (0) + #else [[maybe_unused]] static void Init() {} @@ -50,5 +140,12 @@ static void TraceObject(std::string_view const, uint64_t) {} #define FEXCORE_PROFILE_SCOPED(...) \ do { \ } while (0) +#define FEXCORE_PROFILE_ACCUMULATION(...) \ + do { \ + } while (0) +#define FEXCORE_PROFILE_INSTANT_INCREMENT(...) 
\ + do { \ + } while (0) + #endif } // namespace FEXCore::Profiler diff --git a/Source/Common/CMakeLists.txt b/Source/Common/CMakeLists.txt index 241df8e5aa..d0b0d29a30 100644 --- a/Source/Common/CMakeLists.txt +++ b/Source/Common/CMakeLists.txt @@ -7,7 +7,8 @@ set(SRCS EnvironmentLoader.cpp HostFeatures.cpp JSONPool.cpp - StringUtil.cpp) + StringUtil.cpp + Profiler.cpp) if (NOT MINGW_BUILD) list (APPEND SRCS diff --git a/Source/Common/Profiler.cpp b/Source/Common/Profiler.cpp new file mode 100644 index 0000000000..1e2f662f07 --- /dev/null +++ b/Source/Common/Profiler.cpp @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: MIT +#include "Common/Profiler.h" +#include "git_version.h" + +#include + +namespace FEX::Profiler { +void StatAllocBase::SaveHeader(FEXCore::Profiler::AppType AppType) { + if (!Base) { + return; + } + + Head = reinterpret_cast(Base); + Head->Size.store(CurrentSize, std::memory_order_relaxed); + Head->Version = FEXCore::Profiler::STATS_VERSION; + // Copy at most size - 1 bytes: strncpy does not terminate a full buffer, so keep the final byte as NUL (assumes a zero-filled region). + std::string_view GitString = GIT_DESCRIBE_STRING; + strncpy(Head->fex_version, GitString.data(), std::min(GitString.size(), sizeof(Head->fex_version) - 1)); + Head->app_type = AppType; + + Stats = reinterpret_cast(reinterpret_cast(Base) + sizeof(FEXCore::Profiler::ThreadStatsHeader)); + + RemainingSlots = TotalSlotsFromSize(); +} + +bool StatAllocBase::AllocateMoreSlots() { + const auto OriginalSlotCount = TotalSlotsFromSize(); + + uint32_t NewSize = FrontendAllocateSlots(CurrentSize * 2); + + if (NewSize == CurrentSize) { + return false; + } + + CurrentSize = NewSize; + Head->Size.store(CurrentSize, std::memory_order_relaxed); + RemainingSlots = TotalSlotsFromSize() - OriginalSlotCount; + + return true; +} + +FEXCore::Profiler::ThreadStats* StatAllocBase::AllocateSlot(uint32_t TID) { + // No backing region (stats disabled or mapping failed) means there is nothing to hand out. + // Otherwise, when the free slots are exhausted, try to grow the region before giving up. + if (!Base || (!RemainingSlots && !AllocateMoreSlots())) { + return nullptr; + } + + // Find a free slot + store_memory_barrier(); + FEXCore::Profiler::ThreadStats* AllocatedSlot {}; + for (size_t i = 0; i < TotalSlotsFromSize(); 
++i) { + AllocatedSlot = &Stats[i]; + if (AllocatedSlot->TID.load(std::memory_order_relaxed) == 0) { + break; + } + } + + --RemainingSlots; + + // Slot might be reused, just zero it now. Clear exactly one ThreadStats; the larger header size would spill into the next slot. + memset(AllocatedSlot, 0, sizeof(FEXCore::Profiler::ThreadStats)); + + // TID != 0 means slot is allocated. + AllocatedSlot->TID.store(TID, std::memory_order_relaxed); + + // Setup singly-linked list + if (Head->Head.load(std::memory_order_relaxed) == 0) { + Head->Head.store(OffsetFromStat(AllocatedSlot), std::memory_order_relaxed); + } else { + StatTail->Next.store(OffsetFromStat(AllocatedSlot), std::memory_order_relaxed); + } + + // Update the tail. + StatTail = AllocatedSlot; + return AllocatedSlot; +} + +void StatAllocBase::DeallocateSlot(FEXCore::Profiler::ThreadStats* AllocatedSlot) { + if (!AllocatedSlot) { + return; + } + + // TID == 0 will signal the reader to ignore this slot & deallocate it! + AllocatedSlot->TID.store(0, std::memory_order_relaxed); + + store_memory_barrier(); + + const auto SlotOffset = OffsetFromStat(AllocatedSlot); + const auto AllocatedSlotNext = AllocatedSlot->Next.load(std::memory_order_relaxed); + + const bool IsTail = AllocatedSlot == StatTail; + + // Update the linked list. + if (Head->Head == SlotOffset) { + Head->Head.store(AllocatedSlotNext, std::memory_order_relaxed); + if (IsTail) { + StatTail = nullptr; + } + } else { + for (size_t i = 0; i < TotalSlotsFromSize(); ++i) { + auto Slot = &Stats[i]; + auto NextSlotOffset = Slot->Next.load(std::memory_order_relaxed); + + // Only a live slot (TID != 0) can be the predecessor; freed slots keep a stale Next that may still point here. + if (NextSlotOffset == SlotOffset && Slot->TID.load(std::memory_order_relaxed) != 0) { + Slot->Next.store(AllocatedSlotNext, std::memory_order_relaxed); + + if (IsTail) { + // This slot is now the tail. 
+ StatTail = Slot; + } + break; + } + } + } + + ++RemainingSlots; +} + +} // namespace FEX::Profiler diff --git a/Source/Common/Profiler.h b/Source/Common/Profiler.h new file mode 100644 index 0000000000..821ae7cdf0 --- /dev/null +++ b/Source/Common/Profiler.h @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: MIT +/* +$info$ +tags: Common|Profiler +desc: Frontend profiler common code +$end_info$ +*/ +#pragma once +#include + +namespace FEXCore::Core { +struct InternalThreadState; +} + +#ifdef _M_ARM_64 +static inline void store_memory_barrier() { + asm volatile("dmb ishst;" ::: "memory"); +} + +#else +static inline void store_memory_barrier() { + // Intentionally empty. + // x86 is strongly memory ordered with regular loadstores. No need for barrier. +} +#endif + +namespace FEX::Profiler { +class StatAllocBase { +protected: + FEXCore::Profiler::ThreadStats* AllocateSlot(uint32_t TID); + void DeallocateSlot(FEXCore::Profiler::ThreadStats* AllocatedSlot); + + uint32_t OffsetFromStat(FEXCore::Profiler::ThreadStats* Stat) const { + return reinterpret_cast(Stat) - reinterpret_cast(Base); + } + uint32_t TotalSlotsFromSize() const { + return (CurrentSize - sizeof(FEXCore::Profiler::ThreadStatsHeader)) / sizeof(FEXCore::Profiler::ThreadStats) - 1; + } + uint32_t TotalSlotsFromSize(uint32_t Size) const { + return (Size - sizeof(FEXCore::Profiler::ThreadStatsHeader)) / sizeof(FEXCore::Profiler::ThreadStats) - 1; + } + + uint32_t SlotIndexFromOffset(uint32_t Offset) { + return (Offset - sizeof(FEXCore::Profiler::ThreadStatsHeader)) / sizeof(FEXCore::Profiler::ThreadStats); + } + + void SaveHeader(FEXCore::Profiler::AppType AppType); + // Every member is value-initialized so SaveHeader's null-Base check is reliable even when the frontend never maps a region. + void* Base {}; + uint32_t CurrentSize {}; + FEXCore::Profiler::ThreadStatsHeader* Head {}; + FEXCore::Profiler::ThreadStats* Stats {}; + FEXCore::Profiler::ThreadStats* StatTail {}; + uint32_t RemainingSlots {}; + + // Limited to 4MB, which is on the order of a hundred thousand thread slots (assuming the 40-byte ThreadStats layout). 
+ // I (Sonicadvance1) wanted to reserve 128MB of VA space because it's cheap, but ran in to a bug when running WINE. + // WINE allocates [0x7fff'fe00'0000, 0x7fff'ffff'0000) which /consistently/ overlaps with FEX's sigaltstack. + // This only occurs when this stat allocation size is large as the top-down allocation pushes the alt-stack further. + // Additionally, only occurs on 48-bit VA systems, as mmap on lesser VA will fail regardless. + // TODO: Bump allocation size up once FEXCore's allocator can first use the 128TB of blocked VA space on 48-bit systems. + constexpr static uint32_t MAX_STATS_SIZE = 4 * 1024 * 1024; + +private: + virtual uint32_t FrontendAllocateSlots(uint32_t NewSize) = 0; + bool AllocateMoreSlots(); +}; + +} // namespace FEX::Profiler diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/SignalDelegator.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/SignalDelegator.cpp index 151e8d9b66..88226efc75 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/SignalDelegator.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/SignalDelegator.cpp @@ -18,6 +18,7 @@ desc: Handles host -> host and host -> guest signal routing, emulates procmask & #include #include #include +#include #include #include @@ -59,6 +60,7 @@ static FEX::HLE::ThreadStateObject* GetThreadFromAltStack(const stack_t& alt_sta static void SignalHandlerThunk(int Signal, siginfo_t* Info, void* UContext) { ucontext_t* _context = (ucontext_t*)UContext; auto ThreadObject = GetThreadFromAltStack(_context->uc_stack); + FEXCORE_PROFILE_ACCUMULATION(ThreadObject->Thread, AccumulatedSignalTime); ThreadObject->SignalInfo.Delegator->HandleSignal(ThreadObject, Signal, Info, UContext); } @@ -673,6 +675,8 @@ void SignalDelegator::HandleGuestSignal(FEX::HLE::ThreadStateObject* ThreadObjec SaveTelemetry(); #endif + FEX::HLE::_SyscallHandler->TM.CleanupForExit(); + // Reassign back to DFL and crash signal(Signal, SIG_DFL); if (SigInfo.si_code != SI_KERNEL) { @@ -916,6 +920,7 @@ 
SignalDelegator::SignalDelegator(FEXCore::Context::Context* _CTX, const std::str return false; } + FEXCORE_PROFILE_INSTANT_INCREMENT(Thread, AccumulatedSIGBUSCount, 1); const auto Delegator = FEX::HLE::ThreadManager::GetStateObjectFromFEXCoreThread(Thread)->SignalInfo.Delegator; const auto Result = FEXCore::ArchHelpers::Arm64::HandleUnalignedAccess(Thread, Delegator->GetUnalignedHandlerType(), PC, ArchHelpers::Context::GetArmGPRs(ucontext)); diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.cpp index 58a4a2dacc..d929a5c165 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.cpp @@ -883,6 +883,7 @@ uint64_t UnimplementedSyscallSafe(FEXCore::Core::CpuStateFrame* Frame, uint64_t } void SyscallHandler::LockBeforeFork(FEXCore::Core::InternalThreadState* Thread) { + TM.LockBeforeFork(); Thread->CTX->LockBeforeFork(Thread); VMATracking.Mutex.lock(); } diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Thread.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Thread.cpp index 8b5e380ac6..ea27d41737 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Thread.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Thread.cpp @@ -61,6 +61,9 @@ static void* ThreadHandler(void* Data) { Thread->ThreadInfo.PID = ::getpid(); Thread->ThreadInfo.TID = FHU::Syscalls::gettid(); + if (Thread->Thread->ThreadStats) { + Thread->Thread->ThreadStats->TID.store(Thread->ThreadInfo.TID, std::memory_order_relaxed); + } FEX::HLE::_SyscallHandler->RegisterTLSState(Thread); @@ -558,6 +561,7 @@ void RegisterThread(FEX::HLE::SyscallHandler* Handler) { [](FEXCore::Core::CpuStateFrame* Frame, int status) -> uint64_t { // Save telemetry if we're exiting. 
FEX::HLE::_SyscallHandler->GetSignalDelegator()->SaveTelemetry(); + FEX::HLE::_SyscallHandler->TM.CleanupForExit(); syscall(SYSCALL_DEF(exit_group), status); // This will never be reached diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsSMCTracking.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsSMCTracking.cpp index b81242d95a..8edfa70c4d 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsSMCTracking.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsSMCTracking.cpp @@ -97,6 +97,7 @@ bool SyscallHandler::HandleSegfault(FEXCore::Core::InternalThreadState* Thread, }); } + FEXCORE_PROFILE_INSTANT_INCREMENT(Thread, AccumulatedSMCCount, 1); return true; } } diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/ThreadManager.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/ThreadManager.cpp index 106e5a4cb1..742cac5380 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/ThreadManager.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/ThreadManager.cpp @@ -4,8 +4,157 @@ #include "LinuxSyscalls/SignalDelegator.h" #include +#include +#include + +#include +#include +#include +#include namespace FEX::HLE { + +ThreadManager::StatAlloc::StatAlloc() { + Initialize(); + SaveHeader(Is64BitMode() ? FEXCore::Profiler::AppType::LINUX_64 : FEXCore::Profiler::AppType::LINUX_32); +} + +void ThreadManager::StatAlloc::Initialize() { + if (!ProfileStats()) { + return; + } + + int fd = shm_open(fextl::fmt::format("fex-{}-stats", ::getpid()).c_str(), O_CREAT | O_TRUNC | O_RDWR, USER_PERMS); + if (fd == -1) { + return; + } + CurrentSize = sysconf(_SC_PAGESIZE); + if (CurrentSize == 0) { + CurrentSize = 4096; + } + + if (ftruncate(fd, CurrentSize) == -1) { + LogMan::Msg::EFmt("[StatAlloc] ftruncate failed"); + goto err; + } + + // Reserve a region of MAX_STATS_SIZE so we can grow the allocation buffer. 
+ // Number of thread slots when ThreadStatsHeader == 64bytes and ThreadStats == 40bytes: + // 1 page: 99 slots + // 1 MB: 26211 slots + // 128 MB: 3355440 slots + Base = ::mmap(nullptr, MAX_STATS_SIZE, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); + if (Base == MAP_FAILED) { + LogMan::Msg::EFmt("[StatAlloc] mmap base failed"); + Base = nullptr; + goto err; + } + + // Allocate a small working shared space for now, grow as necessary. + { + auto SharedBase = ::mmap(Base, CurrentSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, 0); + if (SharedBase == MAP_FAILED) { + LogMan::Msg::EFmt("[StatAlloc] mmap shm failed"); + munmap(Base, MAX_STATS_SIZE); + Base = nullptr; + goto err; + } + } + +err: + close(fd); +} + +uint32_t ThreadManager::StatAlloc::FrontendAllocateSlots(uint32_t NewSize) { + if (CurrentSize == MAX_STATS_SIZE) { + // Allocator has reached maximum slots. We can't allocate anymore. + // New threads won't get stats. + return CurrentSize; + } + NewSize = std::min(MAX_STATS_SIZE, NewSize); // Clamp the request so the mapping never grows past the reserved MAX_STATS_SIZE region. + + // When allocating more slots, open the fd without O_TRUNC | O_CREAT. 
+ int fd = shm_open(fextl::fmt::format("fex-{}-stats", ::getpid()).c_str(), O_RDWR, USER_PERMS); + // shm_open reports failure with -1; 0 is a valid descriptor. + if (fd == -1) { + return CurrentSize; + } + + if (ftruncate(fd, NewSize) == -1) { + LogMan::Msg::EFmt("[StatAlloc] ftruncate more failed"); + // Returning the unchanged size tells the caller that no slots were added. + NewSize = CurrentSize; + } else { + auto SharedBase = ::mmap(Base, NewSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, 0); + if (SharedBase == MAP_FAILED) { + LogMan::Msg::EFmt("[StatAlloc] allocate more mmap shm failed"); + NewSize = CurrentSize; + } + } + + // The descriptor is closed on both the success and failure paths. + // NewSize equals CurrentSize here whenever growing the region failed. + close(fd); + return NewSize; +} + +FEXCore::Profiler::ThreadStats* ThreadManager::StatAlloc::AllocateSlot(uint32_t TID) { + std::scoped_lock lk(StatMutex); + return StatAllocBase::AllocateSlot(TID); +} + +void ThreadManager::StatAlloc::DeallocateSlot(FEXCore::Profiler::ThreadStats* AllocatedSlot) { + if (!AllocatedSlot) { + return; + } + + std::scoped_lock lk(StatMutex); + StatAllocBase::DeallocateSlot(AllocatedSlot); +} + +void ThreadManager::StatAlloc::CleanupForExit() { + shm_unlink(fextl::fmt::format("fex-{}-stats", ::getpid()).c_str()); +} + +void ThreadManager::StatAlloc::LockBeforeFork() { + if (!ProfileStats()) { + return; + } + StatMutex.lock(); +} + +void ThreadManager::StatAlloc::UnlockAfterFork(FEXCore::Core::InternalThreadState* Thread, bool Child) { + if (!ProfileStats()) { + return; + } + + if (!Child) { + StatMutex.unlock(); + return; + } + + StatMutex.StealAndDropActiveLocks(); + + // shm_memory ownership is retained by the parent process, so the child must replace it with its own one. + // Otherwise this process will keep reporting in the original parent thread's stats region. + munmap(Base, MAX_STATS_SIZE); + Base = nullptr; + CurrentSize = 0; + Head = nullptr; + Stats = nullptr; + StatTail = nullptr; + RemainingSlots = 0; + + Thread->ThreadStats = nullptr; + + Initialize(); + SaveHeader(Is64BitMode() ? 
FEXCore::Profiler::AppType::LINUX_64 : FEXCore::Profiler::AppType::LINUX_32); + + // Update this thread's ThreadStats object + auto ThreadObject = FEX::HLE::ThreadManager::GetStateObjectFromFEXCoreThread(Thread); + ThreadObject->Thread->ThreadStats = AllocateSlot(ThreadObject->ThreadInfo.TID); +} + FEX::HLE::ThreadStateObject* ThreadManager::CreateThread(uint64_t InitialRIP, uint64_t StackPointer, const FEXCore::Core::CPUState* NewThreadState, uint64_t ParentTID, FEX::HLE::ThreadStateObject* InheritThread) { auto ThreadStateObject = new FEX::HLE::ThreadStateObject; @@ -13,12 +162,13 @@ FEX::HLE::ThreadStateObject* ThreadManager::CreateThread(uint64_t InitialRIP, ui ThreadStateObject->ThreadInfo.parent_tid = ParentTID; ThreadStateObject->ThreadInfo.PID = ::getpid(); - if (ParentTID == 0) { - ThreadStateObject->ThreadInfo.TID = FHU::Syscalls::gettid(); - } + ThreadStateObject->ThreadInfo.TID = FHU::Syscalls::gettid(); ThreadStateObject->Thread = CTX->CreateThread(InitialRIP, StackPointer, NewThreadState, ParentTID); ThreadStateObject->Thread->FrontendPtr = ThreadStateObject; + if (ProfileStats()) { + ThreadStateObject->Thread->ThreadStats = Stat.AllocateSlot(ThreadStateObject->ThreadInfo.TID); + } if (InheritThread) { FEX::HLE::_SyscallHandler->SeccompEmulator.InheritSeccompFilters(InheritThread, ThreadStateObject); @@ -37,6 +187,8 @@ void ThreadManager::DestroyThread(FEX::HLE::ThreadStateObject* Thread, bool Need Threads.erase(It); } + Stat.DeallocateSlot(Thread->Thread->ThreadStats); + HandleThreadDeletion(Thread, NeedsTLSUninstall); } @@ -212,7 +364,12 @@ void ThreadManager::UnpauseThread(FEX::HLE::ThreadStateObject* Thread) { Thread->ThreadPaused.NotifyOne(); } +void ThreadManager::LockBeforeFork() { + Stat.LockBeforeFork(); +} + void ThreadManager::UnlockAfterFork(FEXCore::Core::InternalThreadState* LiveThread, bool Child) { + Stat.UnlockAfterFork(LiveThread, Child); if (!Child) { return; } @@ -220,6 +377,9 @@ void 
ThreadManager::UnlockAfterFork(FEXCore::Core::InternalThreadState* LiveThre // This function is called after fork // We need to cleanup some of the thread data that is dead for (auto& DeadThread : Threads) { + if (DeadThread->Thread != LiveThread) { + DeadThread->Thread->ThreadStats = nullptr; // The fork parent retains ownership of dead threads' ThreadStats; LiveThread keeps the slot Stat.UnlockAfterFork just allocated. + } if (DeadThread->Thread == LiveThread) { continue; } diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/ThreadManager.h b/Source/Tools/LinuxEmulation/LinuxSyscalls/ThreadManager.h index 2401a88357..8fae497131 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/ThreadManager.h +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/ThreadManager.h @@ -8,11 +8,14 @@ desc: Frontend thread management #pragma once +#include "Common/Profiler.h" + #include "LinuxSyscalls/Types.h" #include "LinuxSyscalls/Seccomp/SeccompEmulator.h" #include #include +#include #include #include @@ -105,6 +108,35 @@ class ThreadManager final { ~ThreadManager(); + class StatAlloc final : public FEX::Profiler::StatAllocBase { + public: + StatAlloc(); + + void LockBeforeFork(); + void UnlockAfterFork(FEXCore::Core::InternalThreadState* Thread, bool Child); + + void CleanupForExit(); + + FEXCore::Profiler::ThreadStats* AllocateSlot(uint32_t TID); + void DeallocateSlot(FEXCore::Profiler::ThreadStats* AllocatedSlot); + + private: + void Initialize(); + + uint32_t FrontendAllocateSlots(uint32_t NewSize) override; + FEX_CONFIG_OPT(ProfileStats, PROFILESTATS); + FEX_CONFIG_OPT(Is64BitMode, IS64BIT_MODE); + + constexpr static int USER_PERMS = S_IRWXU | S_IRWXG | S_IRWXO; + FEXCore::ForkableUniqueMutex StatMutex; + }; + + void CleanupForExit() { + Stat.CleanupForExit(); + } + + StatAlloc Stat; + ///< Returns the ThreadStateObject from a CpuStateFrame object. 
static inline FEX::HLE::ThreadStateObject* GetStateObjectFromCPUState(FEXCore::Core::CpuStateFrame* Frame) { return static_cast(Frame->Thread->FrontendPtr); @@ -136,6 +168,7 @@ class ThreadManager final { void SleepThread(FEXCore::Context::Context* CTX, FEXCore::Core::CpuStateFrame* Frame); + void LockBeforeFork(); void UnlockAfterFork(FEXCore::Core::InternalThreadState* Thread, bool Child); void IncrementIdleRefCount() { @@ -188,6 +221,7 @@ class ThreadManager final { void HandleThreadDeletion(FEX::HLE::ThreadStateObject* Thread, bool NeedsTLSUninstall = false); void NotifyPause(); + FEX_CONFIG_OPT(ProfileStats, PROFILESTATS); }; } // namespace FEX::HLE diff --git a/Source/Windows/ARM64EC/Module.cpp b/Source/Windows/ARM64EC/Module.cpp index bc085b92da..a3eb0a1a57 100644 --- a/Source/Windows/ARM64EC/Module.cpp +++ b/Source/Windows/ARM64EC/Module.cpp @@ -37,6 +37,7 @@ desc: Implements the ARM64EC BT module API using FEXCore #include "Common/CRT/CRT.h" #include "DummyHandlers.h" #include "BTInterface.h" +#include "Windows/Common/Profiler.h" #include #include @@ -122,6 +123,7 @@ namespace { fextl::unique_ptr CTX; fextl::unique_ptr SignalDelegator; fextl::unique_ptr SyscallHandler; +fextl::unique_ptr StatAllocHandler; std::optional InvalidationTracker; std::optional CPUFeatures; std::optional OvercommitTracker; @@ -255,12 +257,14 @@ struct alignas(16) KiUserExceptionDispatcherStackLayout { }; static bool HandleUnalignedAccess(ARM64_NT_CONTEXT& Context) { - if (!CTX->IsAddressInCodeBuffer(GetCPUArea().ThreadState(), Context.Pc)) { + auto Thread = GetCPUArea().ThreadState(); + if (!CTX->IsAddressInCodeBuffer(Thread, Context.Pc)) { return false; } - const auto Result = FEXCore::ArchHelpers::Arm64::HandleUnalignedAccess(GetCPUArea().ThreadState(), - HandlerConfig->GetUnalignedHandlerType(), Context.Pc, &Context.X0); + FEXCORE_PROFILE_INSTANT_INCREMENT(Thread, AccumulatedSIGBUSCount, 1); + const auto Result = + FEXCore::ArchHelpers::Arm64::HandleUnalignedAccess(Thread, 
HandlerConfig->GetUnalignedHandlerType(), Context.Pc, &Context.X0); if (!Result.first) { return false; } @@ -567,6 +571,11 @@ NTSTATUS ProcessInit() { const uintptr_t KiUserExceptionDispatcherFFS = reinterpret_cast(GetProcAddress(NtDll, "KiUserExceptionDispatcher")); Exception::KiUserExceptionDispatcher = NtDllRedirectionLUT[KiUserExceptionDispatcherFFS - NtDllBase] + NtDllBase; + FEX_CONFIG_OPT(ProfileStats, PROFILESTATS); + + if (IsWine && ProfileStats()) { + StatAllocHandler = fextl::make_unique(FEXCore::Profiler::AppType::WIN_ARM64EC); + } return STATUS_SUCCESS; } @@ -590,19 +599,22 @@ class ScopedCallbackDisable { // Returns true if exception dispatch should be halted and the execution context restored to NativeContext bool ResetToConsistentStateImpl(EXCEPTION_RECORD* Exception, CONTEXT* GuestContext, ARM64_NT_CONTEXT* NativeContext) { const auto CPUArea = GetCPUArea(); + auto Thread = CPUArea.ThreadState(); + FEXCORE_PROFILE_ACCUMULATION(Thread, AccumulatedSignalTime); LogMan::Msg::DFmt("Exception: Code: {:X} Address: {:X}", Exception->ExceptionCode, reinterpret_cast(Exception->ExceptionAddress)); - if (Exception->ExceptionCode == EXCEPTION_ACCESS_VIOLATION && CPUArea.ThreadState() && InvalidationTracker) { + if (Exception->ExceptionCode == EXCEPTION_ACCESS_VIOLATION && Thread && InvalidationTracker) { const auto FaultAddress = static_cast(Exception->ExceptionInformation[1]); std::scoped_lock Lock(ThreadCreationMutex); if (InvalidationTracker->HandleRWXAccessViolation(FaultAddress)) { - if (CTX->IsAddressInCodeBuffer(CPUArea.ThreadState(), NativeContext->Pc) && !CTX->IsCurrentBlockSingleInst(CPUArea.ThreadState()) && - CTX->IsAddressInCurrentBlock(CPUArea.ThreadState(), FaultAddress, 8)) { + FEXCORE_PROFILE_INSTANT_INCREMENT(Thread, AccumulatedSMCCount, 1); + if (CTX->IsAddressInCodeBuffer(Thread, NativeContext->Pc) && !CTX->IsCurrentBlockSingleInst(CPUArea.ThreadState()) && + CTX->IsAddressInCurrentBlock(Thread, FaultAddress, 8)) { // If we are not patching 
ourself (single inst block case) and patching the current block, this is inline SMC. Reconstruct the current context (before the SMC write) then single step the write to reduce it to regular SMC. - Exception::ReconstructThreadState(CPUArea.ThreadState(), *NativeContext); + Exception::ReconstructThreadState(Thread, *NativeContext); LogMan::Msg::DFmt("Handled inline self-modifying code: pc: {:X} rip: {:X} fault: {:X}", NativeContext->Pc, - CPUArea.ThreadState()->CurrentFrame->State.rip, FaultAddress); + Thread->CurrentFrame->State.rip, FaultAddress); NativeContext->Pc = CPUArea.DispatcherLoopTopEnterECFillSRA(); NativeContext->Sp = CPUArea.EmulatorStackBase(); NativeContext->X10 = 1; // Set ENTRY_FILL_SRA_SINGLE_INST_REG to force a single step @@ -803,7 +815,11 @@ NTSTATUS ThreadInit() { { std::scoped_lock Lock(ThreadCreationMutex); - Threads.emplace(GetCurrentThreadId(), Thread); + auto ThreadTID = GetCurrentThreadId(); + Threads.emplace(ThreadTID, Thread); + if (StatAllocHandler) { + Thread->ThreadStats = StatAllocHandler->AllocateSlot(ThreadTID); + } } CPUArea.ThreadState() = Thread; @@ -828,6 +844,9 @@ NTSTATUS ThreadTerm(HANDLE Thread, LONG ExitCode) { { std::scoped_lock Lock(ThreadCreationMutex); Threads.erase(ThreadTID); + if (StatAllocHandler) { + StatAllocHandler->DeallocateSlot(OldThreadState->ThreadStats); + } } CTX->DestroyThread(OldThreadState); diff --git a/Source/Windows/Common/CMakeLists.txt b/Source/Windows/Common/CMakeLists.txt index 8d92d8f18a..70a0e46c84 100644 --- a/Source/Windows/Common/CMakeLists.txt +++ b/Source/Windows/Common/CMakeLists.txt @@ -1,4 +1,4 @@ -add_library(CommonWindows STATIC CPUFeatures.cpp InvalidationTracker.cpp Logging.cpp LoadConfig.S) +add_library(CommonWindows STATIC CPUFeatures.cpp Profiler.cpp InvalidationTracker.cpp Logging.cpp LoadConfig.S) add_subdirectory(CRT) add_subdirectory(WinAPI) target_link_libraries(CommonWindows FEXCore_Base JemallocLibs) diff --git a/Source/Windows/Common/Profiler.cpp 
b/Source/Windows/Common/Profiler.cpp new file mode 100644 index 0000000000..cfa6437105 --- /dev/null +++ b/Source/Windows/Common/Profiler.cpp @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: MIT +#include "Windows/Common/Profiler.h" + +#include +#include + +#include +#include +#include +#include +#include + +namespace FEX::Windows { +__attribute__((naked)) uint64_t linux_getpid() { + asm volatile(R"( + mov x8, 172; + svc #0; + ret; + )" :: + : "r0", "r8"); +} + +uint32_t StatAlloc::FrontendAllocateSlots(uint32_t NewSize) { + if (CurrentSize == MAX_STATS_SIZE || !UsingNTQueryPath) { + LogMan::Msg::DFmt("Ran out of slots. Can't allocate more"); + return CurrentSize; + } + + MEMORY_FEX_STATS_SHM_INFORMATION Info { + .shm_base = nullptr, + .map_size = std::min(CurrentSize * 2, MAX_STATS_SIZE), + .max_size = MAX_STATS_SIZE, + }; + size_t Length {}; + auto Result = NtQueryVirtualMemory(NtCurrentProcess(), nullptr, MemoryFexStatsShm, &Info, sizeof(Info), &Length); + if (!Result) { + CurrentSize = Info.map_size; + } + + return CurrentSize; +} + +StatAlloc::StatAlloc(FEXCore::Profiler::AppType AppType) { + // Try wine+fex magic path. + + { + MEMORY_FEX_STATS_SHM_INFORMATION Info { + .shm_base = nullptr, + .map_size = 4096, + .max_size = MAX_STATS_SIZE, + }; + size_t Length {}; + auto Result = NtQueryVirtualMemory(NtCurrentProcess(), nullptr, MemoryFexStatsShm, &Info, sizeof(Info), &Length); + if (!Result) { + UsingNTQueryPath = true; + CurrentSize = Info.map_size; + Base = Info.shm_base; + SaveHeader(AppType); + return; + } + } + CurrentSize = MAX_STATS_SIZE; + + auto handle = CreateFile(fextl::fmt::format("/dev/shm/fex-{}-stats", linux_getpid()).c_str(), GENERIC_READ | GENERIC_WRITE, + FILE_SHARE_READ, nullptr, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr); + + // Create the section mapping for the file handle for the full size. 
+ HANDLE SectionMapping; + LARGE_INTEGER SectionSize {{MAX_STATS_SIZE}}; + auto Result = NtCreateSection(&SectionMapping, SECTION_EXTEND_SIZE | SECTION_MAP_READ | SECTION_MAP_WRITE, nullptr, &SectionSize, + PAGE_READWRITE, SEC_COMMIT, handle); + if (Result != 0) { + CloseHandle(handle); + return; + } + + // Section mapping is used from now on. + CloseHandle(handle); + + // Now actually map the view of the section. + Base = 0; + size_t FullSize = MAX_STATS_SIZE; + Result = NtMapViewOfSection(SectionMapping, NtCurrentProcess(), &Base, 0, 0, nullptr, &FullSize, ViewUnmap, MEM_RESERVE | MEM_TOP_DOWN, + PAGE_READWRITE); + if (Result != 0) { + CloseHandle(SectionMapping); + return; + } + + // Once WINE supports NtExtendSection and SECTION_EXTEND_SIZE correctly then we can map/commit a single page, map the full MAX_STATS_SIZE + // view as reserved, and extend the view using NtExtendSection. + SaveHeader(AppType); +} +StatAlloc::~StatAlloc() { + DeleteFile(fextl::fmt::format("/dev/shm/fex-{}-stats", linux_getpid()).c_str()); +} + +} // namespace FEX::Windows diff --git a/Source/Windows/Common/Profiler.h b/Source/Windows/Common/Profiler.h new file mode 100644 index 0000000000..9deea2e552 --- /dev/null +++ b/Source/Windows/Common/Profiler.h @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: MIT +#pragma once + +#include "Common/Profiler.h" + +namespace FEX::Windows { +class StatAlloc final : public FEX::Profiler::StatAllocBase { +public: + StatAlloc(FEXCore::Profiler::AppType AppType); + virtual ~StatAlloc(); + + FEXCore::Profiler::ThreadStats* AllocateSlot(uint32_t TID) { + return StatAllocBase::AllocateSlot(TID); + } + + void DeallocateSlot(FEXCore::Profiler::ThreadStats* AllocatedSlot) { + if (!AllocatedSlot) { + return; + } + + StatAllocBase::DeallocateSlot(AllocatedSlot); + } + +private: + uint32_t FrontendAllocateSlots(uint32_t NewSize) override; + bool UsingNTQueryPath {}; +}; + +} // namespace FEX::Windows diff --git a/Source/Windows/Common/WinAPI/IO.cpp 
b/Source/Windows/Common/WinAPI/IO.cpp index d453f509be..16ad036a33 100644 --- a/Source/Windows/Common/WinAPI/IO.cpp +++ b/Source/Windows/Common/WinAPI/IO.cpp @@ -45,6 +45,37 @@ FILE_INFORMATION_CLASS FileInfoClassToNT(FILE_INFO_BY_HANDLE_CLASS InformationCl } } // namespace +DLLEXPORT_FUNC(BOOL, DeleteFileA, (LPCSTR lpFileName)) { + ScopedUnicodeString FileName {lpFileName}; + return DeleteFileW(FileName->Buffer); +} + +DLLEXPORT_FUNC(BOOL, DeleteFileW, (LPCWSTR lpFileName)) { + UNICODE_STRING PathW; + RtlInitUnicodeString(&PathW, lpFileName); + + ScopedUnicodeString NTPath; + if (!RtlDosPathNameToNtPathName_U(PathW.Buffer, &*NTPath, nullptr, nullptr)) { + SetLastError(ERROR_PATH_NOT_FOUND); + return false; + } + + OBJECT_ATTRIBUTES ObjAttributes; + InitializeObjectAttributes(&ObjAttributes, &*NTPath, OBJ_CASE_INSENSITIVE, nullptr, nullptr); + + HANDLE Handle; + IO_STATUS_BLOCK IOSB; + + NTSTATUS Status = + NtCreateFile(&Handle, SYNCHRONIZE | DELETE, &ObjAttributes, &IOSB, nullptr, 0, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, + FILE_OPEN, FILE_DELETE_ON_CLOSE | FILE_NON_DIRECTORY_FILE, nullptr, 0); + if (WinAPIReturn(Status)) { + Status = NtClose(Handle); + } + + return WinAPIReturn(Status); +} + DLLEXPORT_FUNC(HANDLE, CreateFileA, (LPCSTR lpFileName, DWORD dwDesiredAccess, DWORD dwShareMode, LPSECURITY_ATTRIBUTES lpSecurityAttributes, DWORD dwCreationDisposition, DWORD dwFlagsAndAttributes, HANDLE hTemplateFile)) { diff --git a/Source/Windows/WOW64/Module.cpp b/Source/Windows/WOW64/Module.cpp index 8a6e552283..950b1c815a 100644 --- a/Source/Windows/WOW64/Module.cpp +++ b/Source/Windows/WOW64/Module.cpp @@ -38,6 +38,7 @@ desc: Implements the WOW64 BT module API using FEXCore #include "Common/CRT/CRT.h" #include "DummyHandlers.h" #include "BTInterface.h" +#include "Windows/Common/Profiler.h" #include #include @@ -105,6 +106,7 @@ namespace BridgeInstrs { fextl::unique_ptr CTX; fextl::unique_ptr SignalDelegator; fextl::unique_ptr SyscallHandler; 
+fextl::unique_ptr StatAllocHandler; std::optional InvalidationTracker; std::optional CPUFeatures; @@ -499,6 +501,12 @@ void BTCpuProcessInit() { // wow64.dll will only initialise the cross-process queue if this is set GetTLS().Wow64Info().CpuFlags = WOW64_CPUFLAGS_SOFTWARE; + + FEX_CONFIG_OPT(ProfileStats, PROFILESTATS); + + if (IsWine && ProfileStats()) { + StatAllocHandler = fextl::make_unique(FEXCore::Profiler::AppType::WIN_WOW64); + } } void BTCpuProcessTerm(HANDLE Handle, BOOL After, ULONG Status) {} @@ -510,7 +518,11 @@ void BTCpuThreadInit() { GetTLS().ControlWord().fetch_or(ControlBits::WOW_CPU_AREA_DIRTY, std::memory_order::relaxed); std::scoped_lock Lock(ThreadCreationMutex); - Threads.emplace(GetCurrentThreadId(), Thread); + auto ThreadTID = GetCurrentThreadId(); + Threads.emplace(ThreadTID, Thread); + if (StatAllocHandler) { + Thread->ThreadStats = StatAllocHandler->AllocateSlot(ThreadTID); + } } void BTCpuThreadTerm(HANDLE Thread, LONG ExitCode) { @@ -519,6 +531,8 @@ void BTCpuThreadTerm(HANDLE Thread, LONG ExitCode) { return; } + auto* ThreadState = TLS.ThreadState(); + THREAD_BASIC_INFORMATION Info; if (NTSTATUS Err = NtQueryInformationThread(Thread, ThreadBasicInformation, &Info, sizeof(Info), nullptr); Err) { return; @@ -528,9 +542,12 @@ void BTCpuThreadTerm(HANDLE Thread, LONG ExitCode) { { std::scoped_lock Lock(ThreadCreationMutex); Threads.erase(ThreadTID); + if (StatAllocHandler) { + StatAllocHandler->DeallocateSlot(ThreadState->ThreadStats); + } } - CTX->DestroyThread(TLS.ThreadState()); + CTX->DestroyThread(ThreadState); if (ThreadTID == GetCurrentThreadId()) { FEX::Windows::DeinitCRTThread(); } @@ -686,6 +703,7 @@ bool BTCpuResetToConsistentStateImpl(EXCEPTION_POINTERS* Ptrs) { auto* Context = Ptrs->ContextRecord; auto* Exception = Ptrs->ExceptionRecord; auto Thread = GetTLS().ThreadState(); + FEXCORE_PROFILE_ACCUMULATION(Thread, AccumulatedSignalTime); if (Exception->ExceptionCode == EXCEPTION_ACCESS_VIOLATION) { const auto FaultAddress = 
static_cast(Exception->ExceptionInformation[1]); @@ -701,6 +719,7 @@ bool BTCpuResetToConsistentStateImpl(EXCEPTION_POINTERS* Ptrs) { if (Thread) { std::scoped_lock Lock(ThreadCreationMutex); + FEXCORE_PROFILE_INSTANT_INCREMENT(Thread, AccumulatedSMCCount, 1); if (InvalidationTracker->HandleRWXAccessViolation(FaultAddress)) { LogMan::Msg::DFmt("Handled self-modifying code: pc: {:X} fault: {:X}", Context->Pc, FaultAddress); return true; @@ -712,6 +731,7 @@ bool BTCpuResetToConsistentStateImpl(EXCEPTION_POINTERS* Ptrs) { return false; } + FEXCORE_PROFILE_INSTANT_INCREMENT(Thread, AccumulatedSIGBUSCount, 1); if (Exception->ExceptionCode == EXCEPTION_DATATYPE_MISALIGNMENT && Context::HandleUnalignedAccess(Context)) { LogMan::Msg::DFmt("Handled unaligned atomic: new pc: {:X}", Context->Pc); return true; diff --git a/Source/Windows/include/winternl.h b/Source/Windows/include/winternl.h index d55d1e5695..914d38a2ce 100644 --- a/Source/Windows/include/winternl.h +++ b/Source/Windows/include/winternl.h @@ -375,6 +375,11 @@ typedef struct _SYSTEM_CPU_INFORMATION { ULONG ProcessorFeatureBits; } SYSTEM_CPU_INFORMATION, *PSYSTEM_CPU_INFORMATION; +typedef enum _SECTION_INHERIT { + ViewShare = 1, + ViewUnmap = 2, +} SECTION_INHERIT; + /* definitions of bits in the Feature set for the x86 processors */ #define CPU_FEATURE_VME 0x00000005 /* Virtual 86 Mode Extensions */ #define CPU_FEATURE_TSC 0x00000002 /* Time Stamp Counter available */ @@ -429,6 +434,7 @@ typedef enum _MEMORY_INFORMATION_CLASS { MemoryWineUnixFuncs = 1000, MemoryWineUnixWow64Funcs, #endif + MemoryFexStatsShm = 2000, } MEMORY_INFORMATION_CLASS; typedef enum _KEY_VALUE_INFORMATION_CLASS { @@ -447,6 +453,12 @@ typedef struct _KEY_VALUE_PARTIAL_INFORMATION { UCHAR Data[1]; } KEY_VALUE_PARTIAL_INFORMATION, *PKEY_VALUE_PARTIAL_INFORMATION; +typedef struct _MEMORY_FEX_STATS_SHM_INFORMATION { + void* shm_base; + DWORD map_size; + DWORD max_size; +} MEMORY_FEX_STATS_SHM_INFORMATION, *PMEMORY_FEX_STATS_SHM_INFORMATION; + 
NTSTATUS WINAPIV DbgPrint(LPCSTR fmt, ...); NTSTATUS WINAPI LdrDisableThreadCalloutsForDll(HMODULE); NTSTATUS WINAPI LdrGetDllFullName(HMODULE, UNICODE_STRING*); @@ -455,10 +467,12 @@ NTSTATUS WINAPI LdrGetProcedureAddress(HMODULE, const ANSI_STRING*, ULONG, void* NTSTATUS WINAPI NtAllocateVirtualMemoryEx(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG); NTSTATUS WINAPI NtAllocateVirtualMemory(HANDLE, PVOID*, ULONG_PTR, SIZE_T*, ULONG, ULONG); NTSTATUS WINAPI NtContinue(PCONTEXT, BOOLEAN); +NTSTATUS WINAPI NtCreateSection(HANDLE*, ACCESS_MASK, const OBJECT_ATTRIBUTES*, const LARGE_INTEGER*, ULONG, ULONG, HANDLE); NTSTATUS WINAPI NtFlushInstructionCache(HANDLE, LPCVOID, SIZE_T); NTSTATUS WINAPI NtFreeVirtualMemory(HANDLE, PVOID*, SIZE_T*, ULONG); NTSTATUS WINAPI NtGetContextThread(HANDLE, CONTEXT*); ULONG WINAPI NtGetCurrentProcessorNumber(void); +NTSYSAPI NTSTATUS WINAPI NtMapViewOfSection(HANDLE, HANDLE, PVOID*, ULONG_PTR, SIZE_T, const LARGE_INTEGER*, SIZE_T*, SECTION_INHERIT, ULONG, ULONG); NTSTATUS WINAPI NtOpenKeyEx(PHANDLE, ACCESS_MASK, const OBJECT_ATTRIBUTES*, ULONG); NTSTATUS WINAPI NtProtectVirtualMemory(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG*); NTSTATUS WINAPI NtQueryAttributesFile(const OBJECT_ATTRIBUTES*, FILE_BASIC_INFORMATION*);