Skip to content

Commit

Permalink
Wine: Implements support for profile stats
Browse files Browse the repository at this point in the history
This is a little trickier: we actually open the
`/dev/shm/fex-<pid>-stats` file directly using Windows APIs, so that
MangoHud (which is going to be on the Linux side, or potentially even
embedded into Gamescope) can safely pick up the stats.

This is a little quirky, and it doesn't support expanding the mapping's size
since WINE doesn't support NtExtendSection, but that's fine.
  • Loading branch information
Sonicadvance1 committed Jan 22, 2025
1 parent 3bdc69d commit 46dca8a
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 5 deletions.
20 changes: 18 additions & 2 deletions Source/Windows/ARM64EC/Module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ desc: Implements the ARM64EC BT module API using FEXCore
#include "Common/CRT/CRT.h"
#include "DummyHandlers.h"
#include "BTInterface.h"
#include "Windows/Common/Profiler.h"

#include <cstdint>
#include <cstdio>
Expand Down Expand Up @@ -122,6 +123,7 @@ namespace {
fextl::unique_ptr<FEXCore::Context::Context> CTX;
fextl::unique_ptr<FEX::DummyHandlers::DummySignalDelegator> SignalDelegator;
fextl::unique_ptr<Exception::ECSyscallHandler> SyscallHandler;
fextl::unique_ptr<FEX::Windows::StatAlloc> StatAllocHandler;
std::optional<FEX::Windows::InvalidationTracker> InvalidationTracker;
std::optional<FEX::Windows::CPUFeatures> CPUFeatures;
std::optional<FEX::Windows::OvercommitTracker> OvercommitTracker;
Expand Down Expand Up @@ -569,10 +571,17 @@ NTSTATUS ProcessInit() {
const uintptr_t KiUserExceptionDispatcherFFS = reinterpret_cast<uintptr_t>(GetProcAddress(NtDll, "KiUserExceptionDispatcher"));
Exception::KiUserExceptionDispatcher = NtDllRedirectionLUT[KiUserExceptionDispatcherFFS - NtDllBase] + NtDllBase;

FEX_CONFIG_OPT(ProfileStats, PROFILESTATS);

if (IsWine && ProfileStats()) {
StatAllocHandler = fextl::make_unique<FEX::Windows::StatAlloc>(FEXCore::Profiler::AppType::WIN_ARM64EC);
}
return STATUS_SUCCESS;
}

void ProcessTerm(HANDLE Handle, BOOL After, NTSTATUS Status) {}
// Process-termination hook for the ARM64EC module.
//
// Handle, After and Status are not consulted. The only work done is releasing the
// profile-stats allocator; when StatAllocHandler is already empty this does nothing.
void ProcessTerm(HANDLE Handle, BOOL After, NTSTATUS Status) {
  StatAllocHandler = nullptr;
}

class ScopedCallbackDisable {
private:
Expand Down Expand Up @@ -808,7 +817,11 @@ NTSTATUS ThreadInit() {

{
std::scoped_lock Lock(ThreadCreationMutex);
Threads.emplace(GetCurrentThreadId(), Thread);
auto ThreadTID = GetCurrentThreadId();
Threads.emplace(ThreadTID, Thread);
if (StatAllocHandler) {
Thread->ThreadStats = StatAllocHandler->AllocateSlot(ThreadTID);
}
}

CPUArea.ThreadState() = Thread;
Expand All @@ -833,6 +846,9 @@ NTSTATUS ThreadTerm(HANDLE Thread, LONG ExitCode) {
{
std::scoped_lock Lock(ThreadCreationMutex);
Threads.erase(ThreadTID);
if (StatAllocHandler) {
StatAllocHandler->DeallocateSlot(OldThreadState->ThreadStats);
}
}

CTX->DestroyThread(OldThreadState);
Expand Down
2 changes: 1 addition & 1 deletion Source/Windows/Common/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
add_library(CommonWindows STATIC CPUFeatures.cpp InvalidationTracker.cpp Logging.cpp LoadConfig.S)
add_library(CommonWindows STATIC CPUFeatures.cpp Profiler.cpp InvalidationTracker.cpp Logging.cpp LoadConfig.S)
add_subdirectory(CRT)
add_subdirectory(WinAPI)
target_link_libraries(CommonWindows FEXCore_Base JemallocLibs)
Expand Down
65 changes: 65 additions & 0 deletions Source/Windows/Common/Profiler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// SPDX-License-Identifier: MIT
#include "Windows/Common/Profiler.h"

#include <FEXCore/fextl/fmt.h>
#include <FEXCore/Utils/LogManager.h>

#include <ntstatus.h>
#include <windef.h>
#include <winternl.h>
#include <winnt.h>
#include <wine/debug.h>

namespace FEX::Windows {
// Issues a raw `svc #0` with x8 set to 172 and returns straight to the caller.
//
// The function is declared `naked`, so the asm body below is the entire function and
// supplies its own `ret`; no C++ statement produces the uint64_t return value.
// NOTE(review): 172 is presumably the AArch64 Linux syscall number for getpid (matches the
// function name), with the result left in x0 per the AArch64 calling convention — confirm.
// NOTE(review): the clobber list names "r0"/"r8" while the instructions use x0/x8, and GCC
// documents only basic asm as safe inside naked functions — confirm the toolchain accepts this.
__attribute__((naked)) uint64_t linux_getpid() {
asm volatile(R"(
mov x8, 172;
svc #0;
ret;
)" ::
: "r0", "r8");
}

// Override of the base allocator's growth hook.
//
// Growing the mapping is not implemented here, so NewSize is ignored: a debug message is
// logged and the size that is already in effect is handed back unchanged.
uint64_t StatAlloc::AllocateMoreSlots(uint64_t NewSize) {
  LogMan::Msg::DFmt("Ran out of slots. Can't allocate more");

  const uint64_t UnchangedSize = CurrentSize;
  return UnchangedSize;
}

// Creates the per-process stats file and maps it into this process.
//
// The backing file is `/dev/shm/fex-<pid>-stats`, where <pid> comes from linux_getpid(). It is
// opened through the Windows file API, wrapped in a section of MAX_STATS_SIZE bytes and mapped
// read/write; the address of the view is stored in Base. SaveHeader(AppType) is only called once
// the view exists.
//
// On any failure a debug message is logged, every handle opened so far is closed, and the
// constructor returns early without writing the header.
StatAlloc::StatAlloc(FEXCore::Profiler::AppType AppType) {
  CurrentSize = MAX_STATS_SIZE;

  const auto StatsPath = fextl::fmt::format("/dev/shm/fex-{}-stats", linux_getpid());
  HANDLE FileHandle = CreateFile(StatsPath.c_str(), GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ, nullptr, CREATE_ALWAYS,
                                 FILE_ATTRIBUTE_NORMAL, nullptr);
  if (FileHandle == INVALID_HANDLE_VALUE) {
    // CreateFile signals failure with INVALID_HANDLE_VALUE: there is nothing to close and
    // nothing to build a section from, so bail out before handing it to NtCreateSection.
    LogMan::Msg::DFmt("Couldn't create the profile stats file");
    return;
  }

  // Create the section mapping for the file handle for the full size.
  HANDLE SectionMapping {};
  LARGE_INTEGER SectionSize {{MAX_STATS_SIZE}};
  NTSTATUS Result = NtCreateSection(&SectionMapping, SECTION_EXTEND_SIZE | SECTION_MAP_READ | SECTION_MAP_WRITE, nullptr, &SectionSize,
                                    PAGE_READWRITE, SEC_COMMIT, FileHandle);

  // Whether or not the section was created, the file handle is no longer needed:
  // the section mapping is used from now on.
  CloseHandle(FileHandle);

  if (Result != STATUS_SUCCESS) {
    LogMan::Msg::DFmt("Couldn't create the profile stats section: {:#x}", static_cast<uint32_t>(Result));
    return;
  }

  // Now actually map the view of the section.
  Base = nullptr;
  size_t FullSize = MAX_STATS_SIZE;
  Result = NtMapViewOfSection(SectionMapping, NtCurrentProcess(), &Base, 0, 0, nullptr, &FullSize, ViewUnmap, MEM_RESERVE | MEM_TOP_DOWN,
                              PAGE_READWRITE);

  // A mapped view keeps its own reference to the section, so the handle can be released on
  // both the success and the failure path instead of leaking for the lifetime of the process.
  CloseHandle(SectionMapping);

  if (Result != STATUS_SUCCESS) {
    LogMan::Msg::DFmt("Couldn't map the profile stats section: {:#x}", static_cast<uint32_t>(Result));
    return;
  }

  // Once WINE supports NtExtendSection and SECTION_EXTEND_SIZE correctly then we can map/commit a single page, map the full MAX_STATS_SIZE
  // view as reserved, and extend the view using NtExtendSection.
  SaveHeader(AppType);
}
// Deletes this process's `/dev/shm/fex-<pid>-stats` file. The path is rebuilt from
// linux_getpid() rather than remembered from construction.
StatAlloc::~StatAlloc() {
  const auto StatsPath = fextl::fmt::format("/dev/shm/fex-{}-stats", linux_getpid());
  DeleteFile(StatsPath.c_str());
}

} // namespace FEX::Windows
28 changes: 28 additions & 0 deletions Source/Windows/Common/Profiler.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// SPDX-License-Identifier: MIT
#pragma once

#include "Common/Profiler.h"

namespace FEX::Windows {
// Windows/WINE flavour of the profile-stats slot allocator.
//
// Slot bookkeeping is inherited from FEX::Profiler::StatAllocBase; this class only adds the
// public allocate/deallocate entry points and the growth hook override.
class StatAlloc final : public FEX::Profiler::StatAllocBase {
public:
  // AppType is forwarded to the implementation in the accompanying source file.
  StatAlloc(FEXCore::Profiler::AppType AppType);
  virtual ~StatAlloc();

  // Hands out a stats slot for the thread identified by TID by deferring to the base allocator.
  // Returns whatever AllocateBaseSlot returns.
  FEXCore::Profiler::ThreadStats* AllocateSlot(uint32_t TID) {
    FEXCore::Profiler::ThreadStats* Slot = AllocateBaseSlot(TID);
    return Slot;
  }

  // Gives a slot back to the base allocator. A null AllocatedSlot is accepted and ignored.
  void DeallocateSlot(FEXCore::Profiler::ThreadStats* AllocatedSlot) {
    if (AllocatedSlot) {
      DeallocateBaseSlot(AllocatedSlot);
    }
  }

private:
  // Growth hook required by the base class; defined out of line.
  uint64_t AllocateMoreSlots(uint64_t NewSize) override;
};

} // namespace FEX::Windows
21 changes: 19 additions & 2 deletions Source/Windows/WOW64/Module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ desc: Implements the WOW64 BT module API using FEXCore
#include "Common/CRT/CRT.h"
#include "DummyHandlers.h"
#include "BTInterface.h"
#include "Windows/Common/Profiler.h"

#include <cstdint>
#include <type_traits>
Expand Down Expand Up @@ -105,6 +106,7 @@ namespace BridgeInstrs {
fextl::unique_ptr<FEXCore::Context::Context> CTX;
fextl::unique_ptr<FEX::DummyHandlers::DummySignalDelegator> SignalDelegator;
fextl::unique_ptr<WowSyscallHandler> SyscallHandler;
fextl::unique_ptr<FEX::Windows::StatAlloc> StatAllocHandler;

std::optional<FEX::Windows::InvalidationTracker> InvalidationTracker;
std::optional<FEX::Windows::CPUFeatures> CPUFeatures;
Expand Down Expand Up @@ -499,9 +501,17 @@ void BTCpuProcessInit() {

// wow64.dll will only initialise the cross-process queue if this is set
GetTLS().Wow64Info().CpuFlags = WOW64_CPUFLAGS_SOFTWARE;

FEX_CONFIG_OPT(ProfileStats, PROFILESTATS);

if (IsWine && ProfileStats()) {
StatAllocHandler = fextl::make_unique<FEX::Windows::StatAlloc>(FEXCore::Profiler::AppType::WIN_WOW64);
}
}

void BTCpuProcessTerm(HANDLE Handle, BOOL After, ULONG Status) {}
// Process-termination hook for the WOW64 module.
//
// Handle, After and Status are not consulted. The only work done is releasing the
// profile-stats allocator; when StatAllocHandler is already empty this does nothing.
void BTCpuProcessTerm(HANDLE Handle, BOOL After, ULONG Status) {
  StatAllocHandler = nullptr;
}

void BTCpuThreadInit() {
FEX::Windows::InitCRTThread();
Expand All @@ -510,7 +520,11 @@ void BTCpuThreadInit() {
GetTLS().ControlWord().fetch_or(ControlBits::WOW_CPU_AREA_DIRTY, std::memory_order::relaxed);

std::scoped_lock Lock(ThreadCreationMutex);
Threads.emplace(GetCurrentThreadId(), Thread);
auto ThreadTID = GetCurrentThreadId();
Threads.emplace(ThreadTID, Thread);
if (StatAllocHandler) {
Thread->ThreadStats = StatAllocHandler->AllocateSlot(ThreadTID);
}
}

void BTCpuThreadTerm(HANDLE Thread, LONG ExitCode) {
Expand All @@ -530,6 +544,9 @@ void BTCpuThreadTerm(HANDLE Thread, LONG ExitCode) {
{
std::scoped_lock Lock(ThreadCreationMutex);
Threads.erase(ThreadTID);
if (StatAllocHandler) {
StatAllocHandler->DeallocateSlot(OldThreadState->ThreadStats);
}
}

CTX->DestroyThread(OldThreadState);
Expand Down

0 comments on commit 46dca8a

Please sign in to comment.