Skip to content

Commit

Permalink
Do vertex buffer restriding on the GPU and avoid over-synchronization
Browse files (browse the repository at this point in the history)
  • Loading branch information
SamoZ256 committed Sep 23, 2024
1 parent 8b68df0 commit 3cf831d
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 12 deletions.
12 changes: 4 additions & 8 deletions src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ MetalVertexBufferCache::~MetalVertexBufferCache()
{
}

MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Buffer* bufferCache, uint32 bufferIndex, size_t stride)
MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Buffer* bufferCache, uint32 bufferIndex, size_t stride, std::vector<MTL::Resource*>& barrierBuffers)
{
auto vertexBufferRange = m_bufferRanges[bufferIndex];
auto& restrideInfo = *vertexBufferRange.restrideInfo;
Expand All @@ -28,14 +28,14 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
restrideInfo.allocation = m_bufferAllocator.GetBufferAllocation(newSize);
buffer = m_bufferAllocator.GetBuffer(restrideInfo.allocation.bufferIndex);

// HACK: the restriding is done on the CPU, since doing it on the GPU was causing over-synchronization
/*
uint8* oldPtr = (uint8*)bufferCache->contents() + vertexBufferRange.offset;
uint8* newPtr = (uint8*)buffer->contents() + restrideInfo.allocation.offset;
for (size_t elem = 0; elem < vertexBufferRange.size / stride; elem++)
memcpy(newPtr + elem * newStride, oldPtr + elem * stride, stride);
*/

/*
if (m_mtlr->GetEncoderType() == MetalEncoderType::Render)
{
auto renderCommandEncoder = static_cast<MTL::RenderCommandEncoder*>(m_mtlr->GetCommandEncoder());
Expand All @@ -56,16 +56,12 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu

renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), vertexBufferRange.size / stride);

// TODO: do the barriers in one call?
MTL::Resource* barrierBuffers[] = {buffer};
renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex, MTL::RenderStageVertex);
vectorAppendUnique(barrierBuffers, static_cast<MTL::Resource*>(buffer));
}
else
{
debug_printf("vertex buffer restride needs an active render command encoder\n");
cemu_assert_suspicious();
}
*/

restrideInfo.memoryInvalidated = false;
restrideInfo.lastStride = newStride;
Expand Down
6 changes: 3 additions & 3 deletions src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class MetalVertexBufferCache
range.offset = INVALID_OFFSET;
}

MetalRestridedBufferRange RestrideBufferIfNeeded(MTL::Buffer* bufferCache, uint32 bufferIndex, size_t stride);
MetalRestridedBufferRange RestrideBufferIfNeeded(MTL::Buffer* bufferCache, uint32 bufferIndex, size_t stride, std::vector<MTL::Resource*>& barrierBuffers);

private:
class MetalRenderer* m_mtlr;
Expand Down Expand Up @@ -105,9 +105,9 @@ class MetalMemoryManager
m_vertexBufferCache.UntrackVertexBuffer(bufferIndex);
}

MetalRestridedBufferRange RestrideBufferIfNeeded(uint32 bufferIndex, size_t stride)
MetalRestridedBufferRange RestrideBufferIfNeeded(uint32 bufferIndex, size_t stride, std::vector<MTL::Resource*>& barrierBuffers)
{
return m_vertexBufferCache.RestrideBufferIfNeeded(m_bufferCache, bufferIndex, stride);
return m_vertexBufferCache.RestrideBufferIfNeeded(m_bufferCache, bufferIndex, stride, barrierBuffers);
}

private:
Expand Down
8 changes: 7 additions & 1 deletion src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1141,6 +1141,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
// Resources

// Vertex buffers
std::vector<MTL::Resource*> barrierBuffers;
for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++)
{
auto& vertexBufferRange = m_state.m_vertexBuffers[i];
Expand All @@ -1161,7 +1162,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + i * 7;
uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;

auto restridedBuffer = m_memoryManager->RestrideBufferIfNeeded(i, bufferStride);
auto restridedBuffer = m_memoryManager->RestrideBufferIfNeeded(i, bufferStride, barrierBuffers);

buffer = restridedBuffer.buffer;
offset = restridedBuffer.offset;
Expand All @@ -1172,6 +1173,11 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
}
}

if (!barrierBuffers.empty())
{
renderCommandEncoder->memoryBarrier(barrierBuffers.data(), barrierBuffers.size(), MTL::RenderStageVertex, MTL::RenderStageVertex);
}

// Render pipeline state
MTL::RenderPipelineState* renderPipelineState;
if (usesGeometryShader)
Expand Down

0 comments on commit 3cf831d

Please sign in to comment.