Skip to content

Commit

Permalink
Tracking down perf oddities.
Browse files Browse the repository at this point in the history
  • Loading branch information
RossNordby committed Dec 2, 2023
1 parent b4d1292 commit 1cced13
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 7 deletions.
35 changes: 28 additions & 7 deletions BepuPhysics/CollisionDetection/CollidableOverlapFinder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -566,15 +566,15 @@ unsafe static void SelfTestJob3(long id, void* context, int workerIndex, IThread
var overlapFinder = (CollidableOverlapFinder<TCallbacks>)threadDispatcher.ManagedContext;
overlapFinder.selfTestContext3.ExecuteJob((int)id, workerIndex);

overlapFinder.taskAccumulators[workerIndex].FlushToStack(workerIndex, threadDispatcher);
//overlapFinder.taskAccumulators[workerIndex].FlushToStack(workerIndex, threadDispatcher);

}
/// <summary>
/// Job entry point for a single intertree test job. Recovers the overlap finder from the dispatcher's managed context and executes the job identified by <paramref name="id"/> on the calling worker.
/// </summary>
/// <param name="id">Job identifier; narrowed to an int before being passed to ExecuteJob.</param>
/// <param name="context">Not read by this function; the overlap finder is taken from <paramref name="threadDispatcher"/> instead.</param>
/// <param name="workerIndex">Index of the executing worker. Also selects that worker's entry in taskAccumulators.</param>
/// <param name="threadDispatcher">Dispatcher whose ManagedContext is cast to the overlap finder.</param>
unsafe static void IntertreeTestJob3(long id, void* context, int workerIndex, IThreadDispatcher threadDispatcher)
{
//The managed context is expected to be the owning overlap finder; the cast will throw if it is anything else.
var overlapFinder = (CollidableOverlapFinder<TCallbacks>)threadDispatcher.ManagedContext;
overlapFinder.intertreeTestContext3.ExecuteJob((int)id, workerIndex);

//Flush whatever this worker's accumulator is holding once the job has run.
//NOTE(review): this view shows both an active flush and a commented-out copy of the same call. That looks like the before/after pair of one changed line; confirm which of the two is intended to remain.
overlapFinder.taskAccumulators[workerIndex].FlushToStack(workerIndex, threadDispatcher);
//overlapFinder.taskAccumulators[workerIndex].FlushToStack(workerIndex, threadDispatcher);
}

unsafe static void NarrowPhaseJob3(long id, void* untypedContext, int workerIndex, IThreadDispatcher threadDispatcher)
Expand All @@ -592,6 +592,19 @@ unsafe static void NarrowPhaseJob3(long id, void* untypedContext, int workerInde
overlapFinder.taskAccumulators[workerIndex].PairsTestedOnThread += pairsToTest.Count;
}

/// <summary>
/// Runs the narrow phase job directly on the calling worker for any pairs currently held in that worker's task accumulator.
/// </summary>
/// <param name="overlapFinder">Overlap finder that owns the per-worker task accumulators.</param>
/// <param name="workerIndex">Index of the worker whose accumulated pairs should be processed.</param>
/// <param name="dispatcher">Dispatcher forwarded to the narrow phase job.</param>
/// <returns>True if the accumulator held at least one pair and the narrow phase job was executed; false if there was nothing to do.</returns>
unsafe static bool TryExecuteNarrowPhaseJobInline(CollidableOverlapFinder<TCallbacks> overlapFinder, int workerIndex, IThreadDispatcher dispatcher)
{
    var accumulatedPairs = &overlapFinder.taskAccumulators.GetPointer(workerIndex)->Pairs;
    if (accumulatedPairs->Count <= 0)
    {
        //Nothing has been accumulated on this worker; report that no work was done.
        return false;
    }

    NarrowPhaseJob3(0, accumulatedPairs, workerIndex, dispatcher);
    //The broad phase may still be feeding this accumulator. Clear the count so pairs that were just processed are not counted or tested a second time.
    accumulatedPairs->Count = 0;
    return true;
}

unsafe static void Worker3(int workerIndex, IThreadDispatcher threadDispatcher)
{
var overlapFinder = (CollidableOverlapFinder<TCallbacks>)threadDispatcher.ManagedContext;
Expand Down Expand Up @@ -639,6 +652,12 @@ unsafe static void Worker3(int workerIndex, IThreadDispatcher threadDispatcher)
//We don't want to keep suffering the communication overhead associated with the broad phase stack if we know there will never be any work left; just decay to narrow only.
break;
}
if (TryExecuteNarrowPhaseJobInline(overlapFinder, workerIndex, threadDispatcher))
{
//The thread has a moment. Go ahead and eat inline narrow phase work if it exists.
waiter.Reset();
continue;
}
if (broadResult == PopTaskResult.Success)
{
//We only want to continue to the narrow phase test if there are no pending broad phase tests. Broad phase tests generate narrow phase work.
Expand Down Expand Up @@ -667,10 +686,7 @@ unsafe static void Worker3(int workerIndex, IThreadDispatcher threadDispatcher)
//Note that by submitting these pairs to the narrow phase inline, the next loop doesn't need to wait on a sync.
//(Consider the alternative: if these pairs were submitted to the narrow phase stack, then the next loop can't know that there are no more narrow phase tasks unless we inserted another sync point.
//The termination of the broad phase stack with the Stop command serves as the sync point for all narrow phase stack work created by the broad phase.)
if (overlapFinder.taskAccumulators[workerIndex].Pairs.Count > 0)
{
NarrowPhaseJob3(0, &overlapFinder.taskAccumulators.GetPointer(workerIndex)->Pairs, workerIndex, threadDispatcher);
}
TryExecuteNarrowPhaseJobInline(overlapFinder, workerIndex, threadDispatcher);

//So, at this point, the narrow phase stack will receive no further jobs. Just keep chomping until it's empty.
while (true)
Expand Down Expand Up @@ -712,6 +728,7 @@ public struct TaskAccumulator(BufferPool threadPool, int maximumTaskSize, int es
/// It's convenient to track this here because the accumulator is already thread local.
/// </summary>
public int PairsTestedOnThread;
public int FlushCount;

public void Accumulate(CollidablePair pair, int workerIndex, IThreadDispatcher dispatcher)
{
Expand All @@ -726,6 +743,7 @@ public void FlushToStack(int workerIndex, IThreadDispatcher dispatcher)
{
if (Pairs.Count > 0)
{
++FlushCount;
var pool = dispatcher.WorkerPools[workerIndex];
ref var taskContext = ref TaskContexts.Allocate(pool);
taskContext = Pairs;
Expand Down Expand Up @@ -793,14 +811,14 @@ public void DispatchOverlaps3(float dt, IThreadDispatcher threadDispatcher = nul
selfTestHandlers3 = new PairCollector3[threadDispatcher.ThreadCount];
intertreeTestHandlers3 = new PairCollector3[threadDispatcher.ThreadCount];
}
//Decay the initial capacity for chunks slowly over time.
taskAccumulators = new Buffer<TaskAccumulator>(threadDispatcher.ThreadCount, narrowPhase.Pool);
var broadTaskStack = new TaskStack(narrowPhase.Pool, threadDispatcher, threadDispatcher.ThreadCount);
var narrowTaskStack = new TaskStack(narrowPhase.Pool, threadDispatcher, threadDispatcher.ThreadCount);
broadStack = &broadTaskStack;
narrowStack = &narrowTaskStack;
const int targetJobsPerThread = 4;
int maximumTaskSize = int.Max(1, previousPairCount3 / (threadDispatcher.ThreadCount * targetJobsPerThread));
//Console.WriteLine($"maximum task size: {maximumTaskSize}");
var estimatedMaximumTaskCountPerThread = targetJobsPerThread * 2;
for (int i = 0; i < threadDispatcher.ThreadCount; ++i)
{
Expand All @@ -821,6 +839,7 @@ public void DispatchOverlaps3(float dt, IThreadDispatcher threadDispatcher = nul
var debugMinIndex = 0;
var debugMax = 0;
var debugMaxIndex = 0;
int totalFlushCount = 0;
for (int i = 0; i < threadDispatcher.ThreadCount; ++i)
{
ref var accumulator = ref taskAccumulators[i];
Expand All @@ -836,6 +855,7 @@ public void DispatchOverlaps3(float dt, IThreadDispatcher threadDispatcher = nul
debugMax = accumulator.PairsTestedOnThread;
debugMaxIndex = i;
}
totalFlushCount += accumulator.FlushCount;
accumulator.Dispose(threadDispatcher.WorkerPools[i]);
}
previousPairCount3 = totalPairCount;
Expand All @@ -845,6 +865,7 @@ public void DispatchOverlaps3(float dt, IThreadDispatcher threadDispatcher = nul
Console.WriteLine($"min: {debugMinIndex}, {debugMin / (double)totalPairCount}");
Console.WriteLine($"max: {debugMaxIndex}, {debugMax / (double)totalPairCount}");
Console.WriteLine($"sum: {totalPairCount}");
Console.WriteLine($"TFC: {totalFlushCount}");

#if DEBUG
for (int i = 1; i < threadDispatcher.ThreadCount; ++i)
Expand Down
69 changes: 69 additions & 0 deletions Demos/SpecializedTests/SimpleTestDemo.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
using BepuUtilities;
using DemoRenderer;
using BepuPhysics;
using BepuPhysics.Collidables;
using System;
using System.Numerics;
using BepuUtilities.Collections;
using DemoContentLoader;
using BepuPhysics.Constraints;
using DemoUtilities;

namespace Demos.SpecializedTests
{
    /// <summary>
    /// Minimal collision detection timing test: a single-layer 256x256 grid of dynamic boxes placed above one large static box.
    /// After a warmup period, every frame prints the time the profiler reports for the broad phase overlap finder along with a running average.
    /// </summary>
    public class SimpleTestDemo : Demo
    {
        /// <summary>
        /// Positions the camera, creates the simulation, and fills it with the grid of boxes and the static ground box.
        /// </summary>
        /// <param name="content">Content archive supplied by the demo harness; not read by this demo.</param>
        /// <param name="camera">Camera to position for viewing the grid.</param>
        public override void Initialize(ContentArchive content, Camera camera)
        {
            camera.Position = new Vector3(-30, 10, -30);
            //camera.Yaw = MathHelper.Pi ;
            camera.Yaw = MathHelper.Pi * 3f / 4;
            //camera.Pitch = MathHelper.PiOver2 * 0.999f;
            Simulation = Simulation.Create(BufferPool, new DemoNarrowPhaseCallbacks(new SpringSettings(30, 1)), new DemoPoseIntegratorCallbacks(new Vector3(0, -10, 0)), new SolveDescription(4, 1));

            var box = new Box(1f, 3f, 2f);

            var boxInertia = box.ComputeInertia(1);
            var boxIndex = Simulation.Shapes.Add(box);
            const int width = 256;
            const int height = 1;
            const int length = 256;
            //Cells are spaced 6 units apart on x and z and 3 units apart on y. The offset of -3 * size roughly centers the grid on the origin in x and z.
            var spacing = new Vector3(6, 3, 6);
            var origin = new Vector3(-width * 3, 5.5f, -length * 3);
            for (int i = 0; i < width; ++i)
            {
                for (int j = 0; j < height; ++j)
                {
                    for (int k = 0; k < length; ++k)
                    {
                        var location = spacing * new Vector3(i, j, k) + origin;
                        //NOTE(review): the -0.01f is presumably a negative sleep threshold that keeps bodies awake; confirm against BodyDescription.CreateDynamic.
                        Simulation.Bodies.Add(BodyDescription.CreateDynamic(location, boxInertia, boxIndex, -0.01f));
                    }
                }
            }

            Simulation.Statics.Add(new StaticDescription(new Vector3(), Simulation.Shapes.Add(new Box(5000, 1, 5000))));
            //var mesh = DemoMeshHelper.CreateDeformedPlane(128, 128, (x, y) => new Vector3(x - 64, 2f * (float)(Math.Sin(x * 0.5f) * Math.Sin(y * 0.5f)), y - 64), new Vector3(4, 1, 4), BufferPool);
            //Simulation.Statics.Add(new StaticDescription(new Vector3(), Simulation.Shapes.Add(mesh)));
        }

        //Sum of the measured per-frame times, in whatever unit the profiler reports (scaled by 1e3 and labeled as milliseconds when printed).
        double time = 0;
        //Number of Update calls so far, including the unmeasured warmup frames.
        long frameCount = 0;

        /// <summary>
        /// Advances the demo one frame and, once the warmup period has passed, prints the latest overlap finder time and the running average of all measured frames.
        /// </summary>
        /// <param name="window">Window forwarded to the base update.</param>
        /// <param name="camera">Camera forwarded to the base update.</param>
        /// <param name="input">Input state forwarded to the base update.</param>
        /// <param name="dt">Frame duration forwarded to the base update.</param>
        public override void Update(Window window, Camera camera, Input input, float dt)
        {
            base.Update(window, camera, input, dt);
            const long minimumFrameToMeasure = 256;
            frameCount++;
            if (frameCount >= minimumFrameToMeasure)
            {
                var frameTime = Simulation.Profiler[Simulation.BroadPhaseOverlapFinder];
                time += frameTime;
                //Measurement starts on frame minimumFrameToMeasure inclusive, so that frame is sample number 1.
                //Dividing by (frameCount - minimumFrameToMeasure) would divide by zero on the first measured frame and overstate every later average.
                var measuredFrameCount = frameCount - minimumFrameToMeasure + 1;
                Console.WriteLine($"coldet time (ms): {1e3 * frameTime}, average (ms): {1e3 * time / measuredFrameCount}");
            }
        }
    }
}


0 comments on commit 1cced13

Please sign in to comment.