Skip to content

Commit

Permalink
Revamped TLAS/BLAS data.
Browse files Browse the repository at this point in the history
  • Loading branch information
jbikker committed Jan 14, 2025
1 parent 6a37247 commit 52a89a6
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 56 deletions.
134 changes: 87 additions & 47 deletions tiny_bvh.h
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,7 @@ class BVHBase
uint32_t usedNodes = 0; // number of nodes used for the BVH.
uint32_t triCount = 0; // number of primitives in the BVH.
uint32_t idxCount = 0; // number of primitive indices; can exceed triCount for SBVH.
bvhvec3 aabbMin, aabbMax; // bounds of the root node of the BVH.
// Custom memory allocation
void* AlignedAlloc( size_t size );
void AlignedFree( void* ptr );
Expand Down Expand Up @@ -599,7 +600,7 @@ class BVH : public BVHBase
void Build( const bvhvec4slice& vertices );
void Build( const bvhvec4* vertices, const uint32_t* indices, const uint32_t primCount );
void Build( const bvhvec4slice& vertices, const uint32_t* indices, const uint32_t primCount );
void Build( const BLASInstance* bvhs, const uint32_t instCount );
void Build( BLASInstance* instances, const uint32_t instCount, BVHBase** blasses, const uint32_t blasCount );
void BuildHQ( const bvhvec4* vertices, const uint32_t primCount );
void BuildHQ( const bvhvec4slice& vertices );
void BuildHQ( const bvhvec4* vertices, const uint32_t* indices, const uint32_t primCount );
Expand Down Expand Up @@ -649,6 +650,8 @@ class BVH : public BVHBase
uint32_t* vertIdx = 0; // vertex indices, only used in case the BVH is built over indexed prims.
uint32_t* triIdx = 0; // primitive index array.
BLASInstance* instList = 0; // instance array, for top-level acceleration structure.
BVHBase** blasList = 0; // blas array, for TLAS traversal.
uint32_t blasCount = 0; // number of blasses in blasList.
BVHNode* bvhNode = 0; // BVH node pool, Wald 32-byte format. Root is always in node 0.
uint32_t newNodePtr = 0; // used during build to keep track of next free node in pool.
Fragment* fragment = 0; // input primitive bounding boxes.
Expand Down Expand Up @@ -684,7 +687,7 @@ class BVH_Double : public BVHBase
BVH_Double( BVHContext ctx = {} ) { context = ctx; }
~BVH_Double();
void Build( const bvhdbl3* vertices, const uint64_t primCount );
void Build( const BLASInstanceEx* bvhs, const uint64_t instCount );
void Build( BLASInstanceEx* bvhs, const uint64_t instCount, BVH_Double** blasses, const uint64_t blasCount );
void PrepareBuild( const bvhdbl3* vertices, const uint64_t primCount );
void Build();
double SAHCost( const uint64_t nodeIdx = 0 ) const;
Expand All @@ -697,12 +700,15 @@ class BVH_Double : public BVHBase
BVHNode* bvhNode = 0; // BVH node, double precision format.
uint64_t* triIdx = 0; // primitive index array for double-precision bvh.
BLASInstanceEx* instList = 0; // instance array, for top-level acceleration structure.
BVH_Double** blasList = 0; // blas array, for TLAS traversal.
uint64_t blasCount = 0; // number of blasses in blasList.
// 64-bit base overrides
uint64_t newNodePtr = 0; // next free bvh pool entry to allocate
uint64_t usedNodes = 0; // number of nodes used for the BVH.
uint64_t allocatedNodes = 0; // number of nodes allocated for the BVH.
uint64_t triCount = 0; // number of primitives in the BVH.
uint64_t idxCount = 0; // number of primitive indices.
bvhdbl3 aabbMin, aabbMax; // bounds of the root node of the BVH.
// Custom geometry intersection callback
void (*customIntersect)(RayEx&, uint64_t) = 0;
bool (*customIsOccluded)(const RayEx&, uint64_t) = 0;
Expand Down Expand Up @@ -956,16 +962,20 @@ class BVH8_CWBVH : public BVHBase
// BLASInstance: A TLAS is built over BLAS instances, where a single BLAS can be
// used with multiple transforms, and multiple BLASses can be combined in a complex
// scene. The TLAS is built over the world-space AABBs of the BLAS root nodes.
class BLASInstance
class ALIGNED( 64 ) BLASInstance
{
public:
BLASInstance() = default;
BLASInstance( BVH* bvh ) : blas( bvh ) {}
BLASInstance( uint32_t idx ) : blasIdx( idx ) {}
float transform[16] = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1 }; // identity
float invTransform[16] = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1 }; // identity
BVH* blas = 0; // Bottom-level acceleration structure.
bvhvec3 TransformPoint( const bvhvec3& v, const float* T ) const;
bvhvec3 TransformVector( const bvhvec3& v, const float* T ) const;
bvhvec3 aabbMin = bvhvec3( BVH_FAR );
uint32_t blasIdx = 0;
bvhvec3 aabbMax = bvhvec3( -BVH_FAR );
uint32_t dummy = 0;
void Update( BVHBase * blas );
bvhvec3 TransformPoint( const bvhvec3 & v, const float* T ) const;
bvhvec3 TransformVector( const bvhvec3 & v, const float* T ) const;
void InvertTransform();
};

Expand All @@ -974,10 +984,14 @@ class BLASInstanceEx
{
public:
BLASInstanceEx() = default;
BLASInstanceEx( BVH_Double* bvh ) : blas( bvh ) {}
BLASInstanceEx( uint64_t idx ) : blasIdx( idx ) {}
double transform[16] = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1 }; // identity
double invTransform[16] = { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1 }; // identity
BVH_Double* blas = 0; // Bottom-level acceleration structure.
bvhdbl3 aabbMin = bvhdbl3( BVH_DBL_FAR );
uint64_t blasIdx = 0;
bvhdbl3 aabbMax = bvhdbl3( -BVH_DBL_FAR );
uint64_t dummy = 0;
void Update( BVH_Double* blas );
bvhdbl3 TransformPoint( const bvhdbl3& v, const double* T ) const;
bvhdbl3 TransformVector( const bvhdbl3& v, const double* T ) const;
void InvertTransform();
Expand Down Expand Up @@ -1135,6 +1149,7 @@ void BVHBase::CopyBasePropertiesFrom( const BVHBase& original )
this->context = original.context;
this->triCount = original.triCount;
this->idxCount = original.idxCount;
this->aabbMin = original.aabbMin, this->aabbMax = original.aabbMax;
}

// BVH implementation
Expand Down Expand Up @@ -1338,6 +1353,7 @@ void BVH::BuildQuick( const bvhvec4slice& vertices )
if (taskCount == 0) break; else nodeIdx = task[--taskCount];
}
// all done.
aabbMin = bvhNode[0].aabbMin, aabbMax = bvhNode[0].aabbMax;
refittable = true; // not using spatial splits: can refit this BVH
may_have_holes = false; // the reference builder produces a continuous list of nodes
usedNodes = newNodePtr;
Expand Down Expand Up @@ -1374,7 +1390,7 @@ void BVH::Build( const bvhvec4slice& vertices, const uint32_t* indices, uint32_t
bvh_over_indices = true;
}

void BVH::Build( const BLASInstance* bvhs, const uint32_t instCount )
void BVH::Build( BLASInstance* instances, const uint32_t instCount, BVHBase** blasses, const uint32_t bCount )
{
FATAL_ERROR_IF( instCount == 0, "BVH::Build( BLASInstance*, instCount ), instCount == 0." );
triCount = idxCount = instCount;
Expand All @@ -1390,28 +1406,21 @@ void BVH::Build( const BLASInstance* bvhs, const uint32_t instCount )
triIdx = (uint32_t*)AlignedAlloc( instCount * sizeof( uint32_t ) );
fragment = (Fragment*)AlignedAlloc( instCount * sizeof( Fragment ) );
}
instList = (BLASInstance*)bvhs;
instList = instances;
blasList = blasses;
blasCount = bCount;
// copy relevant data from instance array
BVHNode& root = bvhNode[0];
root.leftFirst = 0, root.triCount = instCount, root.aabbMin = bvhvec3( BVH_FAR ), root.aabbMax = bvhvec3( -BVH_FAR );
for (uint32_t i = 0; i < instCount; i++)
{
FATAL_ERROR_IF( instList[i].blas->bvhNode == 0, "BVH::Build( BLASInstance*, .. ), BLAS not built." );
// transform the eight corners of the root node aabb using the instance
// transform and calculate the worldspace aabb over those.
instList[i].InvertTransform(); // TODO: done unconditionally; for a big TLAS this may be wasteful. Detect changes automatically?
bvhvec3 minBounds = bvhvec3( BVH_FAR ), maxBounds = bvhvec3( -BVH_FAR );
bvhvec3 bmin = instList[i].blas->bvhNode[0].aabbMin, bmax = instList[i].blas->bvhNode[0].aabbMax;
for (int32_t j = 0; j < 8; j++)
{
const bvhvec3 p( j & 1 ? bmax.x : bmin.x, j & 2 ? bmax.y : bmin.y, j & 4 ? bmax.z : bmin.z );
const bvhvec3 t = instList[i].TransformPoint( p, instList[i].transform );
minBounds = tinybvh_min( minBounds, t ), maxBounds = tinybvh_max( maxBounds, t );
}
fragment[i].bmin = minBounds, fragment[i].primIdx = i;
fragment[i].bmax = maxBounds, fragment[i].clipped = 0;
root.aabbMin = tinybvh_min( root.aabbMin, minBounds );
root.aabbMax = tinybvh_max( root.aabbMax, maxBounds ), triIdx[i] = i;
uint32_t blasIdx = instList[i].blasIdx;
BVH* blas = (BVH*)blasList[blasIdx];
instList[i].Update( blas );
fragment[i].bmin = instList[i].aabbMin, fragment[i].primIdx = i;
fragment[i].bmax = instList[i].aabbMax, fragment[i].clipped = 0;
root.aabbMin = tinybvh_min( root.aabbMin, instList[i].aabbMin );
root.aabbMax = tinybvh_max( root.aabbMax, instList[i].aabbMax ), triIdx[i] = i;
}
// start build
newNodePtr = 2;
Expand Down Expand Up @@ -1563,6 +1572,7 @@ void BVH::Build()
if (taskCount == 0) break; else nodeIdx = task[--taskCount];
}
// all done.
aabbMin = bvhNode[0].aabbMin, aabbMax = bvhNode[0].aabbMax;
refittable = true; // not using spatial splits: can refit this BVH
may_have_holes = false; // the reference builder produces a continuous list of nodes
bvh_over_aabbs = (verts == 0); // bvh over aabbs is suitable as TLAS
Expand Down Expand Up @@ -1902,6 +1912,7 @@ void BVH::BuildHQ()
for (uint32_t i = 0; i < triCount + slack; i++) triIdx[i] = fragment[triIdx[i]].primIdx;
AlignedFree( triIdxB );
// all done.
aabbMin = bvhNode[0].aabbMin, aabbMax = bvhNode[0].aabbMax;
refittable = false; // can't refit an SBVH
may_have_holes = false; // there may be holes in the index list, but not in the node list
usedNodes = newNodePtr;
Expand Down Expand Up @@ -1960,6 +1971,7 @@ void BVH::Refit( const uint32_t nodeIdx )
node.aabbMin = tinybvh_min( left.aabbMin, right.aabbMin );
node.aabbMax = tinybvh_max( left.aabbMax, right.aabbMax );
}
aabbMin = bvhNode[0].aabbMin, aabbMax = bvhNode[0].aabbMax;
}

int32_t BVH::Intersect( Ray& ray ) const
Expand Down Expand Up @@ -2013,7 +2025,7 @@ int32_t BVH::IntersectTLAS( Ray& ray ) const
// BLAS traversal
const uint32_t instIdx = triIdx[node->leftFirst + i];
const BLASInstance& inst = instList[instIdx];
const BVH* blas = inst.blas;
const BVH* blas = (const BVH*)blasList[inst.blasIdx]; // TODO: actually we don't know BVH type.
// 1. Transform ray with the inverse of the instance transform
tmp.O = inst.TransformPoint( ray.O, inst.invTransform );
tmp.D = inst.TransformVector( ray.D, inst.invTransform );
Expand Down Expand Up @@ -2103,7 +2115,7 @@ bool BVH::IsOccludedTLAS( const Ray& ray ) const
{
// BLAS traversal
BLASInstance& inst = instList[triIdx[node->leftFirst + i]];
BVH* blas = inst.blas;
BVH* blas = (BVH*)blasList[inst.blasIdx]; // TODO: actually we don't know BVH type.
// 1. Transform ray with the inverse of the instance transform
tmp.O = inst.TransformPoint( ray.O, inst.invTransform );
tmp.D = inst.TransformVector( ray.D, inst.invTransform );
Expand Down Expand Up @@ -2428,6 +2440,7 @@ void BVH_Verbose::Refit( const uint32_t nodeIdx )
node.aabbMin = tinybvh_min( bvhNode[node.left].aabbMin, bvhNode[node.right].aabbMin );
node.aabbMax = tinybvh_max( bvhNode[node.left].aabbMax, bvhNode[node.right].aabbMax );
}
if (nodeIdx == 0) aabbMin = node.aabbMin, aabbMax = node.aabbMax;
}

void BVH_Verbose::Compact()
Expand Down Expand Up @@ -2955,6 +2968,7 @@ template<int M> void MBVH<M>::Refit( const uint32_t nodeIdx )
bmax = tinybvh_max( bmax, child.aabbMax );
}
}
if (nodeIdx == 0) aabbMin = node.aabbMin, aabbMax = node.aabbMax;
}

template<int M> void MBVH<M>::ConvertFrom( const BVH& original )
Expand Down Expand Up @@ -5546,7 +5560,7 @@ BVH_Double::~BVH_Double()
AlignedFree( triIdx );
}

void BVH_Double::Build( const BLASInstanceEx* bvhs, const uint64_t instCount )
void BVH_Double::Build( BLASInstanceEx* bvhs, const uint64_t instCount, BVH_Double** blasses, const uint64_t bCount )
{
FATAL_ERROR_IF( instCount == 0, "BVH_Double::Build( BLASInstanceEx*, instCount ), instCount == 0." );
triCount = idxCount = instCount;
Expand All @@ -5562,26 +5576,19 @@ void BVH_Double::Build( const BLASInstanceEx* bvhs, const uint64_t instCount )
fragment = (Fragment*)AlignedAlloc( instCount * sizeof( Fragment ) );
}
instList = (BLASInstanceEx*)bvhs;
blasList = blasses;
blasCount = bCount;
// copy relevant data from instance array
BVHNode& root = bvhNode[0];
root.leftFirst = 0, root.triCount = instCount, root.aabbMin = bvhdbl3( BVH_DBL_FAR ), root.aabbMax = bvhdbl3( -BVH_DBL_FAR );
for (uint64_t i = 0; i < instCount; i++)
{
FATAL_ERROR_IF( instList[i].blas->bvhNode == 0, "BVH::Build( BLASInstanceEx*, .. ), BLAS not built." );
// transform the eight corners of the root node aabb using the instance
// transform and calculate the worldspace aabb over those.
instList[i].InvertTransform(); // TODO: done unconditionally; for a big TLAS this may be wasteful. Detect changes automatically?
bvhdbl3 minBounds = bvhdbl3( BVH_DBL_FAR ), maxBounds = bvhdbl3( -BVH_DBL_FAR );
bvhdbl3 bmin = instList[i].blas->bvhNode[0].aabbMin, bmax = instList[i].blas->bvhNode[0].aabbMax;
for (int32_t j = 0; j < 8; j++)
{
const bvhdbl3 p( j & 1 ? bmax.x : bmin.x, j & 2 ? bmax.y : bmin.y, j & 4 ? bmax.z : bmin.z );
const bvhdbl3 t = instList[i].TransformPoint( p, instList[i].transform );
minBounds = tinybvh_min( minBounds, t ), maxBounds = tinybvh_max( maxBounds, t );
}
fragment[i].bmin = minBounds, fragment[i].primIdx = i, fragment[i].bmax = maxBounds;
root.aabbMin = tinybvh_min( root.aabbMin, minBounds );
root.aabbMax = tinybvh_max( root.aabbMax, maxBounds ), triIdx[i] = i;
uint64_t blasIdx = instList[i].blasIdx;
BVH_Double* blas = blasList[blasIdx];
instList[i].Update( blas );
fragment[i].bmin = instList[i].aabbMin, fragment[i].primIdx = i, fragment[i].bmax = instList[i].aabbMax;
root.aabbMin = tinybvh_min( root.aabbMin, instList[i].aabbMin );
root.aabbMax = tinybvh_max( root.aabbMax, instList[i].aabbMax ), triIdx[i] = i;
}
// start build
newNodePtr = 1;
Expand Down Expand Up @@ -5715,6 +5722,7 @@ void BVH_Double::Build()
if (taskCount == 0) break; else nodeIdx = task[--taskCount];
}
// all done.
aabbMin = bvhNode[0].aabbMin, aabbMax = bvhNode[0].aabbMax;
refittable = true; // not using spatial splits: can refit this BVH
may_have_holes = false; // the reference builder produces a continuous list of nodes
bvh_over_aabbs = (verts == 0); // bvh over aabbs is suitable as TLAS
Expand Down Expand Up @@ -5812,7 +5820,7 @@ int32_t BVH_Double::IntersectTLAS( RayEx& ray ) const
// BLAS traversal
const uint64_t instIdx = triIdx[node->leftFirst + i];
BLASInstanceEx& inst = instList[instIdx];
BVH_Double* blas = inst.blas;
BVH_Double* blas = blasList[inst.blasIdx];
// 1. Transform ray with the inverse of the instance transform
tmp.O = inst.TransformPoint( ray.O, inst.invTransform );
tmp.D = inst.TransformVector( ray.D, inst.invTransform );
Expand Down Expand Up @@ -5912,7 +5920,7 @@ bool BVH_Double::IsOccludedTLAS( const RayEx& ray ) const
{
// BLAS traversal
BLASInstanceEx& inst = instList[triIdx[node->leftFirst + i]];
BVH_Double* blas = inst.blas;
BVH_Double* blas = blasList[inst.blasIdx];
// 1. Transform ray with the inverse of the instance transform
tmp.O = inst.TransformPoint( ray.O, inst.invTransform );
tmp.D = inst.TransformVector( ray.D, inst.invTransform );
Expand Down Expand Up @@ -5965,6 +5973,22 @@ double BVH_Double::BVHNode::Intersect( const RayEx& ray ) const
//
// ============================================================================

// Update: recompute the world-space bounds of this instance from the bounds of
// the supplied BLAS. Calls InvertTransform() first, then overwrites aabbMin / aabbMax.
// 'blas' must be non-null; its aabbMin / aabbMax members are read.
void BLASInstance::Update( BVHBase* blas )
{
	// TODO: done unconditionally; for a big TLAS this may be wasteful. Detect changes automatically?
	InvertTransform();
	// start from an inverted box and grow it over the eight corners of the
	// BLAS bounds, each pushed through the instance transform.
	aabbMin = bvhvec3( BVH_FAR );
	aabbMax = bvhvec3( -BVH_FAR );
	const bvhvec3 corner[2] = { blas->aabbMin, blas->aabbMax };
	for (int32_t c = 0; c < 8; c++)
	{
		// bits 0, 1 and 2 of c select min (0) or max (1) for x, y and z respectively.
		const bvhvec3 local( corner[c & 1].x, corner[(c >> 1) & 1].y, corner[(c >> 2) & 1].z );
		const bvhvec3 world = TransformPoint( local, transform );
		aabbMin = tinybvh_min( aabbMin, world );
		aabbMax = tinybvh_max( aabbMax, world );
	}
}

// TransformPoint
bvhvec3 BLASInstance::TransformPoint( const bvhvec3& v, const float* T ) const
{
Expand Down Expand Up @@ -6012,6 +6036,22 @@ void BLASInstance::InvertTransform()

#ifdef DOUBLE_PRECISION_SUPPORT

// Update: recompute the world-space bounds of this double-precision instance
// from the bounds of the supplied BLAS. Calls InvertTransform() first, then
// overwrites aabbMin / aabbMax.
// 'blas' must be non-null; its aabbMin / aabbMax members are read.
void BLASInstanceEx::Update( BVH_Double* blas )
{
	InvertTransform(); // TODO: done unconditionally; for a big TLAS this may be wasteful. Detect changes automatically?
	// transform the eight corners of the root node aabb using the
	// instance transform and calculate the worldspace aabb over those.
	// The empty box is seeded with BVH_DBL_FAR, the same sentinel used by the
	// aabbMin / aabbMax member initializers and by BVH_Double::Build. BVH_FAR is
	// the sentinel of the single-precision BLASInstance path; with it, corners
	// beyond that magnitude would not be bounded correctly.
	aabbMin = bvhdbl3( BVH_DBL_FAR ), aabbMax = bvhdbl3( -BVH_DBL_FAR );
	const bvhdbl3 bmin = blas->aabbMin, bmax = blas->aabbMax;
	for (int32_t j = 0; j < 8; j++)
	{
		// bits 0, 1 and 2 of j select min or max for x, y and z respectively.
		const bvhdbl3 p( j & 1 ? bmax.x : bmin.x, j & 2 ? bmax.y : bmin.y, j & 4 ? bmax.z : bmin.z );
		const bvhdbl3 t = TransformPoint( p, transform );
		aabbMin = tinybvh_min( aabbMin, t ), aabbMax = tinybvh_max( aabbMax, t );
	}
}

// TransformPoint
bvhdbl3 BLASInstanceEx::TransformPoint( const bvhdbl3& v, const double* T ) const
{
Expand Down
Loading

0 comments on commit 52a89a6

Please sign in to comment.