Skip to content

Commit

Permalink
TLAS/BLAS feature complete.
Browse files Browse the repository at this point in the history
  • Loading branch information
jbikker committed Jan 11, 2025
1 parent 6db12ae commit 462ef11
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 25 deletions.
20 changes: 16 additions & 4 deletions tiny_bvh.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,15 @@ THE SOFTWARE.
#define HQBVHBINS 32
#define AVXBINS 8 // must stay at 8.

// TLAS setting
// Note: The instance index is encoded in the top TLAS_BITS bits of the 32-bit
// prim idx field; the remaining low bits hold the primitive index in the BLAS.
// Max number of instances in TLAS: 2 ^ TLAS_BITS
// Max number of primitives per BLAS: 2 ^ (32 - TLAS_BITS)
#define TLAS_BITS 10 // max 1024 instances of 4M triangles each
// Derived; for convenience:
// INST_IDX_SHFT: shift that moves an instance index into / out of the top bits.
#define INST_IDX_SHFT (32 - TLAS_BITS)
// PRIM_IDX_MASK: selects the primitive index bits. Unsigned literals keep the
// mask unsigned and the shift well-defined when TLAS_BITS is small.
#define PRIM_IDX_MASK ((1u << INST_IDX_SHFT) - 1u)

// SAH BVH building: Heuristic parameters
// CPU builds: C_INT = 1, C_TRAV = 1 seems optimal.
#define C_INT 1
Expand Down Expand Up @@ -468,7 +477,7 @@ struct Ray
}
ALIGNED( 16 ) bvhvec3 O; uint32_t dummy1;
ALIGNED( 16 ) bvhvec3 D; uint32_t dummy2;
ALIGNED( 16 ) bvhvec3 rD; uint32_t dummy3;
ALIGNED( 16 ) bvhvec3 rD; uint32_t instIdx = 0;
ALIGNED( 16 ) Intersection hit;
};

Expand Down Expand Up @@ -1967,12 +1976,14 @@ int32_t BVH::IntersectTLAS( Ray& ray ) const
for (uint32_t i = 0; i < node->triCount; i++)
{
// BLAS traversal
BLASInstance& inst = instList[triIdx[node->leftFirst + i]];
BVH* blas = inst.blas;
const uint32_t instIdx = triIdx[node->leftFirst + i];
const BLASInstance& inst = instList[instIdx];
const BVH* blas = inst.blas;
// 1. Transform ray with the inverse of the instance transform
tmp.O = inst.TransformPoint( ray.O, inst.invTransform );
tmp.D = inst.TransformVector( ray.D, inst.invTransform );
tmp.rD = tinybvh_safercp( tmp.D );
tmp.instIdx = instIdx << (32 - TLAS_BITS);
tmp.hit = ray.hit;
// 2. Traverse BLAS with the transformed ray
cost += blas->Intersect( tmp );
Expand Down Expand Up @@ -5860,7 +5871,8 @@ void BVHBase::IntersectTri( Ray& ray, const bvhvec4slice& verts, const uint32_t
if (t > 0 && t < ray.hit.t)
{
// register a hit: ray is shortened to t
ray.hit.t = t, ray.hit.u = u, ray.hit.v = v, ray.hit.prim = idx;
ray.hit.t = t, ray.hit.u = u, ray.hit.v = v;
ray.hit.prim = idx + ray.instIdx;
}
}

Expand Down
78 changes: 58 additions & 20 deletions tiny_bvh_anim.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
#define FENSTER_APP_IMPLEMENTATION
#define SCRWIDTH 800
#define SCRHEIGHT 600
#define TILESIZE 20
#include "external/fenster.h" // https://github.com/zserge/fenster

#define TINYBVH_IMPLEMENTATION
#include "tiny_bvh.h"
#include <fstream>
#include <thread>

using namespace tinybvh;

Expand All @@ -14,6 +16,8 @@ BLASInstance inst[3];
// Frame counter (advanced in Tick) plus two vertex counts.
// NOTE(review): verts / bverts presumably count the vertices behind 'triangles' and 'bunny' - confirm where they are set.
int frameIdx = 0, verts = 0, bverts = 0;
// Vertex data pointers. NOTE(review): presumably filled in by Init - the loading code is not visible here.
bvhvec4* triangles = 0;
bvhvec4* bunny = 0;
// Next tile index to hand out: Tick resets it every frame, render workers claim tiles with tileIdx++.
static std::atomic<int> tileIdx( 0 );
// Number of render worker threads spawned per frame. Note: hardware_concurrency() may report 0.
static unsigned threadCount = std::thread::hardware_concurrency();

// setup view pyramid for a pinhole camera
static bvhvec3 eye( -15.24f, 21.5f, 2.54f ), p1, p2, p3;
Expand All @@ -37,10 +41,11 @@ void Init()
// build a TLAS
inst[0] = BLASInstance( &bvh ); // static geometry
inst[1] = BLASInstance( &blas );
inst[1].transform[0] = inst[1].transform[5] = inst[1].transform[10] = 0.5f; // scale
inst[1].transform[3 /* i.e., x translation */] = 4;
inst[2] = BLASInstance( &blas );
inst[2].transform[0] = inst[2].transform[5] = inst[2].transform[10] = 0.5f; // scale
inst[2].transform[3 /* i.e., x translation */] = -4;
tlas.Build( inst, 3 );
}

bool UpdateCamera( float delta_time_s, fenster& f )
Expand All @@ -61,35 +66,68 @@ bool UpdateCamera( float delta_time_s, fenster& f )
return moved > 0;
}

void Tick( float delta_time_s, fenster& f, uint32_t* buf )
void TraceWorkerThread( uint32_t* buf, int threadIdx )
{
// handle user input and update camera
bool moved = UpdateCamera( delta_time_s, f ) || frameIdx++ == 0;

// clear the screen with a debug-friendly color
for (int i = 0; i < SCRWIDTH * SCRHEIGHT; i++) buf[i] = 0xaaaaff;

// trace rays
const bvhvec3 L = normalize( bvhvec3( 1, 2, 3 ) );
for (int ty = 0; ty < SCRHEIGHT / 4; ty++) for (int tx = 0; tx < SCRWIDTH / 4; tx++)
const int xtiles = SCRWIDTH / TILESIZE, ytiles = SCRHEIGHT / TILESIZE;
const int tiles = xtiles * ytiles;
int tile = threadIdx;
while (tile < tiles)
{
for (int y = 0; y < 4; y++) for (int x = 0; x < 4; x++)
const int tx = tile % xtiles, ty = tile / xtiles;
unsigned seed = (tile + 17) * 171717 + frameIdx * 1023;
const bvhvec3 L = normalize( bvhvec3( 1, 2, 3 ) );
for (int y = 0; y < TILESIZE; y++) for (int x = 0; x < TILESIZE; x++)
{
float u = (float)(tx * 4 + x) / SCRWIDTH, v = (float)(ty * 4 + y) / SCRHEIGHT;
bvhvec3 D = normalize( p1 + u * (p2 - p1) + v * (p3 - p1) - eye );
const int pixel_x = tx * TILESIZE + x, pixel_y = ty * TILESIZE + y;
const int pixelIdx = pixel_x + pixel_y * SCRWIDTH;
// setup primary ray
const float u = (float)pixel_x / SCRWIDTH, v = (float)pixel_y / SCRHEIGHT;
const bvhvec3 D = normalize( p1 + u * (p2 - p1) + v * (p3 - p1) - eye );
Ray ray( eye, D, 1e30f );
tlas.Intersect( ray );
if (ray.hit.t < 10000)
{
int pixel_x = tx * 4 + x, pixel_y = ty * 4 + y, primIdx = ray.hit.prim;
bvhvec3 v0 = triangles[primIdx * 3];
bvhvec3 v1 = triangles[primIdx * 3 + 1];
bvhvec3 v2 = triangles[primIdx * 3 + 2];
bvhvec3 N = normalize( cross( v1 - v0, v2 - v0 ) );
uint32_t pixel_x = tx * 4 + x, pixel_y = ty * 4 + y;
uint32_t primIdx = ray.hit.prim & PRIM_IDX_MASK;
uint32_t instIdx = ray.hit.prim >> INST_IDX_SHFT;
bvhvec4slice& instTris = inst[instIdx].blas->verts;
bvhvec3 v0 = instTris[primIdx * 3];
bvhvec3 v1 = instTris[primIdx * 3 + 1];
bvhvec3 v2 = instTris[primIdx * 3 + 2];
bvhvec3 N = normalize( cross( v1 - v0, v2 - v0 ) ); // TODO: Transform to world space
int c = (int)(255.9f * fabs( dot( N, L ) ));
buf[pixel_x + pixel_y * SCRWIDTH] = c + (c << 8) + (c << 16);
buf[pixelIdx] = c + (c << 8) + (c << 16);
}
}
tile = tileIdx++;
}
}

// Per-frame callback: handles input, rebuilds the TLAS, renders the frame with
// a set of tile worker threads and finally animates the BLAS instances.
//   delta_time_s : frame time in seconds, forwarded to UpdateCamera
//   f            : window state, forwarded to UpdateCamera
//   buf          : pixel buffer of SCRWIDTH * SCRHEIGHT entries receiving the image
void Tick( float delta_time_s, fenster& f, uint32_t* buf )
{
	// handle user input and update camera
	// Note: because of the short-circuiting ||, frameIdx only advances on
	// frames in which UpdateCamera reports no movement.
	bool moved = UpdateCamera( delta_time_s, f ) || frameIdx++ == 0;
	(void)moved; // result is not used yet; the calls are kept for their side effects

	// clear the screen with a debug-friendly color
	for (int i = 0; i < SCRWIDTH * SCRHEIGHT; i++) buf[i] = 0xaaaaff;

	// update TLAS, so the transform changes made at the end of the previous
	// frame are picked up
	tlas.Build( inst, 3 );

	// render tiles
	// std::thread::hardware_concurrency() may return 0 when the core count is
	// unknown; always use at least one worker so the frame still gets rendered.
	const uint32_t workers = threadCount > 0 ? threadCount : 1;
	// Worker i starts on tile i; all further tiles are claimed through tileIdx.
	tileIdx = (int)workers;
	std::vector<std::thread> threads;
	threads.reserve( workers );
	for (uint32_t i = 0; i < workers; i++)
		threads.emplace_back( &TraceWorkerThread, buf, (int)i );
	// wait for the complete frame before touching the instance transforms
	for (auto& thread : threads) thread.join();

	// change instance transforms: instances 1 and 2 bob up and down, each with
	// its own phase increment; instance 0 (static geometry) is left alone.
	static float a[3] = { 0 };
	for (int i = 1; i < 3; i++)
	{
		inst[i].transform[7] /* y-pos */ = sinf( a[i] ) * 3.0f + 3.5f;
		a[i] += 0.1f + (0.01f * (float)i);
		if (a[i] > 6.2832f) a[i] -= 6.2832f; // wrap the angle at roughly 2 * pi
	}
}

Expand Down
2 changes: 1 addition & 1 deletion tiny_bvh_pt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ static int triCount = 0, frameIdx = 0, spp = 0;
static bvhvec3 accumulator[SCRWIDTH * SCRHEIGHT];
static std::atomic<int> tileIdx( 0 );

// Multi-therading
// Multi-threading
static unsigned threadCount = std::thread::hardware_concurrency();

// Setup view pyramid for a pinhole camera:
Expand Down

0 comments on commit 462ef11

Please sign in to comment.