diff --git a/src/osgEarth/Chonk b/src/osgEarth/Chonk index 7b6ff485fb..fd9905fc8c 100644 --- a/src/osgEarth/Chonk +++ b/src/osgEarth/Chonk @@ -303,7 +303,7 @@ namespace osgEarth float visibility[2]; // per LOD float radius; // per chonk float alphaCutoff; - GLuint first_lod_cmd_index; + GLint first_lod_cmd_index = -1; // invalid instance }; using Instances = std::vector<Instance>; using Batches = std::unordered_map<Chonk::Ptr, Instances>; diff --git a/src/osgEarth/Chonk.Culling.glsl b/src/osgEarth/Chonk.Culling.glsl index 0f5b96d538..5621cb78d2 100644 --- a/src/osgEarth/Chonk.Culling.glsl +++ b/src/osgEarth/Chonk.Culling.glsl @@ -4,7 +4,7 @@ #pragma import_defines(OE_GPUCULL_DEBUG) #pragma import_defines(OE_IS_SHADOW_CAMERA) -layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; +layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in; struct DrawElementsIndirectCommand { @@ -98,6 +98,10 @@ void cull() const uint i = gl_GlobalInvocationID.x; // instance const uint lod = gl_GlobalInvocationID.y; // lod + // skip instances that exist only to pad the instance array to the workgroup size: + if (input_instances[i].first_lod_cmd_index < 0) + return; + // initialize by clearing the visibility for this LOD: input_instances[i].visibility[lod] = 0.0; diff --git a/src/osgEarth/Chonk.cpp b/src/osgEarth/Chonk.cpp index d69def6283..07cb1ac681 100644 --- a/src/osgEarth/Chonk.cpp +++ b/src/osgEarth/Chonk.cpp @@ -42,6 +42,9 @@ using namespace osgEarth; #define MAX_NEAR_PIXEL_SCALE FLT_MAX +// note: this MUST match the local_size product in Chonk.Culling.glsl +#define GPU_CULLING_LOCAL_WG_SIZE 32 + namespace { struct SendIndices @@ -1176,7 +1179,7 @@ ChonkDrawable::GLObjects::update( } // append the instance data (transforms) and set - // the index of the first variant command, which the compute + // the index of the first lod command, which the compute // shader will need. 
for (auto& instance : instances) { @@ -1184,6 +1187,14 @@ ChonkDrawable::GLObjects::update( _all_instances.back().first_lod_cmd_index = first_lod_cmd_index; } + // pad out the size of the instances array so it's a multiple of the + // GPU culling workgroup size. All "padding" instances will have the + // first_lod_cmd_index member equal to -1, indicating an invalid instance. + // The CS will check for this and discard them. + unsigned workgroups = (_all_instances.size() + GPU_CULLING_LOCAL_WG_SIZE - 1) / GPU_CULLING_LOCAL_WG_SIZE; + unsigned paddedSize = workgroups * GPU_CULLING_LOCAL_WG_SIZE; + _all_instances.resize(paddedSize); + max_lod_count = std::max(max_lod_count, lod_commands.size()); } @@ -1261,15 +1272,16 @@ ChonkDrawable::GLObjects::cull(osg::State& state) // calls for each tile. // Also, removing the memory barrier seems to make no difference, // but it's the right thing to do + unsigned workgroups = (_numInstances + (GPU_CULLING_LOCAL_WG_SIZE-1)) / GPU_CULLING_LOCAL_WG_SIZE; // cull: ext->glUniform1i(ps._passUL, 0); - ext->glDispatchCompute(_numInstances, _maxNumLODs, 1); + ext->glDispatchCompute(workgroups, _maxNumLODs, 1); // compact: ext->glUniform1i(ps._passUL, 1); ext->glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); - ext->glDispatchCompute(_numInstances, _maxNumLODs, 1); + ext->glDispatchCompute(workgroups, _maxNumLODs, 1); } void diff --git a/src/osgEarth/Version.in b/src/osgEarth/Version.in index 97bc603e27..745447ef57 100644 --- a/src/osgEarth/Version.in +++ b/src/osgEarth/Version.in @@ -34,32 +34,32 @@ namespace osgEarth (major == rhs.major && minor < rhs.minor) || (major == rhs.major && minor == rhs.minor && patch < rhs.patch); } - inline bool lessThan(int major, int minor, int patch) const { - return this->lessThan(Version{ major, minor, patch }); + inline bool lessThan(int in_major, int in_minor, int in_patch) const { + return lessThan(Version{ in_major, in_minor, in_patch }); } inline bool lessThanOrEqualTo(const Version& rhs) const { 
return major < rhs.major || (major == rhs.major && minor < rhs.minor) || (major == rhs.major && minor == rhs.minor && patch <= rhs.patch); } - inline bool lessThanOrEqualTo(int major, int minor, int patch) const { - return this->lessThanOrEqualTo(Version{ major, minor, patch }); + inline bool lessThanOrEqualTo(int in_major, int in_minor, int in_patch) const { + return lessThanOrEqualTo(Version{ in_major, in_minor, in_patch }); } inline bool greaterThan(const Version& rhs) const { return major > rhs.major || (major == rhs.major && minor > rhs.minor) || (major == rhs.major && minor == rhs.minor && patch > rhs.patch); } - inline bool greaterThan(int major, int minor, int patch) const { - return this->greaterThan(Version{ major, minor, patch }); + inline bool greaterThan(int in_major, int in_minor, int in_patch) const { + return greaterThan(Version{ in_major, in_minor, in_patch }); } inline bool greaterThanOrEqualTo(const Version& rhs) const { return major > rhs.major || (major == rhs.major && minor > rhs.minor) || (major == rhs.major && minor == rhs.minor && patch >= rhs.patch); } - inline bool greaterThanOrEqualTo(int major, int minor, int patch) const { - return this->greaterThanOrEqualTo(Version{ major, minor, patch }); + inline bool greaterThanOrEqualTo(int in_major, int in_minor, int in_patch) const { + return greaterThanOrEqualTo(Version{ in_major, in_minor, in_patch }); } };