diff --git a/src/osgEarth/Chonk b/src/osgEarth/Chonk
index 7b6ff485fb..fd9905fc8c 100644
--- a/src/osgEarth/Chonk
+++ b/src/osgEarth/Chonk
@@ -303,7 +303,7 @@ namespace osgEarth
             float visibility[2]; // per LOD
             float radius; // per chonk
             float alphaCutoff;
-            GLuint first_lod_cmd_index;
+            GLint first_lod_cmd_index = -1; // invalid instance
         };
         using Instances = std::vector<Instance>;
         using Batches = std::unordered_map<Chonk::Ptr, Instances>;
diff --git a/src/osgEarth/Chonk.Culling.glsl b/src/osgEarth/Chonk.Culling.glsl
index 0f5b96d538..5621cb78d2 100644
--- a/src/osgEarth/Chonk.Culling.glsl
+++ b/src/osgEarth/Chonk.Culling.glsl
@@ -4,7 +4,7 @@
 #pragma import_defines(OE_GPUCULL_DEBUG)
 #pragma import_defines(OE_IS_SHADOW_CAMERA)
 
-layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in;
 
 struct DrawElementsIndirectCommand
 {
@@ -98,6 +98,10 @@ void cull()
     const uint i = gl_GlobalInvocationID.x; // instance
     const uint lod = gl_GlobalInvocationID.y; // lod
 
+    // skip instances that exist only to pad the instance array to the workgroup size:
+    if (input_instances[i].first_lod_cmd_index < 0)
+        return;
+
     // initialize by clearing the visibility for this LOD:
     input_instances[i].visibility[lod] = 0.0;
 
diff --git a/src/osgEarth/Chonk.cpp b/src/osgEarth/Chonk.cpp
index d69def6283..07cb1ac681 100644
--- a/src/osgEarth/Chonk.cpp
+++ b/src/osgEarth/Chonk.cpp
@@ -42,6 +42,9 @@ using namespace osgEarth;
 
 #define MAX_NEAR_PIXEL_SCALE FLT_MAX
 
+// note: this MUST match the local_size product in Chonk.Culling.glsl
+#define GPU_CULLING_LOCAL_WG_SIZE 32
+
 namespace
 {
     struct SendIndices
@@ -1176,7 +1179,7 @@ ChonkDrawable::GLObjects::update(
         }
 
         // append the instance data (transforms) and set
-        // the index of the first variant command, which the compute
+        // the index of the first lod command, which the compute
         // shader will need.
         for (auto& instance : instances)
         {
@@ -1184,6 +1187,14 @@ ChonkDrawable::GLObjects::update(
             _all_instances.back().first_lod_cmd_index = first_lod_cmd_index;
         }
 
+        // pad out the size of the instances array so it's a multiple of the
+        // GPU culling workgroup size. All "padding" instances will have the
+        // first_lod_cmd_index member equal to -1, indicating an invalid instance.
+        // The CS will check for this and discard them.
+        unsigned workgroups = (_all_instances.size() + GPU_CULLING_LOCAL_WG_SIZE - 1) / GPU_CULLING_LOCAL_WG_SIZE;
+        unsigned paddedSize = workgroups * GPU_CULLING_LOCAL_WG_SIZE;
+        _all_instances.resize(paddedSize);
+
         max_lod_count = std::max(max_lod_count, lod_commands.size());
     }
 
@@ -1261,15 +1272,16 @@ ChonkDrawable::GLObjects::cull(osg::State& state)
     // calls for each tile.
     // Also, removing the memory barrier seems to make no difference,
     // but it's the right thing to do
+    unsigned workgroups = (_numInstances + (GPU_CULLING_LOCAL_WG_SIZE-1)) / GPU_CULLING_LOCAL_WG_SIZE;
 
     // cull:
     ext->glUniform1i(ps._passUL, 0);
-    ext->glDispatchCompute(_numInstances, _maxNumLODs, 1);
+    ext->glDispatchCompute(workgroups, _maxNumLODs, 1);
 
     // compact:
     ext->glUniform1i(ps._passUL, 1);
     ext->glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
-    ext->glDispatchCompute(_numInstances, _maxNumLODs, 1);
+    ext->glDispatchCompute(workgroups, _maxNumLODs, 1);
 }
 
 void
diff --git a/src/osgEarth/Version.in b/src/osgEarth/Version.in
index 97bc603e27..745447ef57 100644
--- a/src/osgEarth/Version.in
+++ b/src/osgEarth/Version.in
@@ -34,32 +34,32 @@ namespace osgEarth
                 (major == rhs.major && minor < rhs.minor) ||
                 (major == rhs.major && minor == rhs.minor && patch < rhs.patch);
         }
-        inline bool lessThan(int major, int minor, int patch) const {
-            return this->lessThan(Version{ major, minor, patch });
+        inline bool lessThan(int in_major, int in_minor, int in_patch) const {
+            return lessThan(Version{ in_major, in_minor, in_patch });
         }
         inline bool lessThanOrEqualTo(const Version& rhs) const {
             return major < rhs.major ||
                 (major == rhs.major && minor < rhs.minor) ||
                 (major == rhs.major && minor == rhs.minor && patch <= rhs.patch);
         }
-        inline bool lessThanOrEqualTo(int major, int minor, int patch) const {
-            return this->lessThanOrEqualTo(Version{ major, minor, patch });
+        inline bool lessThanOrEqualTo(int in_major, int in_minor, int in_patch) const {
+            return lessThanOrEqualTo(Version{ in_major, in_minor, in_patch });
         }
         inline bool greaterThan(const Version& rhs) const {
             return major > rhs.major ||
                 (major == rhs.major && minor > rhs.minor) ||
                 (major == rhs.major && minor == rhs.minor && patch > rhs.patch);
         }
-        inline bool greaterThan(int major, int minor, int patch) const {
-            return this->greaterThan(Version{ major, minor, patch });
+        inline bool greaterThan(int in_major, int in_minor, int in_patch) const {
+            return greaterThan(Version{ in_major, in_minor, in_patch });
         }
         inline bool greaterThanOrEqualTo(const Version& rhs) const {
             return major > rhs.major ||
                 (major == rhs.major && minor > rhs.minor) ||
                 (major == rhs.major && minor == rhs.minor && patch >= rhs.patch);
         }
-        inline bool greaterThanOrEqualTo(int major, int minor, int patch) const {
-            return this->greaterThanOrEqualTo(Version{ major, minor, patch });
+        inline bool greaterThanOrEqualTo(int in_major, int in_minor, int in_patch) const {
+            return greaterThanOrEqualTo(Version{ in_major, in_minor, in_patch });
         }
     };