Skip to content

Commit

Permalink
Fragment subgroup_size test: Don't use broadcasting (#4035)
Browse files Browse the repository at this point in the history
This CL modify the shader for fragment subgroup_size built-in test to
prevent using broadcasting (from invocation 0), because any invocation
id can be inactivate for fragment shader. This CL use ballot instead and
do more validation within the shader.
  • Loading branch information
jzm-intel authored Nov 7, 2024
1 parent 447b667 commit b9f32fd
Showing 1 changed file with 133 additions and 87 deletions.
220 changes: 133 additions & 87 deletions src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1453,76 +1453,6 @@ function popcount(input: number): number {
return (((n + (n >> 4)) & 0xf0f0f0f) * 0x1010101) >> 24;
}

/**
* Checks subgroup_size builtin value consistency.
*
* The builtin subgroup_size is not assumed to be uniform in fragment shaders.
* Therefore, this function checks the value is a power of two within the device
* limits and that the ballot size is less than the stated size.
* @param data An array of vec4u that contains (per texel):
* * builtin value
* * ballot size
* * comparison to other invocations
* * 0
* @param format The texture format for data
* @param min The minimum subgroup size from the device
* @param max The maximum subgroup size from the device
* @param width The width of the framebuffer
* @param height The height of the framebuffer
*/
function checkSubgroupSizeConsistency(
data: Uint32Array,
format: GPUTextureFormat,
min: number,
max: number,
width: number,
height: number
): Error | undefined {
const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format];
const blocksPerRow = width / blockWidth;
// Image copies require bytesPerRow to be a multiple of 256.
const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256);
const uintsPerRow = bytesPerRow / 4;
const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4;

for (let row = 0; row < height; row++) {
for (let col = 0; col < width; col++) {
const offset = uintsPerRow * row + col * uintsPerTexel;
const builtinSize = data[offset];
const ballotSize = data[offset + 1];
const comparison = data[offset + 2];
if (builtinSize === 0) {
continue;
}

if (popcount(builtinSize) !== 1) {
return new Error(`Subgroup size '${builtinSize}' is not a power of two`);
}

if (builtinSize < min) {
return new Error(`Subgroup size '${builtinSize}' is less than minimum '${min}'`);
}
if (max < builtinSize) {
return new Error(`Subgroup size '${builtinSize}' is greater than maximum '${max}'`);
}

if (builtinSize < ballotSize) {
return new Error(`Inconsistent subgroup ballot size
- icoord: (${row}, ${col})
- expected: ${builtinSize}
- got: ${ballotSize}`);
}

if (comparison !== 1) {
return new Error(`Not all invocations in subgroup have same view of the size
- icoord: (${row}, ${col})`);
}
}
}

return undefined;
}

/**
* Runs a subgroup builtin test for fragment shaders
*
Expand Down Expand Up @@ -1618,6 +1548,101 @@ fn vsMain(@builtin(vertex_index) index : u32) -> @builtin(position) vec4f {
}
}

const kMaximiumSubgroupSize = 128;
// A non-zero magic number indicating no expectation error, in order to prevent the false no-error
// result from zero-initialization.
const kSubgroupShaderNoError = 17;

/**
* Checks subgroup_size builtin value consistency.
*
* The builtin subgroup_size is not assumed to be uniform in fragment shaders.
* Therefore, this function checks the value is a power of two within the device
* limits and that the ballot size is less than the stated size.
* @param data An array of vec4u that contains (per texel):
* * subgroup_size builtin value
* * balloted active invocations number
* * balloted subgroup size all active invocations agreed on, otherwise 0
* * error flag, should be equal to kSubgroupShaderNoError or shader found
* expection failed otherwise.
* @param format The texture format for data
* @param min The minimum subgroup size from the device
* @param max The maximum subgroup size from the device
* @param width The width of the framebuffer
* @param height The height of the framebuffer
*/
function checkSubgroupSizeConsistency(
data: Uint32Array,
format: GPUTextureFormat,
min: number,
max: number,
width: number,
height: number
): Error | undefined {
const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format];
const blocksPerRow = width / blockWidth;
// Image copies require bytesPerRow to be a multiple of 256.
const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256);
const uintsPerRow = bytesPerRow / 4;
const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4;

for (let row = 0; row < height; row++) {
for (let col = 0; col < width; col++) {
const offset = uintsPerRow * row + col * uintsPerTexel;
const subgroupSize = data[offset];
const countActive = data[offset + 1];
const ballotedSubgroupSize = data[offset + 2];
const error = data[offset + 3];

if (error === 0) {
// Inactive fragment get error `0` instead of noError. Check all output being zero.
if (subgroupSize !== 0 || countActive !== 0 || ballotedSubgroupSize !== 0) {
return new Error(
`Unexpected zero error with non-zero outputs for (${row}, ${col}): got output [${subgroupSize}, ${countActive}, ${ballotedSubgroupSize}, ${error}]`
);
}
continue;
}

if (popcount(subgroupSize) !== 1) {
return new Error(`Subgroup size '${subgroupSize}' is not a power of two`);
}

if (subgroupSize < min) {
return new Error(`Subgroup size '${subgroupSize}' is less than minimum '${min}'`);
}
if (max < subgroupSize) {
return new Error(`Subgroup size '${subgroupSize}' is greater than maximum '${max}'`);
}

if (subgroupSize < countActive) {
return new Error(`Unexpected active invocations number larger than subgroup size
- icoord: (${row}, ${col})
- subgroupSize: ${subgroupSize}
- countActive: ${countActive}`);
}

if (subgroupSize !== ballotedSubgroupSize) {
return new Error(`Inconsistent subgroup size
- icoord: (${row}, ${col})
- subgroupSize: ${subgroupSize}
- balloted subgroup size: ${ballotedSubgroupSize}`);
}

if (error !== kSubgroupShaderNoError) {
return new Error(
`Unexpected error value
- icoord: (${row}, ${col})
- expected: noError (${kSubgroupShaderNoError})
- got: ${error}`
);
}
}
}

return undefined;
}

g.test('subgroup_size')
.desc('Tests subgroup_size values')
.params(u =>
Expand All @@ -1639,6 +1664,9 @@ g.test('subgroup_size')
const fsShader = `
enable subgroups;
const maxSubgroupSize = ${kMaximiumSubgroupSize}u;
const noError = ${kSubgroupShaderNoError}u;
const width = ${t.params.size[0]};
const height = ${t.params.size[1]};
Expand All @@ -1647,16 +1675,38 @@ fn fsMain(
@builtin(position) pos : vec4f,
@builtin(subgroup_size) sg_size : u32,
) -> @location(0) vec4u {
let ballot = countOneBits(subgroupBallot(true));
let ballotSize = ballot.x + ballot.y + ballot.z + ballot.w;
var error: u32 = noError;
let ballotActive = countOneBits(subgroupBallot(true));
let countActive = ballotActive.x + ballotActive.y + ballotActive.z + ballotActive.w;
// Validate that balloted active invocations number no larger than subgroup size
if (countActive > sg_size) {
error++;
}
// Do all invocations in the subgroup see the same subgroup size?
let firstSize = subgroupBroadcast(sg_size, 0);
let compareBallot = countOneBits(subgroupBallot(firstSize == sg_size));
let compareSize = compareBallot.x + compareBallot.y + compareBallot.z + compareBallot.w;
let sameSize = select(0u, 1u, compareSize == ballotSize);
var subgroupSizeBallotedInvocations: u32 = 0u;
var ballotedSubgroupSize: u32 = 0u;
for (var i: u32 = 0; i <= maxSubgroupSize; i++) {
let ballotSubgroupSizeEqualI = countOneBits(subgroupBallot(sg_size == i));
let countSubgroupSizeEqualI = ballotSubgroupSizeEqualI.x + ballotSubgroupSizeEqualI.y + ballotSubgroupSizeEqualI.z + ballotSubgroupSizeEqualI.w;
subgroupSizeBallotedInvocations += countSubgroupSizeEqualI;
// Validate that all active invocations see the same subgroup size, i.e. ballotedSubgroupSize
if (countSubgroupSizeEqualI == countActive) {
ballotedSubgroupSize = i;
} else if (countSubgroupSizeEqualI != 0) {
error++;
}
}
// Validate that all active invocations balloted in previous loop
if (subgroupSizeBallotedInvocations != countActive) {
error++;
}
// Validate that ballotedSubgroupSize is identical to subgroup_size
if (ballotedSubgroupSize != sg_size) {
error++;
}
return vec4u(sg_size, ballotSize, sameSize, 0);
return vec4u(sg_size, countActive, ballotedSubgroupSize, error);
}`;

await runSubgroupTest(
Expand All @@ -1678,10 +1728,6 @@ fn fsMain(
);
});

// A non-zero magic number indicating no expectation error, in order to prevent the false no-error
// result from zero-initialization.
const kSubgroupInvocationIdNoError = 17;

/**
* Checks subgroup_invocation_id value consistency
*
Expand All @@ -1693,7 +1739,7 @@ const kSubgroupInvocationIdNoError = 17;
* * subgroup_invocation_id
* * subgroup size
* * ballot active invocation number
* * error flag, should be equal to kSubgroupInvocationIdNoError or shader found
* * error flag, should be equal to kSubgroupShaderNoError or shader found
* expection failed otherwise.
* @param format The texture format of data
* @param width The width of the framebuffer
Expand Down Expand Up @@ -1741,11 +1787,11 @@ function checkSubgroupInvocationIdConsistency(
);
}

if (error !== kSubgroupInvocationIdNoError) {
if (error !== kSubgroupShaderNoError) {
return new Error(
`Unexpected error value
- icoord: (${row}, ${col})
- expected: noError (${kSubgroupInvocationIdNoError})
- expected: noError (${kSubgroupShaderNoError})
- got: ${error}`
);
}
Expand Down Expand Up @@ -1773,10 +1819,10 @@ enable subgroups;
const width = ${t.params.size[0]};
const height = ${t.params.size[1]};
const maxSubgroupSize = 128u;
const maxSubgroupSize = ${kMaximiumSubgroupSize}u;
// A non-zero magic number indicating no expectation error, in order to prevent the
// false no-error result from zero-initialization.
const noError = ${kSubgroupInvocationIdNoError}u;
const noError = ${kSubgroupShaderNoError}u;
@fragment
fn fsMain(
Expand Down

0 comments on commit b9f32fd

Please sign in to comment.