Skip to content

Commit

Permalink
Compat: Refactor fwidth/Fine/Coarse for 0 storage buffers. (gpuweb#4128)
Browse files Browse the repository at this point in the history
Modified so this test doesn't use storage buffers by having
it return values from a fragment shader as rgba32uint
  • Loading branch information
greggman authored Jan 9, 2025
1 parent 2a7c7f9 commit 077ffee
Show file tree
Hide file tree
Showing 2 changed files with 161 additions and 133 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,11 @@ export function runDerivativeTest(
// We will populate a uniform buffer with these input values laid out sequentially:
// [ case_0_input_1, case_0_input_0, case_1_input_1, case_1_input_0, ...]
//
// The render pipeline will be launched once per pixel per pair of cases over
// a viewport size of (2, 2) with the viewport set to cover 1 pixel.
// Each 2x2 set of calls will will exercise two test cases. Each of these
// draw calls will use a different instance index, which is forwarded to the
// fragment shader. Each invocation returns the result which is stored in
// a rgba32uint texture.
// The render pipeline will be launched once per pair of cases over a viewport
// size of (2, 2). Each 2x2 set of calls will exercise two test cases.
// Each of these draw calls will use a different instance index, which is
// forwarded to the fragment shader. Each invocation returns the result which
// is stored in a rgba32uint texture.
//
// Consider draw calls that test 4 cases (c_0, c_1, c_2, c_3).
//
Expand Down
283 changes: 156 additions & 127 deletions src/webgpu/shader/execution/expression/call/builtin/fwidth.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import { assert } from '../../../../../../common/util/util.js';
import { GPUTest } from '../../../../../gpu_test.js';
import { anyOf } from '../../../../../util/compare.js';
import { Type, Value } from '../../../../../util/conversion.js';
import { FPInterval } from '../../../../../util/floating_point.js';
import { align } from '../../../../../util/math.js';
import { Case } from '../../case.js';
import { toComparator } from '../../expectation.js';

Expand All @@ -22,14 +24,11 @@ export function runFWidthTest(
) {
////////////////////////////////////////////////////////////////
// The four input values for a given case are distributed across the invocations in a quad.
// We will populate a storage buffer with these input values laid out sequentially:
// We will populate a uniform buffer with these input values laid out sequentially:
// [ case0_input0, case0_input1, case0_input2, case0_input3, ...]
//
// The render pipeline will be launched several times over a viewport size of (2, 2). Each draw
// call will execute a single quad (four fragment invocation), which will exercise one test case.
// Each of these draw calls will use a different instance index, which is forwarded to the
// fragment shader. Each invocation will determine its index into the storage buffer using its
// fragment position and the instance index for that draw call.
// The render pipeline renders to a 512x2 texture. In the fragment shader, every 2x2 block of texels is one test case.
// The results are the output from the fragment shader.
//
// Consider two draw calls that test 2 cases (c0, c1).
//
Expand All @@ -46,46 +45,56 @@ export function runFWidthTest(
}

// Determine the WGSL type to use in the shader, and the stride in bytes between values.
let valueStride = 4;
let wgslType = 'f32';
const valueStride = 16;
let conversionFromInput = 'input.x';
let conversionToOutput = `vec4f(v, 0, 0, 0)`;
if (vectorize) {
wgslType = `vec${vectorize}f`;
valueStride = vectorize * 4;
if (vectorize === 3) {
valueStride = 16;
switch (vectorize) {
case 2:
conversionFromInput = 'input.xy';
conversionToOutput = 'vec4f(v, 0, 0)';
break;
case 3:
conversionFromInput = 'input.xyz';
conversionToOutput = 'vec4f(v, 0)';
break;
case 4:
conversionFromInput = 'input';
conversionToOutput = 'v';
break;
}
}

const kUniformBufferSize = 16384; // min supported by compat mode.
const kNumCasesPerUniformBuffer = kUniformBufferSize / 64;

// Define a vertex shader that draws a triangle over the full viewport, and a fragment shader that
// calls the fwidth builtin with a value loaded from that fragment's index into the uniform
// buffer (determined using the quad index and fragment position, as described above).
const code = `
struct CaseInfo {
@builtin(position) position: vec4f,
@location(0) @interpolate(flat, either) quad_idx: u32,
}
@vertex
fn vert(@builtin(vertex_index) vertex_idx: u32,
@builtin(instance_index) instance_idx: u32) -> CaseInfo {
fn vert(@builtin(vertex_index) vertex_idx: u32) -> @builtin(position) vec4f {
const kVertices = array(
vec2f(-2, -2),
vec2f( 2, -2),
vec2f( 0, 2),
vec2f( 3, -1),
vec2f(-1, 3),
vec2f(-1, -1),
);
return CaseInfo(vec4(kVertices[vertex_idx], 0, 1), instance_idx);
return vec4(kVertices[vertex_idx], 0, 1);
}
@group(0) @binding(0) var<storage, read> inputs : array<${wgslType}>;
@group(0) @binding(1) var<storage, read_write> outputs : array<${wgslType}>;
@group(0) @binding(0) var<uniform> inputs : array<vec4f, ${kNumCasesPerUniformBuffer * 4}>;
@fragment
fn frag(info : CaseInfo) {
let inv_idx = u32(info.position.x) + u32(info.position.y)*2;
let index = info.quad_idx*4 + inv_idx;
fn frag(@builtin(position) position: vec4f) -> @location(0) vec4u {
let t = vec2u(position.xy);
let inv_idx = t.x % 2 + (t.y % 2) * 2;
let q = t / 2;
let quad_idx = q.y * 256 + q.x;
let index = quad_idx * 4 + inv_idx;
let input = inputs[index];
${non_uniform_discard ? 'if inv_idx == 0 { discard; }' : ''}
outputs[index] = ${builtin}(input);
let v = ${builtin}(${conversionFromInput});
return bitcast<vec4u>(${conversionToOutput});
}
`;

Expand All @@ -94,116 +103,136 @@ fn frag(info : CaseInfo) {
const pipeline = t.device.createRenderPipeline({
layout: 'auto',
vertex: { module },
fragment: { module, targets: [{ format: 'rgba8unorm', writeMask: 0 }] },
});

// Create storage buffers to hold the inputs and outputs.
const bufferSize = cases.length * 4 * valueStride;
const inputBuffer = t.createBufferTracked({
size: bufferSize,
usage: GPUBufferUsage.STORAGE,
mappedAtCreation: true,
});
const outputBuffer = t.createBufferTracked({
size: bufferSize,
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
fragment: { module, targets: [{ format: 'rgba32uint' }] },
});

// Populate the input storage buffer with case input values.
const valuesData = new Uint8Array(inputBuffer.getMappedRange());
for (let i = 0; i < cases.length / vectorWidth; i++) {
for (let v = 0; v < vectorWidth; v++) {
const index = i * vectorWidth + v;
if (index >= cases.length) {
break;
}
const inputs = cases[index].input as ReadonlyArray<Value>;
for (let x = 0; x < 4; x++) {
inputs[x].copyTo(valuesData, (i * 4 + x) * valueStride + v * 4);
}
}
}
inputBuffer.unmap();

// Create a bind group for the storage buffers.
const group = t.device.createBindGroup({
entries: [
{ binding: 0, resource: { buffer: inputBuffer } },
{ binding: 1, resource: { buffer: outputBuffer } },
],
layout: pipeline.getBindGroupLayout(0),
});

// Create a texture to use as a color attachment.
// We only need this for launching the desired number of fragment invocations.
// Create a texture to use as a color attachment to receive the results.
const width = kNumCasesPerUniformBuffer * 2;
const height = 2;
// note: We could limit it to this size and increase height but kNumCasesPerUniformBuffer is limited to 256
// because we can't fit more into a single uniform buffer in compat.
assert(width < t.device.limits.maxTextureDimension2D);
const colorAttachment = t.createTextureTracked({
size: { width: 2, height: 2 },
format: 'rgba8unorm',
usage: GPUTextureUsage.RENDER_ATTACHMENT,
size: [width, height],
format: 'rgba32uint',
usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
});
const bytesPerRow = align(width * 16, 256);

// Submit the render pass to the device.
const results = [];
const encoder = t.device.createCommandEncoder();
const pass = encoder.beginRenderPass({
colorAttachments: [
{
view: colorAttachment.createView(),
loadOp: 'clear',
storeOp: 'discard',
},
],
});
pass.setPipeline(pipeline);
pass.setBindGroup(0, group);
for (let quad = 0; quad < cases.length / vectorWidth; quad++) {
pass.draw(3, 1, undefined, quad);
for (let c = 0; c < cases.length; c += kNumCasesPerUniformBuffer) {
// Create uniform buffer to hold the inputs.
const inputBuffer = t.createBufferTracked({
size: kUniformBufferSize,
usage: GPUBufferUsage.UNIFORM,
mappedAtCreation: true,
});
const valuesData = new Uint8Array(inputBuffer.getMappedRange());

// Populate the input uniform buffer with case input values.
for (let i = 0; i < kNumCasesPerUniformBuffer / vectorWidth; i++) {
for (let v = 0; v < vectorWidth; v++) {
const index = c + i * vectorWidth + v;
if (index >= cases.length) {
break;
}
const inputs = cases[index].input as ReadonlyArray<Value>;
for (let x = 0; x < 4; x++) {
inputs[x].copyTo(valuesData, (i * 4 + x) * valueStride + v * 4);
}
}
}
inputBuffer.unmap();

// Create a bind group for the input buffer.
const group = t.device.createBindGroup({
entries: [{ binding: 0, resource: { buffer: inputBuffer } }],
layout: pipeline.getBindGroupLayout(0),
});

// Submit the render pass to the device.
const pass = encoder.beginRenderPass({
colorAttachments: [
{
view: colorAttachment.createView(),
loadOp: 'clear',
storeOp: 'store',
},
],
});
pass.setPipeline(pipeline);
pass.setBindGroup(0, group);
pass.draw(3);
pass.end();

// Create buffer to hold the outputs.
const outputBuffer = t.createBufferTracked({
size: bytesPerRow * height,
usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
});
results.push(outputBuffer);

// Copy the texture to the output buffer
encoder.copyTextureToBuffer(
{ texture: colorAttachment },
{ buffer: outputBuffer, bytesPerRow },
[colorAttachment.width, colorAttachment.height]
);
}
pass.end();
t.queue.submit([encoder.finish()]);

// Check the outputs match the expected results.
t.expectGPUBufferValuesPassCheck(
outputBuffer,
(outputData: Uint8Array) => {
for (let i = 0; i < cases.length / vectorWidth; i++) {
for (let v = 0; v < vectorWidth; v++) {
const index = i * vectorWidth + v;
if (index >= cases.length) {
break;
}
const c = cases[index];

for (let x = 0; x < 4; x++) {
if (non_uniform_discard && x === 0) {
continue;
results.forEach((outputBuffer, groupNdx) => {
// Check the outputs match the expected results.
t.expectGPUBufferValuesPassCheck(
outputBuffer,
(outputData: Uint8Array) => {
const base = groupNdx * kNumCasesPerUniformBuffer;
const numCases = Math.min(kNumCasesPerUniformBuffer, cases.length - base);
const numQuads = numCases / vectorWidth;
for (let i = 0; i < numQuads; i++) {
for (let v = 0; v < vectorWidth; v++) {
const caseNdx = base + i * vectorWidth + v;
if (caseNdx >= cases.length) {
break;
}

const index = (i * 4 + x) * valueStride + v * 4;
const result = Type.f32.read(outputData, index);

let expected = c.expected;
if (builtin.endsWith('Fine')) {
expected = toComparator((expected as FPInterval[])[x]);
} else {
expected = anyOf(...(expected as FPInterval[]));
}

const cmp = expected.compare(result);
if (!cmp.matched) {
return new Error(`
inputs: (${(c.input as Value[]).join(', ')})
expected: ${cmp.expected}
returned: ${result}`);
const c = cases[caseNdx];

for (let x = 0; x < 4; x++) {
if (non_uniform_discard && x === 0) {
continue;
}

const tx = x % 2;
const ty = (x / 2) | 0;
const index = ty * bytesPerRow + i * 32 + tx * 16 + v * 4;
const result = Type.f32.read(outputData, index);

let expected = c.expected;
if (builtin.endsWith('Fine')) {
expected = toComparator((expected as FPInterval[])[x]);
} else {
expected = anyOf(...(expected as FPInterval[]));
}

const cmp = expected.compare(result);
if (!cmp.matched) {
return new Error(`
caseNdx: ${caseNdx} v: ${v} x: ${x}
inputs: (${(c.input as Value[]).join(', ')})
expected: ${cmp.expected}
returned: ${result}`);
}
}
}
}
return undefined;
},
{
type: Uint8Array,
typedLength: outputBuffer.size,
}
return undefined;
},
{
type: Uint8Array,
typedLength: bufferSize,
}
);
);
});
}

0 comments on commit 077ffee

Please sign in to comment.